Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 61 additions & 8 deletions vmm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ impl MigrationWorker {
}

/// Perform the migration and communicate with the [`Vmm`] thread.
fn run(mut self) -> (Vm, result::Result<(), MigratableError>) {
fn run(mut self) -> MigrationThreadOut {
debug!("migration thread is starting");

let res = self.migrate().inspect_err(|e| error!("migrate error: {e}"));
Expand All @@ -823,7 +823,10 @@ impl MigrationWorker {
self.check_migration_evt.write(1).unwrap();

debug!("migration thread is finished");
(self.vm, res)
MigrationThreadOut {
vm: self.vm,
migration_res: res,
}
}
}

Expand Down Expand Up @@ -869,6 +872,12 @@ impl MaybeVmOwnership {
}
}

/// Output value of the [`MigrationWorker`] thread.
///
/// Produced at the end of the worker's `run` method and received by the
/// [`Vmm`] when it joins the migration thread.
struct MigrationThreadOut {
/// The [`Vm`] that the worker held during the migration. It is handed back
/// regardless of whether the migration succeeded or failed.
vm: Vm,
/// Outcome of the migration attempt: `Ok(())` on success, otherwise the
/// underlying [`MigratableError`].
migration_res: result::Result<(), MigratableError>,
}

pub struct Vmm {
epoll: EpollContext,
exit_evt: EventFd,
Expand All @@ -893,7 +902,7 @@ pub struct Vmm {
/// Handle to the [`MigrationWorker`] thread.
///
/// The handle will return the [`Vm`] back in any case. Further, the underlying error (if any) is returned.
migration_thread_handle: Option<JoinHandle<(Vm, result::Result<(), MigratableError>)>>,
migration_thread_handle: Option<JoinHandle<MigrationThreadOut>>,
}

/// Wait for a file descriptor to become readable. In this case, we return
Expand Down Expand Up @@ -2623,18 +2632,21 @@ impl Vmm {
fn check_migration_result(&mut self) {
// At this point, the thread must be finished.
// If we fail here, we have lost anyway. Just panic.
let (vm, migration_res) = self
let MigrationThreadOut {
mut vm,
migration_res,
} = self
.migration_thread_handle
.take()
.expect("should have thread")
.join()
.expect("should have joined");

// Give VMM back control.
self.vm = MaybeVmOwnership::Vmm(vm);

match migration_res {
Ok(()) => {
self.vm = MaybeVmOwnership::None;
drop(vm);

{
info!("Sending Receiver in HTTP thread that migration succeeded");
let (sender, _) = &*ONGOING_LIVEMIGRATION;
Expand All @@ -2649,13 +2661,40 @@ impl Vmm {
}
Err(e) => {
error!("Migration failed: {e}");
// we don't fail the VMM here, it just continues running its VM

// If the failure happened very late in the migration path, the VM might already be
// stopped. We resume it to ensure proper operation.
//
// Cloud Hypervisor only supports migration of running VMs, therefore it cannot
// happen that we resume a previously paused VM.
let state = vm.get_state().expect("should acquire lock");
if state == VmState::Paused {
match vm.resume() {
Ok(_) => {
info!("Resumed VM successfully after failed migration");

// Ensure full VM performance. The operation is idempotent.
let _ = vm.stop_dirty_log().inspect_err(|e| {
warn!("Failed stopping dirty log after resuming VM: {e} - VM performance might be slower than usual");
});
}
Err(e) => {
error!("Failed resuming VM after failed migration: {e}");
self.exit_evt.write(1).unwrap();
}
}
}

// Give VMM back control.
self.vm = MaybeVmOwnership::Vmm(vm);

{
info!("Sending Receiver in HTTP thread that migration failed");
let (sender, _) = &*ONGOING_LIVEMIGRATION;
// unblock API call; propagate migration result
sender.send(Err(e)).unwrap();
}
// we don't fail the VMM here, it just continues running its VM
}
}
}
Expand Down Expand Up @@ -3605,6 +3644,20 @@ impl RequestHandler for Vmm {
)));
}

// Cloud Hypervisor only supports the migration of running VMs.
let current_state = self
.vm
.vm_mut()
.as_ref()
.unwrap()
.get_state()
.expect("should acquire lock");
if current_state != VmState::Running {
return Err(MigratableError::MigrateSend(anyhow!(format!(
"Only running VMs can be migrated! state={current_state:?}"
))));
}

// Take VM ownership. This also means that API events can no longer
// change the VM (e.g. net device hotplug).
let vm = self.vm.take_vm_for_migration();
Expand Down
Loading