Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/cli/src/commands/kill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ async fn kill_one(
let box_id = record.id.clone();
let name = record.name.clone();

if record.status == "paused" && is_stopping_signal(signal) && signal != SIGKILL {
// Resume a paused box before terminating it, SIGKILL included: a paused box
// is SIGSTOP'd, and leaving it frozen would otherwise strand the VM (and the
// via-guest path below cannot reach a frozen guest).
if record.status == "paused" && is_stopping_signal(signal) {
lifecycle::resume_paused_for_termination(&record, pid, "kill")
.map_err(|error| -> Box<dyn std::error::Error> { error.into() })?;
}
Expand All @@ -116,8 +119,18 @@ async fn kill_one(
// signalling the host shim never reaches the container and would kill the
// VM abruptly. Fall back to a host signal only when no guest exec server
// is reachable (older box / socket gone).
//
// SIGKILL is the exception: it cannot be caught/handled, so routing it
// through the guest exec server is pointless AND it HANGS on a box whose
// guest was frozen (the read has no timeout). Force-kill the host shim
// directly — abruptly tearing down the VM is exactly what -9 wants.
let exec_socket = crate::socket_paths::exec(&record);
if !process::deliver_signal_via_guest(&exec_socket, signal).await {
let delivered = if signal == SIGKILL {
false
} else {
process::deliver_signal_via_guest(&exec_socket, signal).await
};
if !delivered {
process::send_signal(pid, signal).map_err(|err| {
format!(
"Failed to send signal {signal} to box {} (PID {pid}): {err}",
Expand Down
19 changes: 19 additions & 0 deletions src/cri/src/runtime_service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,25 @@ impl RuntimeService for BoxRuntimeService {
rootfs_guest_path,
};

// Re-validate the sandbox right before registering the container. The
// heavy async work above (image resolve + rootfs build, which yields the
// task) could have run concurrently with a StopPodSandbox/
// RemovePodSandbox that tore the sandbox (and its rootfs tree) down.
// Without this re-check we would register an orphan container whose
// sandbox is gone — and whose rootfs we just recreated under a
// now-deleted sandbox tree — that nothing ever reaps.
match self.store.sandboxes.get(&container.sandbox_id).await {
Some(sb) if sb.state == SandboxState::Ready => {}
_ => {
self.cleanup_container_rootfs_path(&container.rootfs_path)
.await;
return Err(Status::failed_precondition(format!(
"Sandbox {} is no longer ready; aborting CreateContainer",
container.sandbox_id
)));
}
}

self.store.add_container(container.clone()).await;
self.emit_container_event(
&container.id,
Expand Down
Loading