From a13bfbacf53801326f74a9347002d47220a33615 Mon Sep 17 00:00:00 2001 From: Fynn Datoo Date: Thu, 30 Apr 2026 06:03:28 -0700 Subject: [PATCH 1/3] fix: delete stale state files before launching reused-id container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause of the hang-test 'succeeded' flake: SQLite reuses run rowids when a prior run is deleted (no AUTOINCREMENT). When test N+1 lands on an id whose container from test N is still alive (Waiting phase, listener still connected), the container's bind mount on runs_dir//state is still active. setup_run_dir's del_dir_r then fails with EBUSY, the directory survives, and result.json from the prior run leaks into the new run. read_outcome reads the stale exit_code; if the prior scenario exited 0, the new run is reported 'succeeded' regardless of what actually happened in its container. Two-part fix: 1. Reorder do_mock_launch / do_real_launch to force-remove the prior container BEFORE setup_run_dir, so the bind mount is released and del_dir_r can clean the directory. 2. As a defensive layer, explicitly delete the individual state signal files (result.json, agent-status, session-id, ready) by path. unlink works on individual files inside a bind-mounted directory even when the directory itself can't be removed — so even if a future bug re-introduces the ordering issue, read_outcome won't see stale data. Verified via /tmp/fbi-runs-state artifacts on debug/hang-flake CI run 25166517481: run-3's quantico-debug.log said OUTCOME=SleepingForever (correctly blocked) yet result.json had exit_code:1 — leftover from the crash-fast run that previously held that id. --- src/server/src/fbi/run/worker.gleam | 34 +++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/server/src/fbi/run/worker.gleam b/src/server/src/fbi/run/worker.gleam index 89e390e..d427db1 100644 --- a/src/server/src/fbi/run/worker.gleam +++ b/src/server/src/fbi/run/worker.gleam @@ -83,17 +83,21 @@ fn do_real_launch(input: LaunchInput) -> Result(#(String, String), String) { input.config, on_log, )) - use _ <- result.try(setup_run_dir(input)) let container_name = "fbi-run-" <> run_id - // Remove any pre-existing container with this name. Cancel paths and - // crash recovery don't always reach transition_to_finishing (which is - // the only place that calls remove_container), so retrying the same - // run id otherwise hits a "container name in use" error from Docker. - // force=true also handles the case where it's still running. + // Remove any pre-existing container with this name BEFORE setup_run_dir. + // Cancel paths and crash recovery don't always reach + // transition_to_finishing (which is the only place that calls + // remove_container), so retrying the same run id otherwise hits a + // "container name in use" error from Docker. force=true also handles + // the case where it's still running. Order matters: while an old + // container holds the bind mount on run_dir/state, `del_dir_r` fails + // with EBUSY and stale state files (notably result.json) survive into + // the new run. let _ = with_docker(input.config.docker_socket, fn(sock) { docker.remove_container(sock, container_name, True) }) + use _ <- result.try(setup_run_dir(input)) let spec = container_spec(input, image_tag) wisp.log_debug("run " <> run_id <> ": creating container image=" <> image_tag) use cid <- result.try( @@ -128,12 +132,20 @@ fn do_mock_launch(input: LaunchInput) -> Result(#(String, String), String) { Some(p) -> Ok(p) None -> Error("FBI_QUANTICO_BINARY_PATH not set; mock runs require it") }) - use _ <- result.try(setup_run_dir(input)) + // Force-remove any prior container with this name BEFORE setup_run_dir. + // SQLite reuses rowids when prior runs are deleted (no AUTOINCREMENT), + // so a fresh run can land on an id whose state dir is still bind-mounted + // by a not-yet-removed container. While that bind mount is alive, the + // host-side `del_dir_r` of run_dir/state can't remove the directory + // (EBUSY) and stale files (notably result.json) survive — read_outcome + // then mistakes the *prior* run's exit code for the new run's, which is + // exactly the source of the hang-test "succeeded" flake. let container_name = "fbi-run-" <> run_id let _ = with_docker(input.config.docker_socket, fn(sock) { docker.remove_container(sock, container_name, True) }) + use _ <- result.try(setup_run_dir(input)) let spec = mock_container_spec(input, quantico_path, scenario) wisp.log_debug( "run " <> run_id <> ": creating mock container scenario=" <> scenario, @@ -435,6 +447,14 @@ fn setup_run_dir(input: LaunchInput) -> Result(Nil, String) { let _ = simplifile.delete(run_dir <> "/state") let _ = simplifile.delete(run_dir <> "/wip") let _ = simplifile.delete(scripts_dir) + // Belt-and-suspenders: even if `delete(run_dir/state)` fails (EBUSY + // when a prior container still has the bind mount), nuke the + // individual signal files by path so read_outcome / poll_status_loop + // can't see stale values from the previous run id. + let _ = simplifile.delete(run_dir <> "/state/result.json") + let _ = simplifile.delete(run_dir <> "/state/agent-status") + let _ = simplifile.delete(run_dir <> "/state/session-id") + let _ = simplifile.delete(run_dir <> "/state/ready") use _ <- result.try( simplifile.create_directory_all(scripts_dir) |> result.map_error(fn(e) { From a3c9ed9ed60be9c6862ee891750f4110efd5eb93 Mon Sep 17 00:00:00 2001 From: Fynn Datoo Date: Thu, 30 Apr 2026 06:10:35 -0700 Subject: [PATCH 2/3] chore: gleam format Three handler files left unformatted by recent merges; bring them in line so this PR's CI passes the format check. Co-Authored-By: Claude Opus 4.7 --- src/server/src/fbi/db/projects.gleam | 5 ++++- src/server/src/fbi/git/history_ops.gleam | 5 ++++- src/server/src/fbi/handlers/changes.gleam | 6 +++++- src/server/src/fbi/handlers/mcp_servers.gleam | 6 +++++- src/server/src/fbi/handlers/prompts.gleam | 6 +++++- src/server/src/fbi/handlers/runs.gleam | 11 +++++++++-- src/server/src/fbi/handlers/uploads.gleam | 6 +++++- 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/server/src/fbi/db/projects.gleam b/src/server/src/fbi/db/projects.gleam index e1538eb..eef252c 100644 --- a/src/server/src/fbi/db/projects.gleam +++ b/src/server/src/fbi/db/projects.gleam @@ -122,7 +122,10 @@ pub fn get(db: sqlight.Connection, id: Int) -> Result(Project, DbError) { ) } -pub fn insert(db: sqlight.Connection, p: NewProject) -> Result(Project, DbError) { +pub fn insert( + db: sqlight.Connection, + p: NewProject, +) -> Result(Project, DbError) { let sql = "INSERT INTO projects (name, repo_url, default_branch, devcontainer_override_json, instructions, diff --git a/src/server/src/fbi/git/history_ops.gleam b/src/server/src/fbi/git/history_ops.gleam index 9f61be2..4819bca 100644 --- a/src/server/src/fbi/git/history_ops.gleam +++ b/src/server/src/fbi/git/history_ops.gleam @@ -81,7 +81,10 @@ pub fn mirror_rebase( } } -pub fn sync_in_container(config: Config, cid: String) -> Result(Outcome, String) { +pub fn sync_in_container( + config: Config, + cid: String, +) -> Result(Outcome, String) { exec_in_container(config, cid, "cd /workspace && git pull --no-rebase 2>&1") } diff --git a/src/server/src/fbi/handlers/changes.gleam b/src/server/src/fbi/handlers/changes.gleam index df28d29..bf7f078 100644 --- a/src/server/src/fbi/handlers/changes.gleam +++ b/src/server/src/fbi/handlers/changes.gleam @@ -116,7 +116,11 @@ pub fn handle_submodule_commit_files( } } -pub fn handle_file_diff(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_file_diff( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case req.method { http.Get -> case int.parse(id_str) { diff --git a/src/server/src/fbi/handlers/mcp_servers.gleam b/src/server/src/fbi/handlers/mcp_servers.gleam index 7e4969a..4c0c621 100644 --- a/src/server/src/fbi/handlers/mcp_servers.gleam +++ b/src/server/src/fbi/handlers/mcp_servers.gleam @@ -18,7 +18,11 @@ pub fn handle_global(req: Request, ctx: Context) -> Response { } } -pub fn handle_global_one(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_global_one( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case int.parse(id_str) { Error(_) -> wisp.bad_request("Invalid MCP server ID") Ok(id) -> diff --git a/src/server/src/fbi/handlers/prompts.gleam b/src/server/src/fbi/handlers/prompts.gleam index 5c71db2..3c121ff 100644 --- a/src/server/src/fbi/handlers/prompts.gleam +++ b/src/server/src/fbi/handlers/prompts.gleam @@ -19,7 +19,11 @@ pub fn handle_recent( } } -fn serve_recent(req: Request, ctx: Context, project_id_str: String) -> Response { +fn serve_recent( + req: Request, + ctx: Context, + project_id_str: String, +) -> Response { case int.parse(project_id_str) { Error(_) -> wisp.bad_request("Invalid project ID") Ok(pid) -> { diff --git a/src/server/src/fbi/handlers/runs.gleam b/src/server/src/fbi/handlers/runs.gleam index f97667a..4a671fa 100644 --- a/src/server/src/fbi/handlers/runs.gleam +++ b/src/server/src/fbi/handlers/runs.gleam @@ -212,7 +212,11 @@ fn do_continue(req: Request, ctx: Context, run_id: Int) -> Response { } } -pub fn handle_resume_now(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_resume_now( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case req.method { http.Post -> case int.parse(id_str) { @@ -478,7 +482,10 @@ fn index_paged(ctx: Context, filter: runs.ListFilter) -> Response { } } -fn get_param(qs: List(#(String, String)), key: String) -> option.Option(String) { +fn get_param( + qs: List(#(String, String)), + key: String, +) -> option.Option(String) { case list.key_find(qs, key) { Ok(v) if v != "" -> Some(v) _ -> None diff --git a/src/server/src/fbi/handlers/uploads.gleam b/src/server/src/fbi/handlers/uploads.gleam index 1ebba8a..98b8b58 100644 --- a/src/server/src/fbi/handlers/uploads.gleam +++ b/src/server/src/fbi/handlers/uploads.gleam @@ -27,7 +27,11 @@ pub fn handle_draft_file( } } -pub fn handle_run_uploads(req: Request, _ctx: Context, _id: String) -> Response { +pub fn handle_run_uploads( + req: Request, + _ctx: Context, + _id: String, +) -> Response { case req.method { http.Get -> json.object([#("files", json.array([], json.string))]) From 8149226e82d05e0954650a280d24626dfe4249a1 Mon Sep 17 00:00:00 2001 From: Fynn Datoo Date: Thu, 30 Apr 2026 06:23:53 -0700 Subject: [PATCH 3/3] fix: add AUTOINCREMENT to runs.id to prevent rowid reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-actor-on-one-rowid bug — even with the worker.gleam state-dir fix, a deleted run's actor sits in Waiting phase (5-min listener-keepalive timeout) and still holds state.run_id = N. When SQLite reuses N for the next test's run, the old container_monitor eventually reports ContainerExited; the old actor processes it and calls mark_finished(db, N, prior_outcome). DB row N is now the *new* run, so the new run's state gets overwritten with the prior run's exit code — same 'succeeded'-flake symptom we hit in the e2e hang test. AUTOINCREMENT makes ids strictly grow. The old actor's UPDATE WHERE id = N matches no row (the new run has id N+1+), so the leak becomes a harmless no-op without changing anything else about the actor lifecycle. SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so this migration rebuilds the table. Manual sqlite_sequence DELETE+INSERT seeds it to MAX(id) (sqlite_sequence has no UNIQUE on name, so INSERT OR REPLACE doesn't dedupe). Verified locally: insert id=1, id=2, delete 2, insert → id=3 (was 2 with the old schema). Belt-and-suspenders alongside worker.gleam's state-dir cleanup; both fixes target the same root cause (id reuse + lingering old-run state) from different angles. Co-Authored-By: Claude Opus 4.7 --- .../migrations/009_runs_autoincrement.sql | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/server/priv/migrations/009_runs_autoincrement.sql diff --git a/src/server/priv/migrations/009_runs_autoincrement.sql b/src/server/priv/migrations/009_runs_autoincrement.sql new file mode 100644 index 0000000..e8e4a13 --- /dev/null +++ b/src/server/priv/migrations/009_runs_autoincrement.sql @@ -0,0 +1,86 @@ +-- Prevent rowid reuse on the runs table. +-- +-- Without AUTOINCREMENT, SQLite reuses the rowid of the highest-deleted +-- row. When an e2e test calls run.destroy() (DELETE /api/runs/N) and the +-- next test creates a fresh run, the new run can land on the same id N. +-- The old run's actor is still alive in `Waiting` phase (the test page's +-- WS connection kept listener_count > 0), and when its container_monitor +-- finally reports ContainerExited, the actor calls +-- mark_finished(db, state.run_id, ...) — updating the *new* run's row +-- with the *old* run's outcome. That's the e2e hang/auto-scroll/snapshot +-- "succeeded" flake: a successful prior test (default, env-echo, …) +-- leaks state="succeeded" into a hang test. +-- +-- With AUTOINCREMENT, ids strictly grow, the old actor's UPDATE matches +-- no row, and the leak becomes a harmless no-op. +-- +-- SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so we +-- rebuild via the canonical pattern: create new table, copy rows, drop +-- old, rename. Foreign key references inside the table are rewritten to +-- the post-rename name automatically by ALTER TABLE … RENAME (this is +-- why the parent_run_id self-reference points at runs_new — it gets +-- renamed alongside). +CREATE TABLE runs_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + prompt TEXT NOT NULL, + branch_name TEXT NOT NULL, + state TEXT NOT NULL, + container_id TEXT, + log_path TEXT NOT NULL, + exit_code INTEGER, + error TEXT, + head_commit TEXT, + started_at INTEGER, + finished_at INTEGER, + created_at INTEGER NOT NULL, + state_entered_at INTEGER NOT NULL DEFAULT 0, + model TEXT, + effort TEXT, + subagent_model TEXT, + resume_attempts INTEGER NOT NULL DEFAULT 0, + next_resume_at INTEGER, + claude_session_id TEXT, + last_limit_reset_at INTEGER, + tokens_input INTEGER NOT NULL DEFAULT 0, + tokens_output INTEGER NOT NULL DEFAULT 0, + tokens_cache_read INTEGER NOT NULL DEFAULT 0, + tokens_cache_create INTEGER NOT NULL DEFAULT 0, + tokens_total INTEGER NOT NULL DEFAULT 0, + usage_parse_errors INTEGER NOT NULL DEFAULT 0, + title TEXT, + title_locked INTEGER NOT NULL DEFAULT 0, + parent_run_id INTEGER REFERENCES runs_new(id) ON DELETE SET NULL, + kind TEXT NOT NULL DEFAULT 'work', + kind_args_json TEXT, + mirror_status TEXT, + mock INTEGER NOT NULL DEFAULT 0, + mock_scenario TEXT +); + +INSERT INTO runs_new + SELECT + id, project_id, prompt, branch_name, state, container_id, log_path, + exit_code, error, head_commit, started_at, finished_at, created_at, + state_entered_at, model, effort, subagent_model, resume_attempts, + next_resume_at, claude_session_id, last_limit_reset_at, tokens_input, + tokens_output, tokens_cache_read, tokens_cache_create, tokens_total, + usage_parse_errors, title, title_locked, parent_run_id, kind, + kind_args_json, mirror_status, mock, mock_scenario + FROM runs; + +DROP TABLE runs; +ALTER TABLE runs_new RENAME TO runs; + +-- Seed sqlite_sequence so the next auto-id is max(existing id)+1, not 1. +-- sqlite_sequence has no unique constraint on name, so INSERT OR REPLACE +-- doesn't dedupe — DELETE first then INSERT to avoid duplicate rows +-- (which AUTOINCREMENT tolerates but is messy). For a fresh DB with no +-- prior rows, MAX(id) is NULL → 0, next auto-id is 1. +DELETE FROM sqlite_sequence WHERE name = 'runs'; +INSERT INTO sqlite_sequence (name, seq) + VALUES ('runs', COALESCE((SELECT MAX(id) FROM runs), 0)); + +CREATE INDEX idx_runs_project ON runs (project_id); +CREATE INDEX idx_runs_state ON runs (state); +CREATE INDEX idx_runs_parent ON runs (parent_run_id);