From a13bfbacf53801326f74a9347002d47220a33615 Mon Sep 17 00:00:00 2001
From: Fynn Datoo <fdatoo7@gmail.com>
Date: Thu, 30 Apr 2026 06:03:28 -0700
Subject: [PATCH 1/3] fix: delete stale state files before launching reused-id
 container
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause of the hang-test 'succeeded' flake: SQLite reuses run rowids
when a prior run is deleted (no AUTOINCREMENT). When test N+1 lands on
an id whose container from test N is still alive (Waiting phase, listener
still connected), the container's bind mount on runs_dir/<id>/state is
still active. setup_run_dir's del_dir_r then fails with EBUSY, the
directory survives, and result.json from the prior run leaks into the
new run. read_outcome reads the stale exit_code; if the prior scenario
exited 0, the new run is reported 'succeeded' regardless of what
actually happened in its container.

Two-part fix:
  1. Reorder do_mock_launch / do_real_launch to force-remove the prior
     container BEFORE setup_run_dir, so the bind mount is released and
     del_dir_r can clean the directory.
  2. As a defensive layer, explicitly delete the individual state signal
     files (result.json, agent-status, session-id, ready) by path. unlink
     works on individual files inside a bind-mounted directory even when
     the directory itself can't be removed — so even if a future bug
     re-introduces the ordering issue, read_outcome won't see stale data.

Verified via /tmp/fbi-runs-state artifacts on debug/hang-flake CI run
25166517481: run-3's quantico-debug.log said OUTCOME=SleepingForever
(correctly blocked) yet result.json had exit_code:1 — leftover from
the crash-fast run that previously held that id.
---
 src/server/src/fbi/run/worker.gleam | 34 +++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 7 deletions(-)
diff --git a/src/server/src/fbi/run/worker.gleam b/src/server/src/fbi/run/worker.gleam
index 89e390e..d427db1 100644
--- a/src/server/src/fbi/run/worker.gleam
+++ b/src/server/src/fbi/run/worker.gleam
@@ -83,17 +83,21 @@ fn do_real_launch(input: LaunchInput) -> Result(#(String, String), String) {
     input.config,
     on_log,
   ))
-  use _ <- result.try(setup_run_dir(input))
   let container_name = "fbi-run-" <> run_id
-  // Remove any pre-existing container with this name. Cancel paths and
-  // crash recovery don't always reach transition_to_finishing (which is
-  // the only place that calls remove_container), so retrying the same
-  // run id otherwise hits a "container name in use" error from Docker.
-  // force=true also handles the case where it's still running.
+  // Remove any pre-existing container with this name BEFORE setup_run_dir.
+  // Cancel paths and crash recovery don't always reach
+  // transition_to_finishing (which is the only place that calls
+  // remove_container), so retrying the same run id otherwise hits a
+  // "container name in use" error from Docker. force=true also handles
+  // the case where it's still running. Order matters: while an old
+  // container holds the bind mount on run_dir/state, `del_dir_r` fails
+  // with EBUSY and stale state files (notably result.json) survive into
+  // the new run.
   let _ =
     with_docker(input.config.docker_socket, fn(sock) {
       docker.remove_container(sock, container_name, True)
     })
+  use _ <- result.try(setup_run_dir(input))
   let spec = container_spec(input, image_tag)
   wisp.log_debug("run " <> run_id <> ": creating container image=" <> image_tag)
   use cid <- result.try(
@@ -128,12 +132,20 @@ fn do_mock_launch(input: LaunchInput) -> Result(#(String, String), String) {
     Some(p) -> Ok(p)
     None -> Error("FBI_QUANTICO_BINARY_PATH not set; mock runs require it")
   })
-  use _ <- result.try(setup_run_dir(input))
+  // Force-remove any prior container with this name BEFORE setup_run_dir.
+  // SQLite reuses rowids when prior runs are deleted (no AUTOINCREMENT),
+  // so a fresh run can land on an id whose state dir is still bind-mounted
+  // by a not-yet-removed container. While that bind mount is alive, the
+  // host-side `del_dir_r` of run_dir/state can't remove the directory
+  // (EBUSY) and stale files (notably result.json) survive — read_outcome
+  // then mistakes the *prior* run's exit code for the new run's, which is
+  // exactly the source of the hang-test "succeeded" flake.
   let container_name = "fbi-run-" <> run_id
   let _ =
     with_docker(input.config.docker_socket, fn(sock) {
       docker.remove_container(sock, container_name, True)
     })
+  use _ <- result.try(setup_run_dir(input))
   let spec = mock_container_spec(input, quantico_path, scenario)
   wisp.log_debug(
     "run " <> run_id <> ": creating mock container scenario=" <> scenario,
@@ -435,6 +447,14 @@ fn setup_run_dir(input: LaunchInput) -> Result(Nil, String) {
   let _ = simplifile.delete(run_dir <> "/state")
   let _ = simplifile.delete(run_dir <> "/wip")
   let _ = simplifile.delete(scripts_dir)
+  // Belt-and-suspenders: even if `delete(run_dir/state)` fails (EBUSY
+  // when a prior container still has the bind mount), nuke the
+  // individual signal files by path so read_outcome / poll_status_loop
+  // can't see stale values from the previous run id.
+  let _ = simplifile.delete(run_dir <> "/state/result.json")
+  let _ = simplifile.delete(run_dir <> "/state/agent-status")
+  let _ = simplifile.delete(run_dir <> "/state/session-id")
+  let _ = simplifile.delete(run_dir <> "/state/ready")
   use _ <- result.try(
     simplifile.create_directory_all(scripts_dir)
     |> result.map_error(fn(e) {

From a3c9ed9ed60be9c6862ee891750f4110efd5eb93 Mon Sep 17 00:00:00 2001
From: Fynn Datoo <fdatoo7@gmail.com>
Date: Thu, 30 Apr 2026 06:10:35 -0700
Subject: [PATCH 2/3] chore: gleam format

Three handler files left unformatted by recent merges; bring them in
line so this PR's CI passes the format check.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/server/src/fbi/db/projects.gleam          |  5 ++++-
 src/server/src/fbi/git/history_ops.gleam      |  5 ++++-
 src/server/src/fbi/handlers/changes.gleam     |  6 +++++-
 src/server/src/fbi/handlers/mcp_servers.gleam |  6 +++++-
 src/server/src/fbi/handlers/prompts.gleam     |  6 +++++-
 src/server/src/fbi/handlers/runs.gleam        | 11 +++++++++--
 src/server/src/fbi/handlers/uploads.gleam     |  6 +++++-
 7 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/src/server/src/fbi/db/projects.gleam b/src/server/src/fbi/db/projects.gleam
index e1538eb..eef252c 100644
--- a/src/server/src/fbi/db/projects.gleam
+++ b/src/server/src/fbi/db/projects.gleam
@@ -122,7 +122,10 @@ pub fn get(db: sqlight.Connection, id: Int) -> Result(Project, DbError) {
   )
 }
 
-pub fn insert(db: sqlight.Connection, p: NewProject) -> Result(Project, DbError) {
+pub fn insert(
+  db: sqlight.Connection,
+  p: NewProject,
+) -> Result(Project, DbError) {
   let sql =
     "INSERT INTO projects
        (name, repo_url, default_branch, devcontainer_override_json, instructions,
diff --git a/src/server/src/fbi/git/history_ops.gleam b/src/server/src/fbi/git/history_ops.gleam
index 9f61be2..4819bca 100644
--- a/src/server/src/fbi/git/history_ops.gleam
+++ b/src/server/src/fbi/git/history_ops.gleam
@@ -81,7 +81,10 @@ pub fn mirror_rebase(
   }
 }
 
-pub fn sync_in_container(config: Config, cid: String) -> Result(Outcome, String) {
+pub fn sync_in_container(
+  config: Config,
+  cid: String,
+) -> Result(Outcome, String) {
   exec_in_container(config, cid, "cd /workspace && git pull --no-rebase 2>&1")
 }
 
diff --git a/src/server/src/fbi/handlers/changes.gleam b/src/server/src/fbi/handlers/changes.gleam
index df28d29..bf7f078 100644
--- a/src/server/src/fbi/handlers/changes.gleam
+++ b/src/server/src/fbi/handlers/changes.gleam
@@ -116,7 +116,11 @@ pub fn handle_submodule_commit_files(
   }
 }
 
-pub fn handle_file_diff(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_file_diff(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case req.method {
     http.Get ->
       case int.parse(id_str) {
diff --git a/src/server/src/fbi/handlers/mcp_servers.gleam b/src/server/src/fbi/handlers/mcp_servers.gleam
index 7e4969a..4c0c621 100644
--- a/src/server/src/fbi/handlers/mcp_servers.gleam
+++ b/src/server/src/fbi/handlers/mcp_servers.gleam
@@ -18,7 +18,11 @@ pub fn handle_global(req: Request, ctx: Context) -> Response {
   }
 }
 
-pub fn handle_global_one(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_global_one(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case int.parse(id_str) {
     Error(_) -> wisp.bad_request("Invalid MCP server ID")
     Ok(id) ->
diff --git a/src/server/src/fbi/handlers/prompts.gleam b/src/server/src/fbi/handlers/prompts.gleam
index 5c71db2..3c121ff 100644
--- a/src/server/src/fbi/handlers/prompts.gleam
+++ b/src/server/src/fbi/handlers/prompts.gleam
@@ -19,7 +19,11 @@ pub fn handle_recent(
   }
 }
 
-fn serve_recent(req: Request, ctx: Context, project_id_str: String) -> Response {
+fn serve_recent(
+  req: Request,
+  ctx: Context,
+  project_id_str: String,
+) -> Response {
   case int.parse(project_id_str) {
     Error(_) -> wisp.bad_request("Invalid project ID")
     Ok(pid) -> {
diff --git a/src/server/src/fbi/handlers/runs.gleam b/src/server/src/fbi/handlers/runs.gleam
index f97667a..4a671fa 100644
--- a/src/server/src/fbi/handlers/runs.gleam
+++ b/src/server/src/fbi/handlers/runs.gleam
@@ -212,7 +212,11 @@ fn do_continue(req: Request, ctx: Context, run_id: Int) -> Response {
   }
 }
 
-pub fn handle_resume_now(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_resume_now(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case req.method {
     http.Post ->
       case int.parse(id_str) {
@@ -478,7 +482,10 @@ fn index_paged(ctx: Context, filter: runs.ListFilter) -> Response {
   }
 }
 
-fn get_param(qs: List(#(String, String)), key: String) -> option.Option(String) {
+fn get_param(
+  qs: List(#(String, String)),
+  key: String,
+) -> option.Option(String) {
   case list.key_find(qs, key) {
     Ok(v) if v != "" -> Some(v)
     _ -> None
diff --git a/src/server/src/fbi/handlers/uploads.gleam b/src/server/src/fbi/handlers/uploads.gleam
index 1ebba8a..98b8b58 100644
--- a/src/server/src/fbi/handlers/uploads.gleam
+++ b/src/server/src/fbi/handlers/uploads.gleam
@@ -27,7 +27,11 @@ pub fn handle_draft_file(
   }
 }
 
-pub fn handle_run_uploads(req: Request, _ctx: Context, _id: String) -> Response {
+pub fn handle_run_uploads(
+  req: Request,
+  _ctx: Context,
+  _id: String,
+) -> Response {
   case req.method {
     http.Get ->
       json.object([#("files", json.array([], json.string))])

From 8149226e82d05e0954650a280d24626dfe4249a1 Mon Sep 17 00:00:00 2001
From: Fynn Datoo <fdatoo7@gmail.com>
Date: Thu, 30 Apr 2026 06:23:53 -0700
Subject: [PATCH 3/3] fix: add AUTOINCREMENT to runs.id to prevent rowid reuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two-actor-on-one-rowid bug — even with the worker.gleam state-dir fix,
a deleted run's actor sits in Waiting phase (5-min listener-keepalive
timeout) and still holds state.run_id = N. When SQLite reuses N for
the next test's run, the old container_monitor eventually reports
ContainerExited; the old actor processes it and calls
mark_finished(db, N, prior_outcome). DB row N is now the *new* run,
so the new run's state gets overwritten with the prior run's exit code
— same 'succeeded'-flake symptom we hit in the e2e hang test.

AUTOINCREMENT makes ids strictly grow. The old actor's UPDATE WHERE
id = N matches no row (the new run has id N+1+), so the leak becomes
a harmless no-op without changing anything else about the actor
lifecycle.

SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so this
migration rebuilds the table. Manual sqlite_sequence DELETE+INSERT
seeds it to MAX(id) (sqlite_sequence has no UNIQUE on name, so
INSERT OR REPLACE doesn't dedupe).

Verified locally: insert id=1, id=2, delete 2, insert → id=3 (was 2
with the old schema).

Belt-and-suspenders alongside worker.gleam's state-dir cleanup; both
fixes target the same root cause (id reuse + lingering old-run state)
from different angles.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../migrations/009_runs_autoincrement.sql     | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/server/priv/migrations/009_runs_autoincrement.sql

diff --git a/src/server/priv/migrations/009_runs_autoincrement.sql b/src/server/priv/migrations/009_runs_autoincrement.sql
new file mode 100644
index 0000000..e8e4a13
--- /dev/null
+++ b/src/server/priv/migrations/009_runs_autoincrement.sql
@@ -0,0 +1,86 @@
+-- Prevent rowid reuse on the runs table.
+--
+-- Without AUTOINCREMENT, SQLite reuses the rowid of the highest-deleted
+-- row. When an e2e test calls run.destroy() (DELETE /api/runs/N) and the
+-- next test creates a fresh run, the new run can land on the same id N.
+-- The old run's actor is still alive in `Waiting` phase (the test page's
+-- WS connection kept listener_count > 0), and when its container_monitor
+-- finally reports ContainerExited, the actor calls
+-- mark_finished(db, state.run_id, ...) — updating the *new* run's row
+-- with the *old* run's outcome. That's the e2e hang/auto-scroll/snapshot
+-- "succeeded" flake: a successful prior test (default, env-echo, …)
+-- leaks state="succeeded" into a hang test.
+--
+-- With AUTOINCREMENT, ids strictly grow, the old actor's UPDATE matches
+-- no row, and the leak becomes a harmless no-op.
+--
+-- SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so we
+-- rebuild via the canonical pattern: create new table, copy rows, drop
+-- old, rename. Foreign key references inside the table are rewritten to
+-- the post-rename name automatically by ALTER TABLE … RENAME (this is
+-- why the parent_run_id self-reference points at runs_new — it gets
+-- renamed alongside).
+CREATE TABLE runs_new (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  prompt TEXT NOT NULL,
+  branch_name TEXT NOT NULL,
+  state TEXT NOT NULL,
+  container_id TEXT,
+  log_path TEXT NOT NULL,
+  exit_code INTEGER,
+  error TEXT,
+  head_commit TEXT,
+  started_at INTEGER,
+  finished_at INTEGER,
+  created_at INTEGER NOT NULL,
+  state_entered_at INTEGER NOT NULL DEFAULT 0,
+  model TEXT,
+  effort TEXT,
+  subagent_model TEXT,
+  resume_attempts INTEGER NOT NULL DEFAULT 0,
+  next_resume_at INTEGER,
+  claude_session_id TEXT,
+  last_limit_reset_at INTEGER,
+  tokens_input INTEGER NOT NULL DEFAULT 0,
+  tokens_output INTEGER NOT NULL DEFAULT 0,
+  tokens_cache_read INTEGER NOT NULL DEFAULT 0,
+  tokens_cache_create INTEGER NOT NULL DEFAULT 0,
+  tokens_total INTEGER NOT NULL DEFAULT 0,
+  usage_parse_errors INTEGER NOT NULL DEFAULT 0,
+  title TEXT,
+  title_locked INTEGER NOT NULL DEFAULT 0,
+  parent_run_id INTEGER REFERENCES runs_new(id) ON DELETE SET NULL,
+  kind TEXT NOT NULL DEFAULT 'work',
+  kind_args_json TEXT,
+  mirror_status TEXT,
+  mock INTEGER NOT NULL DEFAULT 0,
+  mock_scenario TEXT
+);
+
+INSERT INTO runs_new
+  SELECT
+    id, project_id, prompt, branch_name, state, container_id, log_path,
+    exit_code, error, head_commit, started_at, finished_at, created_at,
+    state_entered_at, model, effort, subagent_model, resume_attempts,
+    next_resume_at, claude_session_id, last_limit_reset_at, tokens_input,
+    tokens_output, tokens_cache_read, tokens_cache_create, tokens_total,
+    usage_parse_errors, title, title_locked, parent_run_id, kind,
+    kind_args_json, mirror_status, mock, mock_scenario
+  FROM runs;
+
+DROP TABLE runs;
+ALTER TABLE runs_new RENAME TO runs;
+
+-- Seed sqlite_sequence so the next auto-id is max(existing id)+1, not 1.
+-- sqlite_sequence has no unique constraint on name, so INSERT OR REPLACE
+-- doesn't dedupe — DELETE first then INSERT to avoid duplicate rows
+-- (which AUTOINCREMENT tolerates but is messy). For a fresh DB with no
+-- prior rows, MAX(id) is NULL → 0, next auto-id is 1.
+DELETE FROM sqlite_sequence WHERE name = 'runs';
+INSERT INTO sqlite_sequence (name, seq)
+  VALUES ('runs', COALESCE((SELECT MAX(id) FROM runs), 0));
+
+CREATE INDEX idx_runs_project ON runs (project_id);
+CREATE INDEX idx_runs_state ON runs (state);
+CREATE INDEX idx_runs_parent ON runs (parent_run_id);