fdatoo · fdatoo · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/src/server/priv/migrations/009_runs_autoincrement.sql b/src/server/priv/migrations/009_runs_autoincrement.sql
@@ -0,0 +1,86 @@
+-- Prevent rowid reuse on the runs table.
+--
+-- Without AUTOINCREMENT, SQLite reuses the rowid of the highest-deleted
+-- row. When an e2e test calls run.destroy() (DELETE /api/runs/N) and the
+-- next test creates a fresh run, the new run can land on the same id N.
+-- The old run's actor is still alive in `Waiting` phase (the test page's
+-- WS connection kept listener_count > 0), and when its container_monitor
+-- finally reports ContainerExited, the actor calls
+-- mark_finished(db, state.run_id, ...) — updating the *new* run's row
+-- with the *old* run's outcome. That's the e2e hang/auto-scroll/snapshot
+-- "succeeded" flake: a successful prior test (default, env-echo, …)
+-- leaks state="succeeded" into a hang test.
+--
+-- With AUTOINCREMENT, ids strictly grow, the old actor's UPDATE matches
+-- no row, and the leak becomes a harmless no-op.
+--
+-- SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so we
+-- rebuild via the canonical pattern: create new table, copy rows, drop
+-- old, rename. Foreign key references inside the table are rewritten to
+-- the post-rename name automatically by ALTER TABLE … RENAME (this is
+-- why the parent_run_id self-reference points at runs_new — it gets
+-- renamed alongside).
+CREATE TABLE runs_new (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+  prompt TEXT NOT NULL,
+  branch_name TEXT NOT NULL,
+  state TEXT NOT NULL,
+  container_id TEXT,
+  log_path TEXT NOT NULL,
+  exit_code INTEGER,
+  error TEXT,
+  head_commit TEXT,
+  started_at INTEGER,
+  finished_at INTEGER,
+  created_at INTEGER NOT NULL,
+  state_entered_at INTEGER NOT NULL DEFAULT 0,
+  model TEXT,
+  effort TEXT,
+  subagent_model TEXT,
+  resume_attempts INTEGER NOT NULL DEFAULT 0,
+  next_resume_at INTEGER,
+  claude_session_id TEXT,
+  last_limit_reset_at INTEGER,
+  tokens_input INTEGER NOT NULL DEFAULT 0,
+  tokens_output INTEGER NOT NULL DEFAULT 0,
+  tokens_cache_read INTEGER NOT NULL DEFAULT 0,
+  tokens_cache_create INTEGER NOT NULL DEFAULT 0,
+  tokens_total INTEGER NOT NULL DEFAULT 0,
+  usage_parse_errors INTEGER NOT NULL DEFAULT 0,
+  title TEXT,
+  title_locked INTEGER NOT NULL DEFAULT 0,
+  parent_run_id INTEGER REFERENCES runs_new(id) ON DELETE SET NULL,
+  kind TEXT NOT NULL DEFAULT 'work',
+  kind_args_json TEXT,
+  mirror_status TEXT,
+  mock INTEGER NOT NULL DEFAULT 0,
+  mock_scenario TEXT
+);
+
+INSERT INTO runs_new
+  SELECT
+    id, project_id, prompt, branch_name, state, container_id, log_path,
+    exit_code, error, head_commit, started_at, finished_at, created_at,
+    state_entered_at, model, effort, subagent_model, resume_attempts,
+    next_resume_at, claude_session_id, last_limit_reset_at, tokens_input,
+    tokens_output, tokens_cache_read, tokens_cache_create, tokens_total,
+    usage_parse_errors, title, title_locked, parent_run_id, kind,
+    kind_args_json, mirror_status, mock, mock_scenario
+  FROM runs;
+
+DROP TABLE runs;
+ALTER TABLE runs_new RENAME TO runs;
+
+-- Seed sqlite_sequence so the next auto-id is max(existing id)+1, not 1.
+-- sqlite_sequence has no unique constraint on name, so INSERT OR REPLACE
+-- doesn't dedupe — DELETE first then INSERT to avoid duplicate rows
+-- (which AUTOINCREMENT tolerates but is messy). For a fresh DB with no
+-- prior rows, MAX(id) is NULL → 0, next auto-id is 1.
+DELETE FROM sqlite_sequence WHERE name = 'runs';
+INSERT INTO sqlite_sequence (name, seq)
+  VALUES ('runs', COALESCE((SELECT MAX(id) FROM runs), 0));
+
+CREATE INDEX idx_runs_project ON runs (project_id);
+CREATE INDEX idx_runs_state ON runs (state);
+CREATE INDEX idx_runs_parent ON runs (parent_run_id);
diff --git a/src/server/src/fbi/db/projects.gleam b/src/server/src/fbi/db/projects.gleam
@@ -122,7 +122,10 @@ pub fn get(db: sqlight.Connection, id: Int) -> Result(Project, DbError) {
   )
 }
 
-pub fn insert(db: sqlight.Connection, p: NewProject) -> Result(Project, DbError) {
+pub fn insert(
+  db: sqlight.Connection,
+  p: NewProject,
+) -> Result(Project, DbError) {
   let sql =
     "INSERT INTO projects
        (name, repo_url, default_branch, devcontainer_override_json, instructions,

diff --git a/src/server/src/fbi/git/history_ops.gleam b/src/server/src/fbi/git/history_ops.gleam
@@ -81,7 +81,10 @@ pub fn mirror_rebase(
   }
 }
 
-pub fn sync_in_container(config: Config, cid: String) -> Result(Outcome, String) {
+pub fn sync_in_container(
+  config: Config,
+  cid: String,
+) -> Result(Outcome, String) {
   exec_in_container(config, cid, "cd /workspace && git pull --no-rebase 2>&1")
 }
 

diff --git a/src/server/src/fbi/handlers/changes.gleam b/src/server/src/fbi/handlers/changes.gleam
@@ -116,7 +116,11 @@ pub fn handle_submodule_commit_files(
   }
 }
 
-pub fn handle_file_diff(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_file_diff(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case req.method {
     http.Get ->
       case int.parse(id_str) {

diff --git a/src/server/src/fbi/handlers/mcp_servers.gleam b/src/server/src/fbi/handlers/mcp_servers.gleam
@@ -18,7 +18,11 @@ pub fn handle_global(req: Request, ctx: Context) -> Response {
   }
 }
 
-pub fn handle_global_one(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_global_one(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case int.parse(id_str) {
     Error(_) -> wisp.bad_request("Invalid MCP server ID")
     Ok(id) ->

diff --git a/src/server/src/fbi/handlers/prompts.gleam b/src/server/src/fbi/handlers/prompts.gleam
@@ -19,7 +19,11 @@ pub fn handle_recent(
   }
 }
 
-fn serve_recent(req: Request, ctx: Context, project_id_str: String) -> Response {
+fn serve_recent(
+  req: Request,
+  ctx: Context,
+  project_id_str: String,
+) -> Response {
   case int.parse(project_id_str) {
     Error(_) -> wisp.bad_request("Invalid project ID")
     Ok(pid) -> {

diff --git a/src/server/src/fbi/handlers/runs.gleam b/src/server/src/fbi/handlers/runs.gleam
@@ -212,7 +212,11 @@ fn do_continue(req: Request, ctx: Context, run_id: Int) -> Response {
   }
 }
 
-pub fn handle_resume_now(req: Request, ctx: Context, id_str: String) -> Response {
+pub fn handle_resume_now(
+  req: Request,
+  ctx: Context,
+  id_str: String,
+) -> Response {
   case req.method {
     http.Post ->
       case int.parse(id_str) {
@@ -478,7 +482,10 @@ fn index_paged(ctx: Context, filter: runs.ListFilter) -> Response {
   }
 }
 
-fn get_param(qs: List(#(String, String)), key: String) -> option.Option(String) {
+fn get_param(
+  qs: List(#(String, String)),
+  key: String,
+) -> option.Option(String) {
   case list.key_find(qs, key) {
     Ok(v) if v != "" -> Some(v)
     _ -> None

diff --git a/src/server/src/fbi/handlers/uploads.gleam b/src/server/src/fbi/handlers/uploads.gleam
@@ -27,7 +27,11 @@ pub fn handle_draft_file(
   }
 }
 
-pub fn handle_run_uploads(req: Request, _ctx: Context, _id: String) -> Response {
+pub fn handle_run_uploads(
+  req: Request,
+  _ctx: Context,
+  _id: String,
+) -> Response {
   case req.method {
     http.Get ->
       json.object([#("files", json.array([], json.string))])

diff --git a/src/server/src/fbi/run/worker.gleam b/src/server/src/fbi/run/worker.gleam
@@ -83,17 +83,21 @@ fn do_real_launch(input: LaunchInput) -> Result(#(String, String), String) {
     input.config,
     on_log,
   ))
-  use _ <- result.try(setup_run_dir(input))
   let container_name = "fbi-run-" <> run_id
-  // Remove any pre-existing container with this name. Cancel paths and
-  // crash recovery don't always reach transition_to_finishing (which is
-  // the only place that calls remove_container), so retrying the same
-  // run id otherwise hits a "container name in use" error from Docker.
-  // force=true also handles the case where it's still running.
+  // Remove any pre-existing container with this name BEFORE setup_run_dir.
+  // Cancel paths and crash recovery don't always reach
+  // transition_to_finishing (which is the only place that calls
+  // remove_container), so retrying the same run id otherwise hits a
+  // "container name in use" error from Docker. force=true also handles
+  // the case where it's still running. Order matters: while an old
+  // container holds the bind mount on run_dir/state, `del_dir_r` fails
+  // with EBUSY and stale state files (notably result.json) survive into
+  // the new run.
   let _ =
     with_docker(input.config.docker_socket, fn(sock) {
       docker.remove_container(sock, container_name, True)
     })
+  use _ <- result.try(setup_run_dir(input))
   let spec = container_spec(input, image_tag)
   wisp.log_debug("run " <> run_id <> ": creating container image=" <> image_tag)
   use cid <- result.try(
@@ -128,12 +132,20 @@ fn do_mock_launch(input: LaunchInput) -> Result(#(String, String), String) {
     Some(p) -> Ok(p)
     None -> Error("FBI_QUANTICO_BINARY_PATH not set; mock runs require it")
   })
-  use _ <- result.try(setup_run_dir(input))
+  // Force-remove any prior container with this name BEFORE setup_run_dir.
+  // SQLite reuses rowids when prior runs are deleted (no AUTOINCREMENT),
+  // so a fresh run can land on an id whose state dir is still bind-mounted
+  // by a not-yet-removed container. While that bind mount is alive, the
+  // host-side `del_dir_r` of run_dir/state can't remove the directory
+  // (EBUSY) and stale files (notably result.json) survive — read_outcome
+  // then mistakes the *prior* run's exit code for the new run's, which is
+  // exactly the source of the hang-test "succeeded" flake.
   let container_name = "fbi-run-" <> run_id
   let _ =
     with_docker(input.config.docker_socket, fn(sock) {
       docker.remove_container(sock, container_name, True)
     })
+  use _ <- result.try(setup_run_dir(input))
   let spec = mock_container_spec(input, quantico_path, scenario)
   wisp.log_debug(
     "run " <> run_id <> ": creating mock container scenario=" <> scenario,
@@ -435,6 +447,14 @@ fn setup_run_dir(input: LaunchInput) -> Result(Nil, String) {
   let _ = simplifile.delete(run_dir <> "/state")
   let _ = simplifile.delete(run_dir <> "/wip")
   let _ = simplifile.delete(scripts_dir)
+  // Belt-and-suspenders: even if `delete(run_dir/state)` fails (EBUSY
+  // when a prior container still has the bind mount), nuke the
+  // individual signal files by path so read_outcome / poll_status_loop
+  // can't see stale values from the previous run id.
+  let _ = simplifile.delete(run_dir <> "/state/result.json")
+  let _ = simplifile.delete(run_dir <> "/state/agent-status")
+  let _ = simplifile.delete(run_dir <> "/state/session-id")
+  let _ = simplifile.delete(run_dir <> "/state/ready")
   use _ <- result.try(
     simplifile.create_directory_all(scripts_dir)
     |> result.map_error(fn(e) {