diff --git a/src/server/priv/migrations/009_runs_autoincrement.sql b/src/server/priv/migrations/009_runs_autoincrement.sql new file mode 100644 index 0000000..e8e4a13 --- /dev/null +++ b/src/server/priv/migrations/009_runs_autoincrement.sql @@ -0,0 +1,86 @@ +-- Prevent rowid reuse on the runs table. +-- +-- Without AUTOINCREMENT, SQLite reuses the rowid of the highest-deleted +-- row. When an e2e test calls run.destroy() (DELETE /api/runs/N) and the +-- next test creates a fresh run, the new run can land on the same id N. +-- The old run's actor is still alive in `Waiting` phase (the test page's +-- WS connection kept listener_count > 0), and when its container_monitor +-- finally reports ContainerExited, the actor calls +-- mark_finished(db, state.run_id, ...) — updating the *new* run's row +-- with the *old* run's outcome. That's the e2e hang/auto-scroll/snapshot +-- "succeeded" flake: a successful prior test (default, env-echo, …) +-- leaks state="succeeded" into a hang test. +-- +-- With AUTOINCREMENT, ids strictly grow, the old actor's UPDATE matches +-- no row, and the leak becomes a harmless no-op. +-- +-- SQLite doesn't support ALTER TABLE … ADD AUTOINCREMENT, so we +-- rebuild via the canonical pattern: create new table, copy rows, drop +-- old, rename. Foreign key references inside the table are rewritten to +-- the post-rename name automatically by ALTER TABLE … RENAME (this is +-- why the parent_run_id self-reference points at runs_new — it gets +-- renamed alongside). +CREATE TABLE runs_new ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE, + prompt TEXT NOT NULL, + branch_name TEXT NOT NULL, + state TEXT NOT NULL, + container_id TEXT, + log_path TEXT NOT NULL, + exit_code INTEGER, + error TEXT, + head_commit TEXT, + started_at INTEGER, + finished_at INTEGER, + created_at INTEGER NOT NULL, + state_entered_at INTEGER NOT NULL DEFAULT 0, + model TEXT, + effort TEXT, + subagent_model TEXT, + resume_attempts INTEGER NOT NULL DEFAULT 0, + next_resume_at INTEGER, + claude_session_id TEXT, + last_limit_reset_at INTEGER, + tokens_input INTEGER NOT NULL DEFAULT 0, + tokens_output INTEGER NOT NULL DEFAULT 0, + tokens_cache_read INTEGER NOT NULL DEFAULT 0, + tokens_cache_create INTEGER NOT NULL DEFAULT 0, + tokens_total INTEGER NOT NULL DEFAULT 0, + usage_parse_errors INTEGER NOT NULL DEFAULT 0, + title TEXT, + title_locked INTEGER NOT NULL DEFAULT 0, + parent_run_id INTEGER REFERENCES runs_new(id) ON DELETE SET NULL, + kind TEXT NOT NULL DEFAULT 'work', + kind_args_json TEXT, + mirror_status TEXT, + mock INTEGER NOT NULL DEFAULT 0, + mock_scenario TEXT +); + +INSERT INTO runs_new + SELECT + id, project_id, prompt, branch_name, state, container_id, log_path, + exit_code, error, head_commit, started_at, finished_at, created_at, + state_entered_at, model, effort, subagent_model, resume_attempts, + next_resume_at, claude_session_id, last_limit_reset_at, tokens_input, + tokens_output, tokens_cache_read, tokens_cache_create, tokens_total, + usage_parse_errors, title, title_locked, parent_run_id, kind, + kind_args_json, mirror_status, mock, mock_scenario + FROM runs; + +DROP TABLE runs; +ALTER TABLE runs_new RENAME TO runs; + +-- Seed sqlite_sequence so the next auto-id is max(existing id)+1, not 1. +-- sqlite_sequence has no unique constraint on name, so INSERT OR REPLACE +-- doesn't dedupe — DELETE first then INSERT to avoid duplicate rows +-- (which AUTOINCREMENT tolerates but is messy). For a fresh DB with no +-- prior rows, MAX(id) is NULL → 0, next auto-id is 1. +DELETE FROM sqlite_sequence WHERE name = 'runs'; +INSERT INTO sqlite_sequence (name, seq) + VALUES ('runs', COALESCE((SELECT MAX(id) FROM runs), 0)); + +CREATE INDEX idx_runs_project ON runs (project_id); +CREATE INDEX idx_runs_state ON runs (state); +CREATE INDEX idx_runs_parent ON runs (parent_run_id); diff --git a/src/server/src/fbi/db/projects.gleam b/src/server/src/fbi/db/projects.gleam index e1538eb..eef252c 100644 --- a/src/server/src/fbi/db/projects.gleam +++ b/src/server/src/fbi/db/projects.gleam @@ -122,7 +122,10 @@ pub fn get(db: sqlight.Connection, id: Int) -> Result(Project, DbError) { ) } -pub fn insert(db: sqlight.Connection, p: NewProject) -> Result(Project, DbError) { +pub fn insert( + db: sqlight.Connection, + p: NewProject, +) -> Result(Project, DbError) { let sql = "INSERT INTO projects (name, repo_url, default_branch, devcontainer_override_json, instructions, diff --git a/src/server/src/fbi/git/history_ops.gleam b/src/server/src/fbi/git/history_ops.gleam index 9f61be2..4819bca 100644 --- a/src/server/src/fbi/git/history_ops.gleam +++ b/src/server/src/fbi/git/history_ops.gleam @@ -81,7 +81,10 @@ pub fn mirror_rebase( } } -pub fn sync_in_container(config: Config, cid: String) -> Result(Outcome, String) { +pub fn sync_in_container( + config: Config, + cid: String, +) -> Result(Outcome, String) { exec_in_container(config, cid, "cd /workspace && git pull --no-rebase 2>&1") } diff --git a/src/server/src/fbi/handlers/changes.gleam b/src/server/src/fbi/handlers/changes.gleam index df28d29..bf7f078 100644 --- a/src/server/src/fbi/handlers/changes.gleam +++ b/src/server/src/fbi/handlers/changes.gleam @@ -116,7 +116,11 @@ pub fn handle_submodule_commit_files( } } -pub fn handle_file_diff(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_file_diff( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case req.method { http.Get -> case int.parse(id_str) { diff --git a/src/server/src/fbi/handlers/mcp_servers.gleam b/src/server/src/fbi/handlers/mcp_servers.gleam index 7e4969a..4c0c621 100644 --- a/src/server/src/fbi/handlers/mcp_servers.gleam +++ b/src/server/src/fbi/handlers/mcp_servers.gleam @@ -18,7 +18,11 @@ pub fn handle_global(req: Request, ctx: Context) -> Response { } } -pub fn handle_global_one(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_global_one( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case int.parse(id_str) { Error(_) -> wisp.bad_request("Invalid MCP server ID") Ok(id) -> diff --git a/src/server/src/fbi/handlers/prompts.gleam b/src/server/src/fbi/handlers/prompts.gleam index 5c71db2..3c121ff 100644 --- a/src/server/src/fbi/handlers/prompts.gleam +++ b/src/server/src/fbi/handlers/prompts.gleam @@ -19,7 +19,11 @@ pub fn handle_recent( } } -fn serve_recent(req: Request, ctx: Context, project_id_str: String) -> Response { +fn serve_recent( + req: Request, + ctx: Context, + project_id_str: String, +) -> Response { case int.parse(project_id_str) { Error(_) -> wisp.bad_request("Invalid project ID") Ok(pid) -> { diff --git a/src/server/src/fbi/handlers/runs.gleam b/src/server/src/fbi/handlers/runs.gleam index f97667a..4a671fa 100644 --- a/src/server/src/fbi/handlers/runs.gleam +++ b/src/server/src/fbi/handlers/runs.gleam @@ -212,7 +212,11 @@ fn do_continue(req: Request, ctx: Context, run_id: Int) -> Response { } } -pub fn handle_resume_now(req: Request, ctx: Context, id_str: String) -> Response { +pub fn handle_resume_now( + req: Request, + ctx: Context, + id_str: String, +) -> Response { case req.method { http.Post -> case int.parse(id_str) { @@ -478,7 +482,10 @@ fn index_paged(ctx: Context, filter: runs.ListFilter) -> Response { } } -fn get_param(qs: List(#(String, String)), key: String) -> option.Option(String) { +fn get_param( + qs: List(#(String, String)), + key: String, +) -> option.Option(String) { case list.key_find(qs, key) { Ok(v) if v != "" -> Some(v) _ -> None diff --git a/src/server/src/fbi/handlers/uploads.gleam b/src/server/src/fbi/handlers/uploads.gleam index 1ebba8a..98b8b58 100644 --- a/src/server/src/fbi/handlers/uploads.gleam +++ b/src/server/src/fbi/handlers/uploads.gleam @@ -27,7 +27,11 @@ pub fn handle_draft_file( } } -pub fn handle_run_uploads(req: Request, _ctx: Context, _id: String) -> Response { +pub fn handle_run_uploads( + req: Request, + _ctx: Context, + _id: String, +) -> Response { case req.method { http.Get -> json.object([#("files", json.array([], json.string))]) diff --git a/src/server/src/fbi/run/worker.gleam b/src/server/src/fbi/run/worker.gleam index 89e390e..d427db1 100644 --- a/src/server/src/fbi/run/worker.gleam +++ b/src/server/src/fbi/run/worker.gleam @@ -83,17 +83,21 @@ fn do_real_launch(input: LaunchInput) -> Result(#(String, String), String) { input.config, on_log, )) - use _ <- result.try(setup_run_dir(input)) let container_name = "fbi-run-" <> run_id - // Remove any pre-existing container with this name. Cancel paths and - // crash recovery don't always reach transition_to_finishing (which is - // the only place that calls remove_container), so retrying the same - // run id otherwise hits a "container name in use" error from Docker. - // force=true also handles the case where it's still running. + // Remove any pre-existing container with this name BEFORE setup_run_dir. + // Cancel paths and crash recovery don't always reach + // transition_to_finishing (which is the only place that calls + // remove_container), so retrying the same run id otherwise hits a + // "container name in use" error from Docker. force=true also handles + // the case where it's still running. Order matters: while an old + // container holds the bind mount on run_dir/state, `del_dir_r` fails + // with EBUSY and stale state files (notably result.json) survive into + // the new run. let _ = with_docker(input.config.docker_socket, fn(sock) { docker.remove_container(sock, container_name, True) }) + use _ <- result.try(setup_run_dir(input)) let spec = container_spec(input, image_tag) wisp.log_debug("run " <> run_id <> ": creating container image=" <> image_tag) use cid <- result.try( @@ -128,12 +132,20 @@ fn do_mock_launch(input: LaunchInput) -> Result(#(String, String), String) { Some(p) -> Ok(p) None -> Error("FBI_QUANTICO_BINARY_PATH not set; mock runs require it") }) - use _ <- result.try(setup_run_dir(input)) + // Force-remove any prior container with this name BEFORE setup_run_dir. + // SQLite reuses rowids when prior runs are deleted (no AUTOINCREMENT), + // so a fresh run can land on an id whose state dir is still bind-mounted + // by a not-yet-removed container. While that bind mount is alive, the + // host-side `del_dir_r` of run_dir/state can't remove the directory + // (EBUSY) and stale files (notably result.json) survive — read_outcome + // then mistakes the *prior* run's exit code for the new run's, which is + // exactly the source of the hang-test "succeeded" flake. let container_name = "fbi-run-" <> run_id let _ = with_docker(input.config.docker_socket, fn(sock) { docker.remove_container(sock, container_name, True) }) + use _ <- result.try(setup_run_dir(input)) let spec = mock_container_spec(input, quantico_path, scenario) wisp.log_debug( "run " <> run_id <> ": creating mock container scenario=" <> scenario, @@ -435,6 +447,14 @@ fn setup_run_dir(input: LaunchInput) -> Result(Nil, String) { let _ = simplifile.delete(run_dir <> "/state") let _ = simplifile.delete(run_dir <> "/wip") let _ = simplifile.delete(scripts_dir) + // Belt-and-suspenders: even if `delete(run_dir/state)` fails (EBUSY + // when a prior container still has the bind mount), nuke the + // individual signal files by path so read_outcome / poll_status_loop + // can't see stale values from the previous run id. + let _ = simplifile.delete(run_dir <> "/state/result.json") + let _ = simplifile.delete(run_dir <> "/state/agent-status") + let _ = simplifile.delete(run_dir <> "/state/session-id") + let _ = simplifile.delete(run_dir <> "/state/ready") use _ <- result.try( simplifile.create_directory_all(scripts_dir) |> result.map_error(fn(e) {