diff --git a/engineering-loop-policy.yml b/engineering-loop-policy.yml index b4a852b..395896c 100644 --- a/engineering-loop-policy.yml +++ b/engineering-loop-policy.yml @@ -28,5 +28,9 @@ defaults: allowed_pr_remotes: - "origin" allowed_handoff_dirs: + # CI/tests run with TMPDIR under /tmp. - "/tmp" + # Production loop VM writes per-change handoffs under its --output-root + # (engineering_loop_runs_dir); see network-operations host_vars/loop.yml. + - "/var/lib/engineering-loop/runs" repos: {} diff --git a/src/hyrule_engineering_loop/promotion.py b/src/hyrule_engineering_loop/promotion.py index 1ac1356..31184c0 100644 --- a/src/hyrule_engineering_loop/promotion.py +++ b/src/hyrule_engineering_loop/promotion.py @@ -3,6 +3,7 @@ from __future__ import annotations import re +import shutil import subprocess import tempfile from pathlib import Path @@ -49,6 +50,22 @@ def _cleanup_worktree(repo_path: Path, branch: str, worktree_path: Path) -> None ) +def _worktree_branch_registered(repo_path: Path, branch: str) -> bool: + """Return ``True`` when ``branch`` still exists (e.g. left by a crashed run). + + A leftover branch with no worktree directory would make ``worktree add -b`` + fail, so it must be detected even when the worktree path is already gone. + """ + completed = subprocess.run( + ["git", "rev-parse", "--verify", "--quiet", f"refs/heads/{branch}"], + cwd=repo_path, + capture_output=True, + check=False, + text=True, + ) + return completed.returncode == 0 + + def diff_preview_from_results(results: list[dict[str, Any]], *, max_chars: int = 4_000) -> list[dict[str, Any]]: """Return compact diff previews suitable for CLI/Pi summaries.""" previews: list[dict[str, Any]] = [] @@ -101,8 +118,15 @@ def setup_worktrees_for_state(state: GraphState) -> list[dict[str, Any]]: branch = f"{branch_prefix}/{_slug(state['change_id'])}/{_slug(repo_name)}" worktree_path = worktree_parent / f"{_slug(repo_name)}-{_slug(state['change_id'])}" - if worktree_path.exists(): - raise PromotionError(f"worktree path already exists: {worktree_path}") + # Self-heal stale state from a prior crashed run: a leftover worktree + # or branch for this change_id would otherwise wedge every future run, + # because the per-invocation rollback below only removes worktrees + # created in *this* call. _cleanup_worktree + prune are no-ops on a + # clean tree, so this is safe when nothing stale exists. + if worktree_path.exists() or _worktree_branch_registered(repo_path, branch): + _cleanup_worktree(repo_path, branch, worktree_path) + shutil.rmtree(worktree_path, ignore_errors=True) + _run_git(["worktree", "prune"], cwd=repo_path) _run_git(["worktree", "add", "-b", branch, str(worktree_path), base_ref], cwd=repo_path) created.append((repo_path, branch, worktree_path)) results.append( diff --git a/tests/test_phase24_daemon.py b/tests/test_phase24_daemon.py index 938c5ce..bca355a 100644 --- a/tests/test_phase24_daemon.py +++ b/tests/test_phase24_daemon.py @@ -390,6 +390,40 @@ def test_unchanged_diff_across_rounds_aborts_to_signoff(tmp_path: Path) -> None: rollback_promotions(worktrees) +# --- worktree self-heal ----------------------------------------------------- + + +def test_setup_worktrees_self_heals_stale_worktree(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + workspace.mkdir() + _init_repo(workspace / "hyrule-cloud") + + def _fresh_state() -> GraphState: + return cast( + GraphState, + { + "change_id": "STALE_TEST", + "promotion_enabled": True, + "promotion_repositories": {"hyrule-cloud": str(workspace / "hyrule-cloud")}, + "promotion_worktree_root": str(tmp_path / "worktrees"), + "promotion_branch_prefix": "hyrule-feature", + }, + ) + + # First setup creates the branch-backed worktree. + first = setup_worktrees_for_state(_fresh_state()) + worktree_path = Path(first[0]["worktree_path"]) + assert worktree_path.is_dir() + + # A crashed run leaves the worktree + branch on disk. A brand-new state + # (no recorded worktree_results) must self-heal and recreate rather than + # raising "worktree path already exists", which would wedge every retry. + second = setup_worktrees_for_state(_fresh_state()) + assert Path(second[0]["worktree_path"]).is_dir() + assert second[0]["branch"] == first[0]["branch"] + rollback_promotions(second) + + # --- reporting helpers ------------------------------------------------------ diff --git a/tests/test_phase7_policy.py b/tests/test_phase7_policy.py index 001f94c..1b5a1dc 100644 --- a/tests/test_phase7_policy.py +++ b/tests/test_phase7_policy.py @@ -187,3 +187,21 @@ def test_policy_denies_non_allowlisted_repo_root(tmp_path: Path) -> None: violations = validate_graph_state(state) assert any("repo root not allowlisted" in violation for violation in violations) + + +def test_repo_policy_allowlists_production_handoff_dir() -> None: + # Guards the deployed loop VM: the daemon writes per-change handoffs under + # its --output-root (/var/lib/engineering-loop/runs), which must stay + # allowlisted in the shipped policy or every run dead-ends at needs_triage. + repo_root = Path(__file__).resolve().parents[1] + policy_path = repo_root / "engineering-loop-policy.yml" + + state = _base_state(policy_path) + state["handoff_output_dir"] = "/var/lib/engineering-loop/runs/issue_x/handoff" + assert validate_graph_state(state) == [] + + state["handoff_output_dir"] = "/var/lib/engineering-loop/secrets" + assert any( + "handoff output directory is not allowlisted" in violation + for violation in validate_graph_state(state) + )