diff --git a/gpu_stack/cli_verify.py b/gpu_stack/cli_verify.py index c8efc53..4f62fac 100644 --- a/gpu_stack/cli_verify.py +++ b/gpu_stack/cli_verify.py @@ -13,6 +13,7 @@ from gpu_stack.cli_common import _repo_root + @dataclass(frozen=True) class VerifyGate: name: str @@ -147,6 +148,15 @@ def _verify_gates(profile: str, read_only: bool = False) -> List[VerifyGate]: _python_command("-m", "gpu_stack.demo", read_only=read_only), env=env, ), + VerifyGate( + "docs-stats", + _python_command( + "-m", + "gpu_stack.docs_stats_check", + read_only=read_only, + ), + env=env, + ), ] raise ValueError(f"unknown verify profile: {profile}") diff --git a/gpu_stack/docs_stats_check.py b/gpu_stack/docs_stats_check.py new file mode 100644 index 0000000..b5534dc --- /dev/null +++ b/gpu_stack/docs_stats_check.py @@ -0,0 +1,461 @@ +""" +docs_stats_check.py +==================== + +Freshness gate: parse numeric claims in README.md, docs/index.html, and +docs/app.js, then compare them against live registry values. Every claim +is anchored to a specific label so cosmetic rewording does not cause false +positives, but numeric drift fails loudly. + +Claim IDs and their sources: + + README "stats code block" (lines like " variables 1517") + README "Current Snapshot" table (markdown table rows) + docs/index.html stat-grid NNN cells + docs/app.js embedded fact strings with numeric literals + +The checker reports each mismatch as: + [file:claim_id] expected <expected>, found <found> + +Exit code is nonzero when any mismatch is found. 
def _live_stats() -> Dict[str, object]:
    """Gather the live values that every documentation claim is compared to.

    ``gpu_stack``, its submodules, and ``sympy`` are imported inside the
    function rather than at module import time.

    Returns:
        A dict keyed by stat name.  The registry/coverage/derived entries are
        expected to be integer counts; ``"_pkg_version"`` is the installed
        ``gpu_stack`` version string, or ``"unknown"`` when no distribution
        metadata for the package is installed.
    """
    from importlib.metadata import PackageNotFoundError
    from importlib.metadata import version as _pkg_version

    import sympy as sp

    from gpu_stack import Registry, find_cycles, topological_sort
    from gpu_stack.cli_root_debt import RootDebtEntry, _root_debt_families
    from gpu_stack.core.resolver import _boundary_family

    stats = Registry.stats()
    coverage = Registry.coverage()
    cycles = find_cycles()
    topo = topological_sort()

    # find_cycles() is accepted either as a list of cycles or as a plain
    # count; normalise once so both consumers below agree.
    cycle_count = len(cycles) if isinstance(cycles, list) else int(cycles)

    # Hard audit failures: cycles + collapsed equations + raw-symbol equations.
    equations = list(Registry.equations.values())
    collapsed = sum(
        1 for eq in equations
        if eq.as_sympy() in (sp.S.true, sp.S.false)
    )
    raw_symbols = sum(1 for eq in equations if eq.raw_dependency_symbols())
    hard_failures = cycle_count + collapsed + raw_symbols

    # Root-debt families: one entry per registry root, sorted by descending
    # dependent count and then by name before grouping.
    rows = [
        RootDebtEntry(
            dependents=len(root.dependents(include_constraints=False)),
            name=root.name,
            units=root.units,
            scope=root.scope,
            family=_boundary_family(root),
            boundary_category="primitive-root",
            primitive_boundary=root.is_root_input,
        )
        for root in Registry.roots()
    ]
    rows.sort(key=lambda r: (-r.dependents, r.name))
    family_rows = _root_debt_families(rows)

    # Package version from installed distribution metadata.  Only the
    # "package is not installed" case falls back to a placeholder; any other
    # failure is unexpected and should surface.
    try:
        pkg_version = _pkg_version("gpu_stack")
    except PackageNotFoundError:
        pkg_version = "unknown"

    return {
        # Registry stats
        "systems": stats["systems"],
        "variables": stats["variables"],
        "constants": stats["constants"],
        "equations": stats["equations"],
        "root_inputs": stats["root_inputs"],
        "leaves": stats["leaves"],
        # Coverage
        "non_constant_variables": coverage["non_constant_variables"],
        "with_sp_units": coverage["with_sp_units"],
        "with_references": coverage["with_references"],
        "equations_with_references": coverage["equations_with_references"],
        "equations_with_unit_check": coverage["equations_with_unit_check"],
        # Derived
        "cycles": cycle_count,
        "topological_order_length": len(topo),
        "hard_audit_failures": hard_failures,
        "root_debt_families": len(family_rows),
        # Version (a string, kept under a private key)
        "_pkg_version": pkg_version,
    }
key->value pairs from the README stats/coverage code block. + + We look for lines of the form " key NNN" between ```text fences. + Returns only the keys listed in _README_STATS_BLOCK_KEYS. + """ + found: Dict[str, int] = {} + in_block = False + for line in text.splitlines(): + stripped = line.strip() + if stripped.startswith("```text"): + in_block = True + continue + if in_block and stripped.startswith("```"): + in_block = False + continue + if not in_block: + continue + m = _STATS_LINE_RE.match(line) + if m: + key = m.group(1) + if key in _README_STATS_BLOCK_KEYS: + found[key] = int(m.group(2)) + return found + + +# --------------------------------------------------------------------------- +# README "Current Snapshot" table parser +# --------------------------------------------------------------------------- + +# Map from table row label to live-stats key (or special sentinel "_version") +_README_TABLE_LABELS: Dict[str, str] = { + "Systems": "systems", + "Variables": "variables", + "Constants": "constants", + "Equations": "equations", + "Root inputs": "root_inputs", + "Leaves": "leaves", + "Cycles": "cycles", + "Topological order length": "topological_order_length", + "Hard audit failures": "hard_audit_failures", + "Non-constant variables with `sp_units`": "with_sp_units", + "Non-constant variables with references": "with_references", + "Equations with references": "equations_with_references", + "Equations with unit checks": "equations_with_unit_check", + "Root-debt families": "root_debt_families", + "Package version": "_version", +} + +# Markdown table row: | Label | Value | +# Value is either an integer or a version string like 0.23.0 +_TABLE_ROW_RE = re.compile(r"^\|\s*(.+?)\s*\|\s*([^\|]+?)\s*\|") + + +def _parse_readme_snapshot_table(text: str) -> Dict[str, str]: + """ + Extract label->raw_value from the "Current Snapshot" markdown table. + + Returns a dict where keys are the label strings from _README_TABLE_LABELS + and values are the raw cell strings (e.g. 
"1517" or "0.23.0"). + """ + found: Dict[str, str] = {} + in_section = False + for line in text.splitlines(): + if "## Current Snapshot" in line: + in_section = True + continue + # Stop at the next ## heading + if in_section and line.startswith("## "): + break + if not in_section: + continue + m = _TABLE_ROW_RE.match(line) + if not m: + continue + label = m.group(1).strip() + value = m.group(2).strip() + if label in _README_TABLE_LABELS: + found[label] = value + return found + + +# --------------------------------------------------------------------------- +# docs/index.html stat-grid parser +# --------------------------------------------------------------------------- + +# Stat grid cells look like:
1517...
+_STAT_GRID_CELL_RE = re.compile( + r'
\s*(\d+)\s*([^<]+)' +) + +# Map label text fragment -> live-stats key +_HTML_STAT_LABELS: Dict[str, str] = { + "registered variables": "variables", + "equations connecting them": "equations", + "root inputs": "root_inputs", + "equations with unit checks": "equations_with_unit_check", +} + + +def _parse_html_stat_grid(text: str) -> Dict[str, int]: + """ + Extract stat-grid cell values from docs/index.html. + + Returns a dict keyed by the label fragment. + """ + found: Dict[str, int] = {} + for m in _STAT_GRID_CELL_RE.finditer(text): + value = int(m.group(1)) + label = m.group(2).strip() + for fragment, key in _HTML_STAT_LABELS.items(): + if fragment in label: + found[key] = value + return found + + +# --------------------------------------------------------------------------- +# docs/app.js fact string parser +# --------------------------------------------------------------------------- + +# The three fact strings we track: +# "The registry currently names 1517 variables and 959 equations." +# "799 equations are currently covered by unit checks." +# "619 root inputs are still visible in the current summary." + +_APPJS_PATTERNS: List[Tuple[str, re.Pattern, str]] = [ + ( + "appjs:fact_variables_and_equations", + re.compile( + r"The registry currently names\s+(\d+)\s+variables and\s+(\d+)\s+equations" + ), + "variables,equations", + ), + ( + "appjs:fact_unit_checks", + re.compile(r"(\d+)\s+equations are currently covered by unit checks"), + "equations_with_unit_check", + ), + ( + "appjs:fact_root_inputs", + re.compile(r"(\d+)\s+root inputs are still visible in the current summary"), + "root_inputs", + ), +] + + +def _parse_appjs_facts(text: str) -> Dict[str, int]: + """ + Extract numeric literals from the known fact strings in docs/app.js. 
def _record_claim(
    mismatches: List[StatMismatch],
    file: str,
    claim_id: str,
    expected: object,
    found: object,
) -> None:
    """Append a StatMismatch when *found* is missing or differs from *expected*.

    A missing claim (``found is None``) is recorded with an empty ``found``
    string; a differing claim is recorded with ``str(found)``.  Matching
    claims record nothing.
    """
    if found is None:
        found_text = ""
    elif found != expected:
        found_text = str(found)
    else:
        return
    mismatches.append(
        StatMismatch(
            file=file,
            claim_id=claim_id,
            expected=str(expected),
            found=found_text,
        )
    )


def check_docs_stats(repo_root: Path) -> List[StatMismatch]:
    """
    Compute live registry truth, parse all claim surfaces, return mismatches.

    The surfaces are checked in a fixed order -- README stats block, README
    "Current Snapshot" table, docs/index.html stat grid, docs/app.js fact
    strings -- and mismatches are returned in that order.

    A claim that cannot be found in a document does not raise; it is recorded
    as a mismatch with an empty ``found`` value.  The three documents
    themselves are read unconditionally, so an unreadable or absent file
    propagates the error raised by ``Path.read_text``.
    """
    live = _live_stats()
    mismatches: List[StatMismatch] = []

    readme_text = (repo_root / "README.md").read_text(encoding="utf-8")
    html_text = (repo_root / "docs" / "index.html").read_text(encoding="utf-8")
    appjs_text = (repo_root / "docs" / "app.js").read_text(encoding="utf-8")

    # -- README stats code block (integer comparison) --
    readme_block = _parse_readme_stats_block(readme_text)
    for key, stats_key in _README_STATS_BLOCK_KEYS.items():
        _record_claim(
            mismatches,
            "README.md",
            f"stats_block:{key}",
            live[stats_key],
            readme_block.get(key),
        )

    # -- README Current Snapshot table (raw-string comparison, so the
    #    version row and the integer rows share one code path) --
    readme_table = _parse_readme_snapshot_table(readme_text)
    for label, stats_key in _README_TABLE_LABELS.items():
        live_key = "_pkg_version" if stats_key == "_version" else stats_key
        _record_claim(
            mismatches,
            "README.md",
            f"snapshot_table:{label}",
            str(live[live_key]),
            readme_table.get(label),
        )

    # -- docs/index.html stat grid: parsed values are keyed by live-stats
    #    key, while the claim id names the label fragment --
    html_stats = _parse_html_stat_grid(html_text)
    for fragment, stats_key in _HTML_STAT_LABELS.items():
        _record_claim(
            mismatches,
            "docs/index.html",
            f"stat_grid:{fragment}",
            live[stats_key],
            html_stats.get(stats_key),
        )

    # -- docs/app.js fact strings --
    appjs_vals = _parse_appjs_facts(appjs_text)
    appjs_claims = (
        ("variables", "appjs:fact_variables_and_equations:variables"),
        ("equations", "appjs:fact_variables_and_equations:equations"),
        ("equations_with_unit_check", "appjs:fact_unit_checks"),
        ("root_inputs", "appjs:fact_root_inputs"),
    )
    for stats_key, claim_id in appjs_claims:
        _record_claim(
            mismatches,
            "docs/app.js",
            claim_id,
            live[stats_key],
            appjs_vals.get(stats_key),
        )

    return mismatches
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point for the docs-stats check.

    Parses ``--repo-root`` from *argv* (``None`` lets argparse read
    ``sys.argv``), resolves the repository root -- the given path, or the
    auto-detected root when the option is absent or empty -- and returns the
    exit code of ``run_docs_stats_gate``.
    """
    import argparse
    from gpu_stack.cli_common import _repo_root

    cli = argparse.ArgumentParser(
        prog="docs-stats-check",
        description="Check that README.md and docs/ stats match live registry values.",
    )
    cli.add_argument(
        "--repo-root",
        help="path to the repository root; defaults to auto-detected repo root",
    )
    options = cli.parse_args(argv)

    if options.repo_root:
        repo_root = Path(options.repo_root).resolve()
    else:
        repo_root = _repo_root()
    return run_docs_stats_gate(repo_root)
def _copy_repo_docs(tmp_path: Path) -> Path:
    """
    Build a minimal fake repo root inside tmp_path.

    Copies README.md plus docs/index.html and docs/app.js from the real
    repository, creating the docs/ directory first, and returns tmp_path.
    """
    repo = _repo()
    shutil.copy(repo / "README.md", tmp_path / "README.md")

    fake_docs = tmp_path / "docs"
    fake_docs.mkdir()
    for filename in ("index.html", "app.js"):
        shutil.copy(repo / "docs" / filename, fake_docs / filename)
    return tmp_path
output:\n{out}" + assert "OK" in out + + +# --------------------------------------------------------------------------- +# Gate fails when README snapshot table value is perturbed +# --------------------------------------------------------------------------- + +def test_gate_fails_on_perturbed_readme_snapshot_table(tmp_path): + fake_root = _copy_repo_docs(tmp_path) + readme_path = fake_root / "README.md" + original = readme_path.read_text(encoding="utf-8") + + live_variables = _live_variables() + wrong_variables = live_variables + 1 + + # Replace "| Variables | 1517 |" (or whatever live value) with wrong value + perturbed = original.replace( + f"| Variables | {live_variables} |", + f"| Variables | {wrong_variables} |", + ) + assert perturbed != original, "replacement did not change the file" + readme_path.write_text(perturbed, encoding="utf-8") + + mismatches = check_docs_stats(fake_root) + claim_ids = [m.claim_id for m in mismatches] + assert any("Variables" in cid for cid in claim_ids), ( + f"expected mismatch for Variables in snapshot table; got: {claim_ids}" + ) + # The mismatch must name the expected (live) value and the found (wrong) value + variables_mm = next(m for m in mismatches if "Variables" in m.claim_id) + assert variables_mm.expected == str(live_variables), ( + f"mismatch.expected should be live value {live_variables}; " + f"got {variables_mm.expected!r}" + ) + assert variables_mm.found == str(wrong_variables), ( + f"mismatch.found should be perturbed value {wrong_variables}; " + f"got {variables_mm.found!r}" + ) + + +# --------------------------------------------------------------------------- +# Gate fails when README stats code block value is perturbed +# --------------------------------------------------------------------------- + +def test_gate_fails_on_perturbed_readme_stats_block(tmp_path): + fake_root = _copy_repo_docs(tmp_path) + readme_path = fake_root / "README.md" + original = readme_path.read_text(encoding="utf-8") + + live_roots = 
_live_root_inputs() + wrong_roots = live_roots - 3 + + # Stats block uses " root_inputs 619" format (unique key in the block) + perturbed = original.replace( + f" root_inputs {live_roots}", + f" root_inputs {wrong_roots}", + ) + assert perturbed != original, "replacement did not change the file" + readme_path.write_text(perturbed, encoding="utf-8") + + mismatches = check_docs_stats(fake_root) + claim_ids = [m.claim_id for m in mismatches] + assert any("stats_block:root_inputs" in cid for cid in claim_ids), ( + f"expected stats_block:root_inputs mismatch; got: {claim_ids}" + ) + mm = next(m for m in mismatches if "stats_block:root_inputs" in m.claim_id) + assert mm.expected == str(live_roots) + assert mm.found == str(wrong_roots) + + +# --------------------------------------------------------------------------- +# Gate fails when docs/index.html stat-grid value is perturbed +# --------------------------------------------------------------------------- + +def test_gate_fails_on_perturbed_html_stat_grid(tmp_path): + fake_root = _copy_repo_docs(tmp_path) + html_path = fake_root / "docs" / "index.html" + original = html_path.read_text(encoding="utf-8") + + live_roots = _live_root_inputs() + wrong_roots = live_roots + 7 + + perturbed = original.replace( + f"{live_roots}root inputs", + f"{wrong_roots}root inputs", + ) + assert perturbed != original, "replacement did not change the file" + html_path.write_text(perturbed, encoding="utf-8") + + mismatches = check_docs_stats(fake_root) + claim_ids = [m.claim_id for m in mismatches] + assert any("root inputs" in cid for cid in claim_ids), ( + f"expected root_inputs mismatch in html stat grid; got: {claim_ids}" + ) + mm = next(m for m in mismatches if "root inputs" in m.claim_id) + assert mm.expected == str(live_roots) + assert mm.found == str(wrong_roots) + + +# --------------------------------------------------------------------------- +# Gate fails when docs/app.js fact string value is perturbed +# 
def test_gate_fails_on_perturbed_appjs_fact(tmp_path):
    """Changing the unit-check count in docs/app.js yields a precise mismatch."""
    root = _copy_repo_docs(tmp_path)
    target = root / "docs" / "app.js"
    before = target.read_text(encoding="utf-8")

    good = _live_unit_checks()
    bad = good + 42

    # Only the leading number differs between the search and replacement text.
    sentence = " equations are currently covered by unit checks."
    after = before.replace(f'"{good}{sentence}', f'"{bad}{sentence}')
    assert after != before, "replacement did not change the file"
    target.write_text(after, encoding="utf-8")

    mismatches = check_docs_stats(root)
    claim_ids = [m.claim_id for m in mismatches]
    hits = [m for m in mismatches if "appjs:fact_unit_checks" in m.claim_id]
    assert hits, (
        f"expected appjs:fact_unit_checks mismatch; got: {claim_ids}"
    )
    first = hits[0]
    assert first.expected == str(good)
    assert first.found == str(bad)
result.get("variables") == live_variables + + +# --------------------------------------------------------------------------- +# Parser robustness: snapshot table with different spacing +# --------------------------------------------------------------------------- + +def test_readme_snapshot_table_tolerates_extra_spaces(): + live_eqs = _live_equations() + # Extra spaces in the table cells + text = textwrap.dedent(f"""\ + ## Current Snapshot + + | Signal | Value | + |---|---:| + | Equations | {live_eqs} | + """) + result = _parse_readme_snapshot_table(text) + assert result.get("Equations") == str(live_eqs) + + +def test_readme_snapshot_table_stops_at_next_heading(): + live_eqs = _live_equations() + text = textwrap.dedent(f"""\ + ## Current Snapshot + + | Equations | {live_eqs} | + + ## Other Section + + | Equations | 0 | + """) + result = _parse_readme_snapshot_table(text) + # Only picks up the row before the next heading + assert result.get("Equations") == str(live_eqs) + + +# --------------------------------------------------------------------------- +# Parser unit tests: HTML stat grid +# --------------------------------------------------------------------------- + +def test_parse_html_stat_grid_extracts_all_four_stats(): + live_vars = _live_variables() + live_eqs = _live_equations() + live_roots = _live_root_inputs() + live_unit = _live_unit_checks() + html = textwrap.dedent(f"""\ +
{live_vars}registered variables
+
{live_eqs}equations connecting them
+
{live_roots}root inputs, named instead of hidden
+
{live_unit}equations with unit checks
+ """) + result = _parse_html_stat_grid(html) + assert result["variables"] == live_vars + assert result["equations"] == live_eqs + assert result["root_inputs"] == live_roots + assert result["equations_with_unit_check"] == live_unit + + +# --------------------------------------------------------------------------- +# Parser unit tests: app.js fact strings +# --------------------------------------------------------------------------- + +def test_parse_appjs_facts_extracts_known_strings(): + live_vars = _live_variables() + live_eqs = _live_equations() + live_roots = _live_root_inputs() + live_unit = _live_unit_checks() + appjs = textwrap.dedent(f"""\ + "The registry currently names {live_vars} variables and {live_eqs} equations.", + "{live_unit} equations are currently covered by unit checks.", + "{live_roots} root inputs are still visible in the current summary.", + """) + result = _parse_appjs_facts(appjs) + assert result["variables"] == live_vars + assert result["equations"] == live_eqs + assert result["equations_with_unit_check"] == live_unit + assert result["root_inputs"] == live_roots + + +# --------------------------------------------------------------------------- +# run_docs_stats_gate exit-code contract +# --------------------------------------------------------------------------- + +def test_run_docs_stats_gate_nonzero_on_drift(tmp_path, capsys): + fake_root = _copy_repo_docs(tmp_path) + readme_path = fake_root / "README.md" + original = readme_path.read_text(encoding="utf-8") + + live_vars = _live_variables() + perturbed = original.replace( + f"| Variables | {live_vars} |", + f"| Variables | {live_vars + 1} |", + ) + assert perturbed != original + readme_path.write_text(perturbed, encoding="utf-8") + + rc = run_docs_stats_gate(fake_root) + out = capsys.readouterr().out + assert rc != 0, "gate should return nonzero on drift" + assert "mismatch" in out.lower() or "Variables" in out + + +# 
def test_stat_mismatch_str():
    """str(StatMismatch) mentions the file, the claim id, and both values."""
    fields = {
        "file": "README.md",
        "claim_id": "snapshot_table:Variables",
        "expected": "1517",
        "found": "1518",
    }
    rendered = str(StatMismatch(**fields))
    for fragment in ("README.md", "snapshot_table:Variables", "1517", "1518"):
        assert fragment in rendered