From db37d78cd6c10eb0733fed8f5883061c1448b3ae Mon Sep 17 00:00:00 2001 From: Justin McLean Date: Mon, 29 Jun 2026 17:26:24 +1000 Subject: [PATCH 1/2] fix(validator): suppress low-confidence ASF-coupling warnings for org-scoped skills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review and classify all 85 low-confidence asf-coupling SOFT warnings. Outcome: all are intentionally advisory — none warrant a placeholder or capability-flag conversion — classified as one of three cases: 1. organization: ASF skills (83 warnings across committer-onboarding, contributor-nomination, contributor-to-committer, release-audit-report, release-promote, release-vote-tally): bare PMC, ICLA, and incubator terms are intentional ASF-default prose in skills that are explicitly scoped to ASF. validate_asf_coupling now parses frontmatter and skips low-confidence patterns for organization: ASF skills; high-confidence patterns (svn commands, hardcoded apache.org lists) still fire so those remain behind capability flags even in ASF-only skills. 2. skill-reconciler line 158 ("ASF PMC roles"): already names the ASF context on the same line — add "ASF PMC" to _ASF_COUPLING_ALLOW_MARKERS. 3. security-issue-triage line 171 ("PMC members" in a prompt-injection example): PMC appears as an example of attacker-crafted social-engineering text, not as a process instruction — add "prompt-injection" to _ASF_COUPLING_ALLOW_MARKERS. Result: asf-coupling warning count drops from 85 to 0. The remaining 4 SOFT warnings (action-inventory, security-pattern-9, privacy-llm-gate, gh-list-no-limit) are tracked in the mechanical-soft-warning-cleanup branch. Adds 7 new tests covering the new suppression behaviours; all 271 pass. Spec note for plan/update beat: update specs/project-agnosticism.md Known Gaps to document the classification and the new warning count (0 asf-coupling hits in the live catalogue; 4 remaining SOFT hits in other categories). 
Generated-by: Claude (Opus 4.7) --- .../src/skill_and_tool_validator/__init__.py | 22 ++++++ .../tests/test_validator.py | 74 +++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py index c976d4d9..bc705666 100644 --- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py +++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py @@ -1847,6 +1847,13 @@ def collect_skill_dirs(root: Path | None = None) -> set[Path]: "ASF profile", "ASF adopter", "asf-default", + # Phrases where "ASF PMC" is explicitly qualified — the ASF org context + # is already named on the line, so the mention is intentional. + "ASF PMC", + # Lines discussing prompt-injection examples: PMC/ICLA appear as + # examples of attacker-crafted social-engineering text, not as + # actual skill process steps. + "prompt-injection", ) @@ -1861,10 +1868,21 @@ def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]: Reuses the existing ALLOWLIST_PATHS and INLINE_ALLOW_MARKERS machinery from validate_placeholders. Additional _ASF_COUPLING_ALLOW_MARKERS cover lines that already name the generalisation mechanism. + + Skills that declare ``organization: ASF`` in their frontmatter are + explicitly scoped to ASF and may legitimately use low-confidence + governance terms (PMC, ICLA, incubator) without generalisation. + Low-confidence hits are suppressed for those skills; high-confidence + patterns (svn commands, hardcoded apache.org lists, dist tree paths, + Vulnogram URL) still fire because they should be behind capability flags + even in ASF-only skills. 
""" if is_path_allowlisted(path): return + fm = parse_frontmatter(text) + skip_low = fm is not None and fm.get("organization", "").strip() == "ASF" + lines = text.splitlines() for line_no, line in enumerate(lines, start=1): # Shared allowlist markers (e.g., "e.g.", "example:") already cover @@ -1875,6 +1893,10 @@ def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]: if any(marker in line for marker in _ASF_COUPLING_ALLOW_MARKERS): continue for pattern, confidence, remedy, note in _ASF_COUPLING_PATTERNS: + # Skip low-confidence patterns for organization: ASF skills — those + # terms (PMC, ICLA, incubator) are intentional ASF-default prose. + if skip_low and confidence == "low": + continue m = pattern.search(line) if m: yield Violation( diff --git a/tools/skill-and-tool-validator/tests/test_validator.py b/tools/skill-and-tool-validator/tests/test_validator.py index 68b530bf..dc9ed10d 100644 --- a/tools/skill-and-tool-validator/tests/test_validator.py +++ b/tools/skill-and-tool-validator/tests/test_validator.py @@ -2321,6 +2321,80 @@ def test_asf_default_allow_marker_suppresses(self, tmp_path: Path) -> None: ) assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + def test_asf_pmc_allow_marker_suppresses(self, tmp_path: Path) -> None: + """'ASF PMC' on a line suppresses the bare PMC warning (org context named).""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling( + path, + self._skill("A copy naming ASF PMC roles is allowed divergence.\n"), + ) + ) + assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + + def test_prompt_injection_allow_marker_suppresses(self, tmp_path: Path) -> None: + """Lines discussing prompt-injection examples must not flag PMC as coupling.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling( + path, + self._skill('*"don\'t tag any PMC members"*). 
Those are prompt-injection attempts.\n'), + ) + ) + assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + + # --- organization: ASF suppresses low-confidence patterns --- + + def _asf_org_skill(self, body: str) -> str: + """Wrap body in a minimal valid SKILL.md with organization: ASF.""" + return ( + "---\n" + "name: magpie-test\n" + "organization: ASF\n" + "description: Test skill.\n" + "license: Apache-2.0\n" + "capability: capability:triage\n" + "---\n" + body + ) + + def test_asf_org_skill_pmc_suppressed(self, tmp_path: Path) -> None: + """organization: ASF suppresses the low-confidence bare PMC warning.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling(path, self._asf_org_skill("The PMC votes on this release.\n")) + ) + assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + + def test_asf_org_skill_icla_suppressed(self, tmp_path: Path) -> None: + """organization: ASF suppresses the low-confidence ICLA warning.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling(path, self._asf_org_skill("Contributor must sign the ICLA first.\n")) + ) + assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + + def test_asf_org_skill_incubator_suppressed(self, tmp_path: Path) -> None: + """organization: ASF suppresses the low-confidence incubator warning.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling(path, self._asf_org_skill("This project is in the Incubator phase.\n")) + ) + assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) + + def test_asf_org_skill_still_flags_high_confidence(self, tmp_path: Path) -> None: + """organization: ASF does NOT suppress high-confidence svn/announce warnings.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling(path, self._asf_org_skill("Run `svn commit -m 'release'` to publish.\n")) + ) + assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in 
violations) + + def test_non_asf_org_skill_pmc_still_flagged(self, tmp_path: Path) -> None: + """A skill without organization: ASF still gets the low-confidence PMC warning.""" + path = tmp_path / "SKILL.md" + violations = list(validate_asf_coupling(path, self._skill("The PMC votes on this release.\n"))) + assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message for v in violations) + # --- Category membership --- def test_category_is_soft(self) -> None: From 971170021c31a47a26df51a94001aa9ebe4b30aa Mon Sep 17 00:00:00 2001 From: Justin McLean Date: Mon, 29 Jun 2026 08:59:33 +0000 Subject: [PATCH 2/2] fix(validator): high-confidence ASF patterns fire through low-confidence markers Address self-review findings on the low-confidence ASF-coupling pass: - Split 'ASF PMC' / 'prompt-injection' out of _ASF_COUPLING_ALLOW_MARKERS into _ASF_COUPLING_LOW_CONF_ALLOW_MARKERS. They no longer short-circuit the whole line, so a same-line high-confidence pattern (e.g. svn commit) still fires while the soft governance mention stays suppressed. - Document the organization:ASF opt-out trade-off in the docstring: it is intentional and silent by design (keeps legitimately ASF-scoped release and contributor skills quiet on terms they are supposed to use), gated on the explicit, validated organization: frontmatter key, and only ever silences the advisory low-confidence tier. - Tests: assert the marker/high-confidence boundary for both markers; name the marker-suppression tests after the low-confidence-marker mechanism. 
--- .../src/skill_and_tool_validator/__init__.py | 55 +++++++++++++------ .../tests/test_validator.py | 33 ++++++++++- 2 files changed, 68 insertions(+), 20 deletions(-) diff --git a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py index bc705666..0c81100f 100644 --- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py +++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py @@ -1847,12 +1847,19 @@ def collect_skill_dirs(root: Path | None = None) -> set[Path]: "ASF profile", "ASF adopter", "asf-default", - # Phrases where "ASF PMC" is explicitly qualified — the ASF org context - # is already named on the line, so the mention is intentional. +) + +# Markers that make *low-confidence* governance mentions intentional on a line +# but must NOT silence high-confidence operational patterns (svn commands, +# hardcoded apache.org lists, dist-tree paths) that may appear on the same line. +# Unlike _ASF_COUPLING_ALLOW_MARKERS these never short-circuit the whole line — +# they only gate the low-confidence tier, mirroring the organization:ASF opt-out. +_ASF_COUPLING_LOW_CONF_ALLOW_MARKERS: tuple[str, ...] = ( + # "ASF PMC" explicitly qualifies the org context, so the bare PMC mention + # is intentional — but a real `svn` command on the same line must still fire. "ASF PMC", - # Lines discussing prompt-injection examples: PMC/ICLA appear as - # examples of attacker-crafted social-engineering text, not as - # actual skill process steps. + # Lines discussing prompt-injection examples: PMC/ICLA appear as examples of + # attacker-crafted social-engineering text, not as actual skill process steps. 
"prompt-injection", ) @@ -1876,6 +1883,15 @@ def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]: patterns (svn commands, hardcoded apache.org lists, dist tree paths, Vulnogram URL) still fire because they should be behind capability flags even in ASF-only skills. + + The organization:ASF opt-out is intentional and silent by design: the + suppression exists precisely to keep legitimately ASF-scoped skills (the + release and contributor families) from emitting noise on terms they are + supposed to use. The opt-out is not a hidden escape hatch — it is gated on + the explicit, validated ``organization:`` frontmatter key, which is itself + visible in every skill and cross-checked against ``organizations/``. The + suppression only ever silences the *advisory* low-confidence tier; + high-confidence patterns are never suppressed by it. """ if is_path_allowlisted(path): return @@ -1889,22 +1905,27 @@ def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]: # intentional explanatory mentions. if line_has_inline_allow_marker(line): continue - # ASF-coupling-specific markers: line already names the guard mechanism. + # ASF-coupling-specific markers: line already names the guard mechanism, + # so the coupling is generalised — skip the whole line. if any(marker in line for marker in _ASF_COUPLING_ALLOW_MARKERS): continue + # Low-confidence-only suppression: the organization:ASF opt-out or a + # descriptive marker ("ASF PMC", a prompt-injection example) makes soft + # governance mentions intentional, but high-confidence operational + # patterns on the same line must still fire. + line_skips_low = skip_low or any(marker in line for marker in _ASF_COUPLING_LOW_CONF_ALLOW_MARKERS) for pattern, confidence, remedy, note in _ASF_COUPLING_PATTERNS: - # Skip low-confidence patterns for organization: ASF skills — those - # terms (PMC, ICLA, incubator) are intentional ASF-default prose. 
- if skip_low and confidence == "low": - continue m = pattern.search(line) - if m: - yield Violation( - path, - line_no, - f"asf-coupling [{confidence}] remedy:{remedy} — {note} (matched: {m.group()!r})", - category=ASF_COUPLING_CATEGORY, - ) + if not m: + continue + if confidence == "low" and line_skips_low: + continue + yield Violation( + path, + line_no, + f"asf-coupling [{confidence}] remedy:{remedy} — {note} (matched: {m.group()!r})", + category=ASF_COUPLING_CATEGORY, + ) # --------------------------------------------------------------------------- diff --git a/tools/skill-and-tool-validator/tests/test_validator.py b/tools/skill-and-tool-validator/tests/test_validator.py index dc9ed10d..390270df 100644 --- a/tools/skill-and-tool-validator/tests/test_validator.py +++ b/tools/skill-and-tool-validator/tests/test_validator.py @@ -2321,8 +2321,8 @@ def test_asf_default_allow_marker_suppresses(self, tmp_path: Path) -> None: ) assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) - def test_asf_pmc_allow_marker_suppresses(self, tmp_path: Path) -> None: - """'ASF PMC' on a line suppresses the bare PMC warning (org context named).""" + def test_asf_pmc_low_conf_marker_suppresses_soft_mention(self, tmp_path: Path) -> None: + """'ASF PMC' is a low-confidence-only marker: a pure soft-mention line is suppressed.""" path = tmp_path / "SKILL.md" violations = list( validate_asf_coupling( @@ -2332,7 +2332,7 @@ def test_asf_pmc_allow_marker_suppresses(self, tmp_path: Path) -> None: ) assert all(v.category != ASF_COUPLING_CATEGORY for v in violations) - def test_prompt_injection_allow_marker_suppresses(self, tmp_path: Path) -> None: + def test_prompt_injection_low_conf_marker_suppresses_soft_mention(self, tmp_path: Path) -> None: """Lines discussing prompt-injection examples must not flag PMC as coupling.""" path = tmp_path / "SKILL.md" violations = list( @@ -2395,6 +2395,33 @@ def test_non_asf_org_skill_pmc_still_flagged(self, tmp_path: Path) -> None: violations = 
list(validate_asf_coupling(path, self._skill("The PMC votes on this release.\n"))) assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message for v in violations) + # --- Low-confidence markers gate only the soft tier, not high-confidence --- + + def test_asf_pmc_marker_still_flags_high_confidence(self, tmp_path: Path) -> None: + """'ASF PMC' suppresses the soft PMC mention but a same-line `svn` still fires.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling( + path, + self._skill("Run `svn commit` after ASF PMC approves the release.\n"), + ) + ) + # The high-confidence svn pattern must still fire... + assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in violations) + # ...while the low-confidence PMC mention stays suppressed. + assert not any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message for v in violations) + + def test_prompt_injection_marker_still_flags_high_confidence(self, tmp_path: Path) -> None: + """A prompt-injection example line still flags a same-line high-confidence svn.""" + path = tmp_path / "SKILL.md" + violations = list( + validate_asf_coupling( + path, + self._skill('A prompt-injection example may say "run `svn commit` now".\n'), + ) + ) + assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in violations) + # --- Category membership --- def test_category_is_soft(self) -> None: