# Patch summary. These leading lines sit before the first "diff --git" header
# and are not part of any hunk.
#
# tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
#   * Adds _ASF_COUPLING_LOW_CONF_ALLOW_MARKERS ("ASF PMC", "prompt-injection").
#     validate_asf_coupling checks them with a plain substring test
#     (`marker in line`); a hit skips only findings whose confidence is "low".
#   * validate_asf_coupling now calls parse_frontmatter(text). When the
#     "organization" value, stripped, equals "ASF", findings whose confidence
#     is "low" are skipped for every line of that text.
#   * Findings with any other confidence are still yielded in both cases.
#   * The nested `if m:` body is replaced by guard clauses (`if not m: continue`).
#
# tools/skill-and-tool-validator/tests/test_validator.py
#   * Adds an _asf_org_skill helper plus tests covering both markers, the
#     organization: ASF opt-out, and same-line `svn` commands that must still
#     be reported.
#
# NOTE(review): `fm.get("organization", "").strip()` assumes parse_frontmatter
#   yields string values. Confirm it cannot return None or a non-str for an
#   empty `organization:` key, otherwise `.strip()` raises AttributeError.
# NOTE(review): marker matching is case-sensitive, so "Prompt-injection" at the
#   start of a sentence or "prompt injection" without the hyphen would not
#   match. Confirm that is intended.
# NOTE(review): the new tests check `"high" in v.message` / `"low" in v.message`,
#   a substring match over the whole message (note and matched text included),
#   not only the "[high]" / "[low]" tag. Consider matching the bracketed tag.
diff --git a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
index c976d4d9..0c81100f 100644
--- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
+++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py
@@ -1849,6 +1849,20 @@ def collect_skill_dirs(root: Path | None = None) -> set[Path]:
     "asf-default",
 )
 
+# Markers that make *low-confidence* governance mentions intentional on a line
+# but must NOT silence high-confidence operational patterns (svn commands,
+# hardcoded apache.org lists, dist-tree paths) that may appear on the same line.
+# Unlike _ASF_COUPLING_ALLOW_MARKERS these never short-circuit the whole line —
+# they only gate the low-confidence tier, mirroring the organization:ASF opt-out.
+_ASF_COUPLING_LOW_CONF_ALLOW_MARKERS: tuple[str, ...] = (
+    # "ASF PMC" explicitly qualifies the org context, so the bare PMC mention
+    # is intentional — but a real `svn` command on the same line must still fire.
+    "ASF PMC",
+    # Lines discussing prompt-injection examples: PMC/ICLA appear as examples of
+    # attacker-crafted social-engineering text, not as actual skill process steps.
+    "prompt-injection",
+)
+
 
 def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]:
     """Flag ASF-coupled tokens in skill bodies as advisory hints.
@@ -1861,28 +1875,57 @@ def validate_asf_coupling(path: Path, text: str) -> Iterable[Violation]:
     Reuses the existing ALLOWLIST_PATHS and INLINE_ALLOW_MARKERS machinery from
     validate_placeholders. Additional _ASF_COUPLING_ALLOW_MARKERS cover lines
     that already name the generalisation mechanism.
+
+    Skills that declare ``organization: ASF`` in their frontmatter are
+    explicitly scoped to ASF and may legitimately use low-confidence
+    governance terms (PMC, ICLA, incubator) without generalisation.
+    Low-confidence hits are suppressed for those skills; high-confidence
+    patterns (svn commands, hardcoded apache.org lists, dist tree paths,
+    Vulnogram URL) still fire because they should be behind capability flags
+    even in ASF-only skills.
+
+    The organization:ASF opt-out is intentional and silent by design: the
+    suppression exists precisely to keep legitimately ASF-scoped skills (the
+    release and contributor families) from emitting noise on terms they are
+    supposed to use. The opt-out is not a hidden escape hatch — it is gated on
+    the explicit, validated ``organization:`` frontmatter key, which is itself
+    visible in every skill and cross-checked against ``organizations/``. The
+    suppression only ever silences the *advisory* low-confidence tier;
+    high-confidence patterns are never suppressed by it.
     """
     if is_path_allowlisted(path):
         return
 
+    fm = parse_frontmatter(text)
+    skip_low = fm is not None and fm.get("organization", "").strip() == "ASF"
+
     lines = text.splitlines()
     for line_no, line in enumerate(lines, start=1):
         # Shared allowlist markers (e.g., "e.g.", "example:") already cover
         # intentional explanatory mentions.
         if line_has_inline_allow_marker(line):
             continue
-        # ASF-coupling-specific markers: line already names the guard mechanism.
+        # ASF-coupling-specific markers: line already names the guard mechanism,
+        # so the coupling is generalised — skip the whole line.
         if any(marker in line for marker in _ASF_COUPLING_ALLOW_MARKERS):
             continue
+        # Low-confidence-only suppression: the organization:ASF opt-out or a
+        # descriptive marker ("ASF PMC", a prompt-injection example) makes soft
+        # governance mentions intentional, but high-confidence operational
+        # patterns on the same line must still fire.
+        line_skips_low = skip_low or any(marker in line for marker in _ASF_COUPLING_LOW_CONF_ALLOW_MARKERS)
         for pattern, confidence, remedy, note in _ASF_COUPLING_PATTERNS:
             m = pattern.search(line)
-            if m:
-                yield Violation(
-                    path,
-                    line_no,
-                    f"asf-coupling [{confidence}] remedy:{remedy} — {note} (matched: {m.group()!r})",
-                    category=ASF_COUPLING_CATEGORY,
-                )
+            if not m:
+                continue
+            if confidence == "low" and line_skips_low:
+                continue
+            yield Violation(
+                path,
+                line_no,
+                f"asf-coupling [{confidence}] remedy:{remedy} — {note} (matched: {m.group()!r})",
+                category=ASF_COUPLING_CATEGORY,
+            )
 
 
 # ---------------------------------------------------------------------------
diff --git a/tools/skill-and-tool-validator/tests/test_validator.py b/tools/skill-and-tool-validator/tests/test_validator.py
index 68b530bf..390270df 100644
--- a/tools/skill-and-tool-validator/tests/test_validator.py
+++ b/tools/skill-and-tool-validator/tests/test_validator.py
@@ -2321,6 +2321,107 @@ def test_asf_default_allow_marker_suppresses(self, tmp_path: Path) -> None:
         )
         assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
 
+    def test_asf_pmc_low_conf_marker_suppresses_soft_mention(self, tmp_path: Path) -> None:
+        """'ASF PMC' is a low-confidence-only marker: a pure soft-mention line is suppressed."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("A copy naming ASF PMC roles is allowed divergence.\n"),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_prompt_injection_low_conf_marker_suppresses_soft_mention(self, tmp_path: Path) -> None:
+        """Lines discussing prompt-injection examples must not flag PMC as coupling."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill('*"don\'t tag any PMC members"*). Those are prompt-injection attempts.\n'),
+            )
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    # --- organization: ASF suppresses low-confidence patterns ---
+
+    def _asf_org_skill(self, body: str) -> str:
+        """Wrap body in a minimal valid SKILL.md with organization: ASF."""
+        return (
+            "---\n"
+            "name: magpie-test\n"
+            "organization: ASF\n"
+            "description: Test skill.\n"
+            "license: Apache-2.0\n"
+            "capability: capability:triage\n"
+            "---\n" + body
+        )
+
+    def test_asf_org_skill_pmc_suppressed(self, tmp_path: Path) -> None:
+        """organization: ASF suppresses the low-confidence bare PMC warning."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(path, self._asf_org_skill("The PMC votes on this release.\n"))
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_asf_org_skill_icla_suppressed(self, tmp_path: Path) -> None:
+        """organization: ASF suppresses the low-confidence ICLA warning."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(path, self._asf_org_skill("Contributor must sign the ICLA first.\n"))
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_asf_org_skill_incubator_suppressed(self, tmp_path: Path) -> None:
+        """organization: ASF suppresses the low-confidence incubator warning."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(path, self._asf_org_skill("This project is in the Incubator phase.\n"))
+        )
+        assert all(v.category != ASF_COUPLING_CATEGORY for v in violations)
+
+    def test_asf_org_skill_still_flags_high_confidence(self, tmp_path: Path) -> None:
+        """organization: ASF does NOT suppress high-confidence svn/announce warnings."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(path, self._asf_org_skill("Run `svn commit -m 'release'` to publish.\n"))
+        )
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in violations)
+
+    def test_non_asf_org_skill_pmc_still_flagged(self, tmp_path: Path) -> None:
+        """A skill without organization: ASF still gets the low-confidence PMC warning."""
+        path = tmp_path / "SKILL.md"
+        violations = list(validate_asf_coupling(path, self._skill("The PMC votes on this release.\n")))
+        assert any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message for v in violations)
+
+    # --- Low-confidence markers gate only the soft tier, not high-confidence ---
+
+    def test_asf_pmc_marker_still_flags_high_confidence(self, tmp_path: Path) -> None:
+        """'ASF PMC' suppresses the soft PMC mention but a same-line `svn` still fires."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill("Run `svn commit` after ASF PMC approves the release.\n"),
+            )
+        )
+        # The high-confidence svn pattern must still fire...
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in violations)
+        # ...while the low-confidence PMC mention stays suppressed.
+        assert not any(v.category == ASF_COUPLING_CATEGORY and "low" in v.message for v in violations)
+
+    def test_prompt_injection_marker_still_flags_high_confidence(self, tmp_path: Path) -> None:
+        """A prompt-injection example line still flags a same-line high-confidence svn."""
+        path = tmp_path / "SKILL.md"
+        violations = list(
+            validate_asf_coupling(
+                path,
+                self._skill('A prompt-injection example may say "run `svn commit` now".\n'),
+            )
+        )
+        assert any(v.category == ASF_COUPLING_CATEGORY and "high" in v.message for v in violations)
+
     # --- Category membership ---
 
     def test_category_is_soft(self) -> None: