From 3813b6a1cb5f40ef785d06bb0ffef973baddc59f Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:45:47 +0200 Subject: [PATCH 1/7] Add reusable external content loading Introduce ExternalContent as the common loader for review-time files fetched over HTTP. It strips frontmatter, caches the fetched body, and reports load failures with context. The loader uses a shared retrying fetch helper so skills and future review-context content use the same network behavior. --- bugbug/tools/code_review/data_types.py | 44 ++++++++++++++++++++++++++ tests/test_code_review.py | 22 ++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/bugbug/tools/code_review/data_types.py b/bugbug/tools/code_review/data_types.py index d7ecd6938d..b6c56c1114 100644 --- a/bugbug/tools/code_review/data_types.py +++ b/bugbug/tools/code_review/data_types.py @@ -1,6 +1,7 @@ import re import httpx +import tenacity from pydantic import BaseModel, Field, PrivateAttr from bugbug.tools.core.connection import get_http_client @@ -82,3 +83,46 @@ async def load(self) -> str: self._cached_body = _strip_frontmatter(response.text) return self._cached_body + + +class ExternalContentLoadError(Exception): + """Raised when an ExternalContent body cannot be loaded.""" + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(3), + wait=tenacity.wait_exponential(multiplier=1, min=1), + retry=tenacity.retry_if_exception_type(httpx.TransportError), + reraise=True, +) +async def _fetch_url(url: str) -> httpx.Response: + response = await get_http_client().get(url, timeout=30) + response.raise_for_status() + return response + + +class ExternalContent(BaseModel): + """An external file fetched and injected as context for the review.""" + + name: str = Field(description="A unique identifier for this content item.") + url: str = Field(description="HTTPS URL of the file to fetch.") + description: str = Field( + description="Short description of what this content provides." + ) + + _cached_body: str | None = PrivateAttr(default=None) + + async def load(self) -> str: + """Return the content body, fetching and caching it on first use.""" + if self._cached_body is not None: + return self._cached_body + + try: + response = await _fetch_url(self.url) + except httpx.HTTPError as e: + raise ExternalContentLoadError( + f"Could not load content '{self.name}' from {self.url}" + ) from e + + self._cached_body = _strip_frontmatter(response.text) + return self._cached_body diff --git a/tests/test_code_review.py b/tests/test_code_review.py index bc3d39e7df..6f04b1dd74 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -9,7 +9,11 @@ from unidiff import PatchSet from bugbug.tools.code_review import data_types, langchain_tools -from bugbug.tools.code_review.data_types import Skill, _strip_frontmatter +from bugbug.tools.code_review.data_types import ( + ExternalContent, + Skill, + _strip_frontmatter, +) from bugbug.tools.code_review.langchain_tools import ( _fetch_file, create_load_skill_tool, @@ -524,3 +528,19 @@ async def test_search_identifier_accepts_double_encoded_tests_value(): ) assert "dom/a.cpp:1: match" in result assert client.search.await_args.kwargs["tests"] == "only" + + +@pytest.mark.asyncio +async def test_external_content_load_caches(): + item = ExternalContent( + name="a", + url="https://example.com/a.md", + description="example", + ) + client = _mock_client_returning("---\nname: a\n---\nbody\n") + with patch.object(data_types, "get_http_client", return_value=client): + first = await item.load() + second = await item.load() + assert first == "body\n" + assert second == "body\n" + assert client.get.await_count == 1 From 43b05d94cf6f60d88014f58f4da115fca58de504 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:46:41 +0200 Subject: [PATCH 2/7] Validate review-context.toml rules Add a stdlib-only schema parser for review-context.toml, including reusable predicate definitions, boolean composition, file predicates, metadata predicates, and file or revision load actions. The validator rejects unknown fields and malformed rules before the runtime loader sees them, and exposes bugbug-validate-review-context for repository-side checks. --- .../code_review/review_context_schema.py | 622 ++++++++++++++++++ pyproject.toml | 1 + tests/test_code_review.py | 101 +++ 3 files changed, 724 insertions(+) create mode 100644 bugbug/tools/code_review/review_context_schema.py diff --git a/bugbug/tools/code_review/review_context_schema.py b/bugbug/tools/code_review/review_context_schema.py new file mode 100644 index 0000000000..ceba192a0c --- /dev/null +++ b/bugbug/tools/code_review/review_context_schema.py @@ -0,0 +1,622 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Schema and validator for code review external context rules. + +This module intentionally depends only on the Python standard library so it can +also be copied into target repositories and used from local tests. +""" + +import argparse +import sys +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import Literal, TypeAlias + +try: + import tomllib +except ImportError: + import tomli as tomllib # type: ignore + + +class ReviewContextValidationError(ValueError): + """Raised when review-context.toml has valid TOML but invalid schema.""" + + +@dataclass(frozen=True) +class GithubPolicy: + allowed_repos: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class Policy: + github: GithubPolicy = field(default_factory=GithubPolicy) + + +@dataclass(frozen=True) +class FilePredicate: + include: list[str] = field(default_factory=list) + exclude: list[str] = field(default_factory=list) + ext: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class BugzillaPredicate: + product: list[str] = field(default_factory=list) + component: list[str] = field(default_factory=list) + keywords: list[str] = field(default_factory=list) + severity: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class ReviewPredicate: + author: list[str] = field(default_factory=list) + reviewer: list[str] = field(default_factory=list) + blocking_reviewer: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class PatchPredicate: + repository: list[str] = field(default_factory=list) + is_backport: bool | None = None + + +@dataclass(frozen=True) +class Definitions: + files: dict[str, FilePredicate] = field(default_factory=dict) + bugzilla: dict[str, BugzillaPredicate] = field(default_factory=dict) + review: dict[str, ReviewPredicate] = field(default_factory=dict) + patch: dict[str, PatchPredicate] = field(default_factory=dict) + + +@dataclass(frozen=True) +class AllPredicate: + children: list["Predicate"] + + +@dataclass(frozen=True) +class AnyPredicate: + children: list["Predicate"] + + +@dataclass(frozen=True) +class NotPredicate: + child: "Predicate" + + +@dataclass(frozen=True) +class AnyFilePredicate: + predicate: FilePredicate + + +@dataclass(frozen=True) +class AllFilesPredicate: + predicate: FilePredicate + + +Predicate: TypeAlias = ( + AllPredicate + | AnyPredicate + | NotPredicate + | AnyFilePredicate + | AllFilesPredicate + | BugzillaPredicate + | ReviewPredicate + | PatchPredicate +) + + +@dataclass(frozen=True) +class LoadFileAction: + type: Literal["file"] + path: str + repo: str | None = None + branch: str | None = None + kind: str | None = None + + +@dataclass(frozen=True) +class FetchRevisionAction: + type: Literal["fetch_revision"] + revision: str | None = None + repo: str | None = None + hash: str | None = None + kind: str | None = None + + +RuleAction: TypeAlias = LoadFileAction | FetchRevisionAction + + +@dataclass(frozen=True) +class Rule: + name: str + when: Predicate + load: list[RuleAction] + description: str | None = None + owners: list[str] = field(default_factory=list) + priority: int = 0 + + +@dataclass(frozen=True) +class ReviewContextConfig: + version: int + policy: Policy = field(default_factory=Policy) + definitions: Definitions = field(default_factory=Definitions) + rules: list[Rule] = field(default_factory=list) + + +def _reject_unknown_keys(path: str, value: dict, allowed: set[str]) -> None: + unknown = sorted(set(value) - allowed) + if unknown: + raise ReviewContextValidationError( + f"{path}: unknown field(s): {', '.join(unknown)}" + ) + + +def _require_table(value: object, path: str) -> dict: + if not isinstance(value, dict): + raise ReviewContextValidationError(f"{path}: expected table") + return value + + +def _require_string(value: object, path: str) -> str: + if not isinstance(value, str): + raise ReviewContextValidationError(f"{path}: expected string") + return value + + +def _optional_string(value: object, path: str) -> str | None: + if value is None: + return None + return _require_string(value, path) + + +def _string_list(value: object, path: str) -> list[str]: + if value is None: + return [] + if not isinstance(value, list): + raise ReviewContextValidationError(f"{path}: expected list of strings") + for index, item in enumerate(value): + _require_string(item, f"{path}[{index}]") + return value + + +def _required_non_empty_string_list(value: object, path: str) -> list[str]: + items = _string_list(value, path) + if not items: + raise ReviewContextValidationError(f"{path}: expected non-empty list") + return items + + +def _optional_bool(value: object, path: str) -> bool | None: + if value is None: + return None + if not isinstance(value, bool): + raise ReviewContextValidationError(f"{path}: expected boolean") + return value + + +def _require_int(value: object, path: str) -> int: + if not isinstance(value, int) or isinstance(value, bool): + raise ReviewContextValidationError(f"{path}: expected integer") + return value + + +def _normalise_repo(repo: str) -> str: + return repo.lower() + + +def _parse_github_policy(value: object, path: str) -> GithubPolicy: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"allowed_repos"}) + return GithubPolicy( + allowed_repos=[ + _normalise_repo(repo) + for repo in _string_list( + table.get("allowed_repos"), f"{path}.allowed_repos" + ) + ] + ) + + +def _parse_policy(value: object | None) -> Policy: + if value is None: + return Policy() + table = _require_table(value, "policy") + _reject_unknown_keys("policy", table, {"github"}) + return Policy(github=_parse_github_policy(table.get("github", {}), "policy.github")) + + +def _parse_file_predicate(value: object, path: str) -> FilePredicate: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"include", "exclude", "ext", "ref"}) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = FilePredicate( + include=_string_list(table.get("include"), f"{path}.include"), + exclude=_string_list(table.get("exclude"), f"{path}.exclude"), + ext=_string_list(table.get("ext"), f"{path}.ext"), + ) + if not predicate.include and not predicate.exclude and not predicate.ext: + raise ReviewContextValidationError(f"{path}: expected include, exclude, or ext") + return predicate + + +def _parse_bugzilla_predicate(value: object, path: str) -> BugzillaPredicate: + table = _require_table(value, path) + _reject_unknown_keys( + path, table, {"product", "component", "keywords", "severity", "ref"} + ) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = BugzillaPredicate( + product=_string_list(table.get("product"), f"{path}.product"), + component=_string_list(table.get("component"), f"{path}.component"), + keywords=_string_list(table.get("keywords"), f"{path}.keywords"), + severity=_string_list(table.get("severity"), f"{path}.severity"), + ) + if ( + not predicate.product + and not predicate.component + and not predicate.keywords + and not predicate.severity + ): + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_review_predicate(value: object, path: str) -> ReviewPredicate: + table = _require_table(value, path) + _reject_unknown_keys( + path, table, {"author", "reviewer", "blocking_reviewer", "ref"} + ) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = ReviewPredicate( + author=_string_list(table.get("author"), f"{path}.author"), + reviewer=_string_list(table.get("reviewer"), f"{path}.reviewer"), + blocking_reviewer=_string_list( + table.get("blocking_reviewer"), f"{path}.blocking_reviewer" + ), + ) + if ( + not predicate.author + and not predicate.reviewer + and not predicate.blocking_reviewer + ): + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_patch_predicate(value: object, path: str) -> PatchPredicate: + table = _require_table(value, path) + _reject_unknown_keys(path, table, {"repository", "is_backport", "ref"}) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + raise ReviewContextValidationError(f"{path}.ref: unresolved ref") + predicate = PatchPredicate( + repository=_string_list(table.get("repository"), f"{path}.repository"), + is_backport=_optional_bool(table.get("is_backport"), f"{path}.is_backport"), + ) + if not predicate.repository and predicate.is_backport is None: + raise ReviewContextValidationError(f"{path}: expected at least one field") + return predicate + + +def _parse_definitions(value: object | None) -> Definitions: + if value is None: + return Definitions() + table = _require_table(value, "definitions") + _reject_unknown_keys("definitions", table, {"files", "bugzilla", "review", "patch"}) + return Definitions( + files={ + name: _parse_file_predicate(definition, f"definitions.files.{name}") + for name, definition in _require_table( + table.get("files", {}), "definitions.files" + ).items() + }, + bugzilla={ + name: _parse_bugzilla_predicate(definition, f"definitions.bugzilla.{name}") + for name, definition in _require_table( + table.get("bugzilla", {}), "definitions.bugzilla" + ).items() + }, + review={ + name: _parse_review_predicate(definition, f"definitions.review.{name}") + for name, definition in _require_table( + table.get("review", {}), "definitions.review" + ).items() + }, + patch={ + name: _parse_patch_predicate(definition, f"definitions.patch.{name}") + for name, definition in _require_table( + table.get("patch", {}), "definitions.patch" + ).items() + }, + ) + + +def _resolve_ref(ref: object, namespace: str, definitions: Definitions, path: str): + ref_name = _require_string(ref, path) + prefix = f"{namespace}." + if not ref_name.startswith(prefix): + raise ReviewContextValidationError( + f"{path}: expected {namespace}.* ref, got {ref_name!r}" + ) + name = ref_name[len(prefix) :] + mapping = getattr(definitions, namespace) + try: + return mapping[name] + except KeyError: + raise ReviewContextValidationError( + f"{path}: unknown ref {ref_name!r}" + ) from None + + +def _parse_file_predicate_ref( + value: object, path: str, definitions: Definitions +) -> FilePredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "files", definitions, f"{path}.ref") + return _parse_file_predicate(value, path) + + +def _parse_bugzilla_predicate_ref( + value: object, path: str, definitions: Definitions +) -> BugzillaPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "bugzilla", definitions, f"{path}.ref") + return _parse_bugzilla_predicate(value, path) + + +def _parse_review_predicate_ref( + value: object, path: str, definitions: Definitions +) -> ReviewPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "review", definitions, f"{path}.ref") + return _parse_review_predicate(value, path) + + +def _parse_patch_predicate_ref( + value: object, path: str, definitions: Definitions +) -> PatchPredicate: + table = _require_table(value, path) + if "ref" in table: + if len(table) != 1: + raise ReviewContextValidationError( + f"{path}.ref: cannot combine with fields" + ) + return _resolve_ref(table["ref"], "patch", definitions, f"{path}.ref") + return _parse_patch_predicate(value, path) + + +def _parse_all_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AllPredicate(_parse_predicate_list(value, path, definitions)) + + +def _parse_any_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AnyPredicate(_parse_predicate_list(value, path, definitions)) + + +def _parse_not_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return NotPredicate(_parse_predicate(value, path, definitions)) + + +def _parse_any_file_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AnyFilePredicate(_parse_file_predicate_ref(value, path, definitions)) + + +def _parse_all_files_predicate( + value: object, path: str, definitions: Definitions +) -> Predicate: + return AllFilesPredicate(_parse_file_predicate_ref(value, path, definitions)) + + +PredicateParser: TypeAlias = Callable[[object, str, Definitions], Predicate] + +_PREDICATE_PARSERS: dict[str, PredicateParser] = { + "all": _parse_all_predicate, + "any": _parse_any_predicate, + "not": _parse_not_predicate, + "any_file": _parse_any_file_predicate, + "all_files": _parse_all_files_predicate, + "bugzilla": _parse_bugzilla_predicate_ref, + "review": _parse_review_predicate_ref, + "patch": _parse_patch_predicate_ref, +} + + +def _parse_predicate(value: object, path: str, definitions: Definitions) -> Predicate: + """Parse one predicate node. + + Each TOML predicate object must have exactly one key. Dispatching through a + table keeps the grammar visible and makes adding a predicate a one-line + change plus a parser function. + """ + table = _require_table(value, path) + keys = set(table) + _reject_unknown_keys(path, table, set(_PREDICATE_PARSERS)) + if len(keys) != 1: + raise ReviewContextValidationError(f"{path}: expected exactly one predicate") + key = next(iter(keys)) + return _PREDICATE_PARSERS[key](table[key], f"{path}.{key}", definitions) + + +def _parse_predicate_list( + value: object, path: str, definitions: Definitions +) -> list[Predicate]: + if not isinstance(value, list) or not value: + raise ReviewContextValidationError(f"{path}: expected non-empty list") + return [ + _parse_predicate(child, f"{path}[{index}]", definitions) + for index, child in enumerate(value) + ] + + +def _parse_action(value: object, path: str) -> RuleAction: + """Parse one load action. + + Load actions are intentionally small: validate the schema here, and leave + trust policy and network behavior to the runtime loader. + """ + table = _require_table(value, path) + action_type = table.get("type") + if action_type == "file": + _reject_unknown_keys(path, table, {"type", "path", "repo", "branch", "kind"}) + if "path" not in table: + raise ReviewContextValidationError(f"{path}.path: required for file") + return LoadFileAction( + type="file", + path=_require_string(table["path"], f"{path}.path"), + repo=_optional_string(table.get("repo"), f"{path}.repo"), + branch=_optional_string(table.get("branch"), f"{path}.branch"), + kind=_optional_string(table.get("kind"), f"{path}.kind"), + ) + if action_type == "fetch_revision": + _reject_unknown_keys(path, table, {"type", "revision", "repo", "hash", "kind"}) + revision = _optional_string(table.get("revision"), f"{path}.revision") + repo = _optional_string(table.get("repo"), f"{path}.repo") + commit_hash = _optional_string(table.get("hash"), f"{path}.hash") + if not revision and not (repo and commit_hash): + raise ReviewContextValidationError( + f"{path}: fetch_revision requires revision or repo+hash" + ) + return FetchRevisionAction( + type="fetch_revision", + revision=revision, + repo=repo, + hash=commit_hash, + kind=_optional_string(table.get("kind"), f"{path}.kind"), + ) + if action_type is None: + raise ReviewContextValidationError(f"{path}.type: required") + raise ReviewContextValidationError( + f"{path}.type: unknown action type {action_type!r}" + ) + + +def _parse_rule(value: object, index: int, definitions: Definitions) -> Rule: + path = f"rules[{index}]" + table = _require_table(value, path) + _reject_unknown_keys( + path, + table, + {"name", "description", "owners", "priority", "when", "load"}, + ) + if "name" not in table: + raise ReviewContextValidationError(f"{path}.name: required") + if "when" not in table: + raise ReviewContextValidationError(f"{path}.when: required") + if "load" not in table: + raise ReviewContextValidationError(f"{path}.load: required") + load_value = table["load"] + if not isinstance(load_value, list) or not load_value: + raise ReviewContextValidationError(f"{path}.load: expected non-empty list") + return Rule( + name=_require_string(table["name"], f"{path}.name"), + description=_optional_string(table.get("description"), f"{path}.description"), + owners=_string_list(table.get("owners"), f"{path}.owners"), + priority=_require_int(table.get("priority", 0), f"{path}.priority"), + when=_parse_predicate(table["when"], f"{path}.when", definitions), + load=[ + _parse_action(action, f"{path}.load[{action_index}]") + for action_index, action in enumerate(load_value) + ], + ) + + +def parse_review_context_data(data: dict) -> ReviewContextConfig: + """Validate parsed review-context.toml data.""" + _reject_unknown_keys("", data, {"version", "policy", "definitions", "rules"}) + version = data.get("version") + if version != 1: + raise ReviewContextValidationError("version: expected 1") + policy = _parse_policy(data.get("policy")) + definitions = _parse_definitions(data.get("definitions")) + rules_value = data.get("rules", []) + if not isinstance(rules_value, list): + raise ReviewContextValidationError("rules: expected list of rule tables") + return ReviewContextConfig( + version=version, + policy=policy, + definitions=definitions, + rules=[ + _parse_rule(rule, index, definitions) + for index, rule in enumerate(rules_value) + ], + ) + + +def parse_review_context_toml(text: str) -> ReviewContextConfig: + """Parse and validate review-context.toml content.""" + return parse_review_context_data(tomllib.loads(text)) + + +def validate_review_context_file(path: str | Path) -> ReviewContextConfig: + """Parse and validate a review-context.toml file.""" + return parse_review_context_toml(Path(path).read_text()) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Validate review-context.toml") + parser.add_argument( + "path", + nargs="?", + default="review-context.toml", + help="Path to review-context.toml (default: %(default)s)", + ) + args = parser.parse_args(argv) + + try: + config = validate_review_context_file(args.path) + except (OSError, tomllib.TOMLDecodeError, ReviewContextValidationError) as exc: + print(f"{args.path}: invalid: {exc}", file=sys.stderr) + return 1 + + print(f"{args.path}: valid ({len(config.rules)} rule(s))") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index f8b9114397..050f05c6a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,6 +121,7 @@ bugbug-data-github = "scripts.github_issue_retriever:main" bugbug-fixed-comments = "scripts.inline_comments_data_collection:main" bugbug-ci-failures-retriever = "scripts.retrieve_ci_failures:main" bugbug-try-pushes-retriever = "scripts.retrieve_try_pushes:main" +bugbug-validate-review-context = "bugbug.tools.code_review.review_context_schema:main" [tool.hatch.version] path = "VERSION" diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 6f04b1dd74..e409c7f7be 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -20,6 +20,13 @@ search_identifier, search_text, ) +from bugbug.tools.code_review.review_context_schema import ( + ReviewContextValidationError, + parse_review_context_toml, +) +from bugbug.tools.code_review.review_context_schema import ( + main as validate_review_context_main, +) from bugbug.tools.code_review.utils import find_comment_scope from bugbug.tools.core.platforms.patch_apply import ( apply_patched_file, @@ -544,3 +551,97 @@ async def test_external_content_load_caches(): assert first == "body\n" assert second == "body\n" assert client.get.await_count == 1 + + +_RULES_TOML = """ +version = 1 + +[[rules]] +name = "Audio/Video C++" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [ + { type = "file", path = ".claude/skills/dom-media.md" }, +] + +[[rules]] +name = "WebIDL" +when = { any_file = { ext = [".webidl"] } } +load = [ + { type = "file", path = ".claude/skills/webidl.md", repo = "mozilla-firefox/firefox" }, +] + +[[rules]] +name = "Any JS" +when = { any_file = { ext = [".js"] } } +load = [ + { type = "file", path = ".claude/skills/js-style.md" }, +] + +[[rules]] +name = "Bugzilla component only" +when = { bugzilla = { component = ["Core::DOM: Web Audio"] } } +load = [ + { type = "file", path = ".claude/skills/dom-audio.md" }, +] +""" + + +def test_parse_review_context_toml_rejects_missing_when(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +load = [{ type = "file", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"): + parse_review_context_toml(toml) + + +def test_parse_review_context_toml_rejects_unknown_action_type(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "url", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="unknown action type"): + parse_review_context_toml(toml) + + +def test_parse_review_context_toml_rejects_unknown_rule_field(): + toml = """ +version = 1 +[[rules]] +name = "Broken" +bogus = true +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "x.md" }] +""" + with pytest.raises(ReviewContextValidationError, match="unknown field"): + parse_review_context_toml(toml) + + +def test_validate_review_context_main(tmp_path, capsys): + review_context_path = tmp_path / "review-context.toml" + review_context_path.write_text(_RULES_TOML) + + assert validate_review_context_main([str(review_context_path)]) == 0 + captured = capsys.readouterr() + assert "valid" in captured.out + + +def test_validate_review_context_main_failure(tmp_path, capsys): + review_context_path = tmp_path / "review-context.toml" + review_context_path.write_text( + """ +version = 1 +[[rules]] +name = "Broken" +load = [{ type = "file", path = "x.md" }] +""" + ) + + assert validate_review_context_main([str(review_context_path)]) == 1 + captured = capsys.readouterr() + assert "invalid" in captured.err From 59670730e26d30b8c0854e1bb71c16f583b05d20 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:50:03 +0200 Subject: [PATCH 3/7] Load GitHub review context from matching file rules Add the initial review-context runtime: fetch review-context.toml from GitHub, match rules against changed files, deduplicate load actions, and inject fetched files with an audit manifest. Cross-repository file loads are restricted to the rule repository unless explicitly allow-listed, and the parsed rules file is cached briefly to avoid repeated fetches during adjacent reviews. --- bugbug/tools/code_review/review_context.py | 455 +++++++++++++++++++++ tests/test_code_review.py | 383 ++++++++++++++++- 2 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 bugbug/tools/code_review/review_context.py diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py new file mode 100644 index 0000000000..c92287920c --- /dev/null +++ b/bugbug/tools/code_review/review_context.py @@ -0,0 +1,455 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Rule-based external content loading for code review. + +Fetches review-context.toml from a GitHub repository, matches changed files +against the rules, and pre-loads the referenced content. +""" + +import fnmatch +import hashlib +import json +import time +from dataclasses import dataclass +from logging import getLogger +from typing import Literal + +import httpx +from pydantic import BaseModel +from unidiff import PatchSet + +from bugbug.tools.code_review.data_types import ( + ExternalContent, + ExternalContentLoadError, + _fetch_url, +) +from bugbug.tools.code_review.review_context_schema import ( + AllFilesPredicate, + AllPredicate, + AnyFilePredicate, + AnyPredicate, + FilePredicate, + LoadFileAction, + NotPredicate, + Predicate, + ReviewContextConfig, + ReviewContextValidationError, + Rule, + RuleAction, + parse_review_context_toml, + tomllib, +) + +logger = getLogger(__name__) + +_review_context_cache: dict[ + tuple[str, str, str], tuple[float, "ReviewContextConfig"] +] = {} +_REVIEW_CONTEXT_CACHE_TTL = 300 # seconds +DEFAULT_REVIEW_CONTEXT_PATH = "review-context.toml" + + +@dataclass +class MatchedAction: + action: RuleAction + matched_rules: list[str] + + +class ExternalContentItem(BaseModel): + name: str + body: str + source_type: Literal["github_file", "phabricator_revision", "github_commit"] + source: str + action: dict + matched_rules: list[str] + trusted: bool = True + trust_reason: str + bytes: int + sha256: str + + @classmethod + def create( + cls, + *, + name: str, + body: str, + source_type: Literal["github_file", "phabricator_revision", "github_commit"], + source: str, + action: RuleAction, + matched_rules: list[str], + trust_reason: str, + ) -> "ExternalContentItem": + encoded = body.encode() + return cls( + name=name, + body=body, + source_type=source_type, + source=source, + action=_action_to_dict(action), + matched_rules=matched_rules, + trust_reason=trust_reason, + bytes=len(encoded), + sha256=hashlib.sha256(encoded).hexdigest(), + ) + + def manifest(self) -> dict: + return self.model_dump(exclude={"body"}) + + +def _github_raw_url(repo: str, branch: str, path: str) -> str: + return ( + f"https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/" + f"{path.lstrip('/')}" + ) + + +def _normalise_repo(repo: str) -> str: + return repo.lower() + + +def github_repo_allowed( + repo: str, review_context_repo: str, allowed_repos: set[str] +) -> bool: + repo = _normalise_repo(repo) + review_context_repo = _normalise_repo(review_context_repo) + if repo == review_context_repo: + return True + return any( + repo.startswith(allowed_repo) + if allowed_repo.endswith("/") + else repo == allowed_repo + for allowed_repo in allowed_repos + ) + + +def _action_to_dict(action: RuleAction) -> dict: + return {key: value for key, value in action.__dict__.items() if value is not None} + + +def parse_diff_files(diff: str) -> set[str]: + """Return the set of file paths added or modified by the diff.""" + try: + return {f.path for f in PatchSet.from_string(diff) if not f.is_removed_file} + except Exception: + files = set() + for line in diff.splitlines(): + if line.startswith("+++ b/"): + files.add(line[6:].strip()) + return files + + +def rule_matches( + rule: Rule, changed_files: set[str], bug_component: str | None = None +) -> bool: + return predicate_matches(rule.when, changed_files, bug_component) + + +def _normalise_path(path: str) -> str: + path = path.replace("\\", "/") + if path.startswith("./"): + return path[2:] + return path + + +def _file_matches(predicate: FilePredicate, path: str) -> bool: + path = _normalise_path(path) + if predicate.include and not any( + fnmatch.fnmatchcase(path, pattern) for pattern in predicate.include + ): + return False + if predicate.exclude and any( + fnmatch.fnmatchcase(path, pattern) for pattern in predicate.exclude + ): + return False + if predicate.ext and not any(path.endswith(ext) for ext in predicate.ext): + return False + return True + + +def _unsupported_metadata_predicate() -> bool: + return False + + +def predicate_matches( + predicate: Predicate, + changed_files: set[str], + bug_component: str | None = None, +) -> bool: + match predicate: + case AllPredicate(children=children): + return all( + predicate_matches(child, changed_files, bug_component) + for child in children + ) + case AnyPredicate(children=children): + return any( + predicate_matches(child, changed_files, bug_component) + for child in children + ) + case NotPredicate(child=child): + return not predicate_matches(child, changed_files, bug_component) + case AnyFilePredicate(predicate=file_predicate): + return any(_file_matches(file_predicate, f) for f in changed_files) + case AllFilesPredicate(predicate=file_predicate): + return bool(changed_files) and all( + _file_matches(file_predicate, f) for f in changed_files + ) + case _: + return _unsupported_metadata_predicate() + raise TypeError(f"Unknown predicate type: {type(predicate).__name__}") + + +def _action_key(action: RuleAction) -> tuple: + if isinstance(action, LoadFileAction): + return ( + "file", + action.repo or "", + action.branch or "", + action.path, + ) + raise ValueError(f"Unknown action type: {action.type!r}") + + +def _action_to_content( + action: LoadFileAction, + default_repo: str, + default_branch: str, + allowed_repos: set[str], +) -> ExternalContent: + """Resolve a file action to an ExternalContent instance. + + Actions without an explicit repo load from the same GitHub repo and branch + as the rules file. Cross-repo references use their explicit repo and + optional branch, defaulting to main. + """ + path = action.path + repo = action.repo or default_repo + branch = action.branch or (default_branch if repo == default_repo else "main") + if not github_repo_allowed(repo, default_repo, allowed_repos): + raise ValueError(f"GitHub repo is not allowed for review context: {repo}") + url = _github_raw_url(repo, branch, path) + return ExternalContent(name=path, url=url, description=path) + + +def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]: + """Merge extra rules into the base list. + + Rules in extra_toml whose name matches an existing rule replace that rule + in-place. Rules with new names are appended. Rules without a name are always + appended. Returns a new list; base is not mutated. + """ + rules = list(base) + extra = parse_review_context_toml(extra_toml).rules + if not extra: + return rules + index = {r.name: i for i, r in enumerate(rules) if r.name} + for rule in extra: + name = rule.name + if name and name in index: + rules[index[name]] = rule + else: + rules.append(rule) + return rules + + +def collect_actions( + diff: str, + config: ReviewContextConfig, + bug_component: str | None = None, + extra_context_toml: str | None = None, +) -> list[MatchedAction]: + """Return deduplicated actions matched from config against the diff. + + Merges extra_context_toml into config before matching. Evaluates each rule + against the changed files (and optional bug component). Actions are + deduplicated across rules so the same file is never fetched twice. Pure — + no I/O. + """ + if extra_context_toml: + config = ReviewContextConfig( + version=config.version, + policy=config.policy, + definitions=config.definitions, + rules=_merge_rules(config.rules, extra_context_toml), + ) + + changed_files = parse_diff_files(diff) + logger.debug( + "Matching rules against %d changed file(s): %s", + len(changed_files), + changed_files, + ) + + actions_by_key: dict[tuple, MatchedAction] = {} + actions: list[MatchedAction] = [] + + ordered_rules = sorted( + enumerate(config.rules), key=lambda item: (-item[1].priority, item[0]) + ) + for _, rule in ordered_rules: + if not rule_matches(rule, changed_files, bug_component): + continue + rule_name = rule.name + new_actions = [] + for action in rule.load: + key = _action_key(action) + if key in actions_by_key: + actions_by_key[key].matched_rules.append(rule_name) + continue + matched_action = MatchedAction(action=action, matched_rules=[rule_name]) + actions_by_key[key] = matched_action + actions.append(matched_action) + new_actions.append(action) + if new_actions: + logger.debug( + "Rule %r matched: %d action(s) queued", + rule_name, + len(new_actions), + ) + + logger.debug("Total actions to execute: %d", len(actions)) + return actions + + +async def execute_actions( + actions: list[MatchedAction], + default_repo: str, + default_branch: str, + allowed_repos: set[str], + content_overrides: dict[str, str] | None = None, +) -> list[ExternalContentItem]: + """Execute a list of actions and return loaded external content. + + file actions resolve to an ExternalContent URL and fetch the file content. + content_overrides bypasses the network fetch for matching names, allowing + callers to inject test content. + """ + results: list[ExternalContentItem] = [] + for matched_action in actions: + action = matched_action.action + if not isinstance(action, LoadFileAction): + logger.error( + "Unsupported review context action %s", _action_to_dict(action) + ) + continue + try: + item = _action_to_content( + action, default_repo, default_branch, allowed_repos + ) + if content_overrides and item.name in content_overrides: + body = content_overrides[item.name] + else: + body = await item.load() + results.append( + ExternalContentItem.create( + name=item.name, + body=body, + source_type="github_file", + source=item.url, + action=action, + matched_rules=matched_action.matched_rules, + trust_reason="github_repo_content", + ) + ) + except (ValueError, ExternalContentLoadError): + logger.error( + "Failed to load content for action %s", _action_to_dict(action) + ) + return results + + +async def _fetch_review_context( + repo: str, branch: str, review_context_path: str +) -> ReviewContextConfig: + """Fetch and parse review-context.toml, with a short in-process TTL cache.""" + now = time.time() + cache_key = (repo, branch, review_context_path) + if cache_key in _review_context_cache: + ts, config = _review_context_cache[cache_key] + if now - ts < _REVIEW_CONTEXT_CACHE_TTL: + logger.debug( + "Using cached review context from %s@%s:%s (age %.0fs)", + repo, + branch, + review_context_path, + now - ts, + ) + return config + review_context_url = _github_raw_url(repo, branch, review_context_path) + response = await _fetch_url(review_context_url) + config = parse_review_context_toml(response.text) + _review_context_cache[cache_key] = (now, config) + return config + + +async def load_external_content_for_diff( + diff: str, + review_context_repo: str, + review_context_branch: str = "main", + review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH, + extra_context_toml: str | None = None, + content_overrides: dict[str, str] | None = None, +) -> list[ExternalContentItem]: + """Fetch review-context.toml from GitHub, match against diff, return content. + + Retries the rules fetch on transient errors (via _fetch_url). The parsed + config is cached in-process with a short TTL to avoid redundant fetches + across back-to-back reviews. + """ + try: + config = await _fetch_review_context( + review_context_repo, review_context_branch, review_context_path + ) + except httpx.HTTPError: + logger.error( + "Could not fetch review context from %s@%s:%s", + review_context_repo, + review_context_branch, + review_context_path, + ) + return [] + except (tomllib.TOMLDecodeError, ReviewContextValidationError): + logger.exception( + "Could not parse review context from %s@%s:%s", + review_context_repo, + review_context_branch, + review_context_path, + ) + return [] + + try: + actions = collect_actions(diff, config, extra_context_toml=extra_context_toml) + except (tomllib.TOMLDecodeError, ReviewContextValidationError): + logger.exception("Could not parse extra review context") + return [] + + return await execute_actions( + actions, + review_context_repo, + review_context_branch, + set(config.policy.github.allowed_repos), + content_overrides, + ) + + +def external_content_manifest(content_items: list[ExternalContentItem]) -> list[dict]: + return [item.manifest() for item in content_items] + + +def format_external_content(content_items: list[ExternalContentItem]) -> str: + manifest = json.dumps(external_content_manifest(content_items), indent=2) + content = "\n\n".join( + f'\n{item.body.strip()}\n' + for item in content_items + ) + return ( + "\n\n\n" + f"{manifest}\n" + "" + "\n\n\n" + f"{content}\n" + "" + ) diff --git a/tests/test_code_review.py b/tests/test_code_review.py index e409c7f7be..e13d75bec6 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -8,7 +8,7 @@ import pytest from unidiff import PatchSet -from bugbug.tools.code_review import data_types, langchain_tools +from bugbug.tools.code_review import data_types, langchain_tools, review_context from bugbug.tools.code_review.data_types import ( ExternalContent, Skill, @@ -20,8 +20,20 @@ search_identifier, search_text, ) +from bugbug.tools.code_review.review_context import ( + _merge_rules, + external_content_manifest, + format_external_content, + github_repo_allowed, + load_external_content_for_diff, + parse_diff_files, + rule_matches, +) from bugbug.tools.code_review.review_context_schema import ( + AnyFilePredicate, + FilePredicate, ReviewContextValidationError, + Rule, parse_review_context_toml, ) from bugbug.tools.code_review.review_context_schema import ( @@ -585,6 +597,65 @@ async def test_external_content_load_caches(): ] """ +_DIFF_MEDIA = """\ +diff --git a/dom/media/Foo.cpp b/dom/media/Foo.cpp +--- a/dom/media/Foo.cpp ++++ b/dom/media/Foo.cpp +@@ -1,1 +1,1 @@ +-old ++new +""" + +_DIFF_WEBIDL = """\ +diff --git a/dom/webidl/Foo.webidl b/dom/webidl/Foo.webidl +--- a/dom/webidl/Foo.webidl ++++ b/dom/webidl/Foo.webidl +@@ -1,1 +1,1 @@ +-old ++new +""" + + +@pytest.fixture(autouse=True) +def clear_review_context_cache(): + review_context._review_context_cache.clear() + yield + review_context._review_context_cache.clear() + + +def test_parse_diff_files(): + files = parse_diff_files(_DIFF_MEDIA) + assert files == {"dom/media/Foo.cpp"} + + +def test_github_repo_allowed_policy(): + assert github_repo_allowed("mozilla/cubeb", "example/repo", {"mozilla/"}) + assert github_repo_allowed("whatwg/html", "example/repo", {"whatwg/html"}) + assert github_repo_allowed("example/repo", "example/repo", set()) + assert not github_repo_allowed("mozilla/cubeb", "example/repo", set()) + assert not github_repo_allowed("whatwg/html-tests", "example/repo", {"whatwg/html"}) + + +def test_rule_matches_extension_and_path(): + rule = Rule( + name="test", + when=AnyFilePredicate(FilePredicate(include=["dom/media/**"], ext=[".cpp"])), + load=[], + ) + assert rule_matches(rule, {"dom/media/Foo.cpp"}) + assert not rule_matches(rule, {"dom/canvas/Foo.cpp"}) + assert not rule_matches(rule, {"dom/media/Foo.js"}) + + +def test_rule_matches_extension_only(): + rule = Rule( + name="test", + when=AnyFilePredicate(FilePredicate(ext=[".webidl"])), + load=[], + ) + assert rule_matches(rule, {"dom/webidl/Foo.webidl"}) + assert not rule_matches(rule, {"dom/webidl/Foo.js"}) + def test_parse_review_context_toml_rejects_missing_when(): toml = """ @@ -645,3 +716,313 @@ def test_validate_review_context_main_failure(tmp_path, capsys): assert validate_review_context_main([str(review_context_path)]) == 1 captured = capsys.readouterr() assert "invalid" in captured.err + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_file_load(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, review_context_branch="release" + ) + + assert len(results) == 1 + assert results[0].name == ".claude/skills/dom-media.md" + assert results[0].body == "Media review guidelines\n" + assert results[0].source_type == "github_file" + assert results[0].matched_rules == ["Audio/Video C++"] + client.get.assert_any_await( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml", + timeout=30, + ) + client.get.assert_any_await( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md", + timeout=30, + ) + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_no_match(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n""" + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff(diff, review_context_repo) + + assert results == [] + + +@pytest.mark.asyncio +async def test_load_external_content_for_diff_rules_fetch_failure(): + client = MagicMock() + client.get = AsyncMock(side_effect=httpx.ConnectError("boom")) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, "mozilla-firefox/firefox" + ) + + assert results == [] + + +@pytest.mark.asyncio +async def test_load_external_content_deduplicates_actions(): + rules = """ +version = 1 + +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/cpp.md" }] + +[[rules]] +name = "B" +when = { any_file = { include = ["dom/media/**"] } } +load = [{ type = "file", path = ".claude/skills/cpp.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "C++ guidelines" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + + assert len(results) == 1 + assert results[0].matched_rules == ["A", "B"] + assert client.get.await_count == 2 + + +@pytest.mark.asyncio +async def test_load_external_content_orders_by_priority(): + rules = """ +version = 1 + +[[rules]] +name = "Low" +priority = 0 +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/low.md" }] + +[[rules]] +name = "High" +priority = 10 +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = ".claude/skills/high.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + low_response = MagicMock() + low_response.text = "low" + low_response.raise_for_status = MagicMock() + high_response = MagicMock() + high_response.text = "high" + high_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, high_response, low_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + ) + + assert [r.name for r in results] == [ + ".claude/skills/high.md", + ".claude/skills/low.md", + ] + + +@pytest.mark.asyncio +async def test_load_external_content_rejects_disallowed_github_repo(): + rules = """ +version = 1 +[[rules]] +name = "Cross repo" +when = { any_file = { ext = [".webidl"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_WEBIDL, + review_context_repo, + ) + + assert results == [] + assert client.get.await_count == 1 + + +@pytest.mark.asyncio +async def test_load_external_content_allows_policy_prefix_repo(): + rules = """ +version = 1 +[policy.github] +allowed_repos = ["mozilla/"] +[[rules]] +name = "Cross repo" +when = { any_file = { ext = [".webidl"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "cubeb guidelines" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_WEBIDL, + review_context_repo, + ) + + assert len(results) == 1 + assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md") + + +def test_merge_rules_appends_new(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules( + base.rules, + """ +version = 1 +[[rules]] +name = "B" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "b.md" }] +""", + ) + assert [rule.name for rule in merged] == ["A", "B"] + + +def test_merge_rules_replaces_by_name(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules( + base.rules, + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "replacement.md" }] +""", + ) + assert len(merged) == 1 + assert merged[0].load[0].path == "replacement.md" + + +def test_merge_rules_empty_extra(): + base = parse_review_context_toml( + """ +version = 1 +[[rules]] +name = "A" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "a.md" }] +""" + ) + merged = _merge_rules(base.rules, "version = 1\n") + assert merged == base.rules + + +@pytest.mark.asyncio +async def test_content_override_used_instead_of_fetch(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + overrides = {".claude/skills/dom-media.md": "Override body"} + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) + + assert len(results) == 1 + assert results[0].body == "Override body" + assert client.get.await_count == 1 + + +@pytest.mark.asyncio +async def test_external_content_manifest_and_prompt_body(): + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + overrides = {".claude/skills/dom-media.md": "Guideline body"} + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) + + manifest = external_content_manifest(results) + assert manifest == [ + { + "name": ".claude/skills/dom-media.md", + "source_type": "github_file", + "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md", + "action": {"type": "file", "path": ".claude/skills/dom-media.md"}, + "matched_rules": ["Audio/Video C++"], + "trusted": True, + "trust_reason": "github_repo_content", + "bytes": len("Guideline body".encode()), + "sha256": results[0].sha256, + } + ] + prompt = format_external_content(results) + assert "" in prompt + assert "" in prompt + assert '' in prompt + assert "Guideline body" in prompt From 594e5545f6369b15f9b58b47cb77beb93c0a16fd Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:51:15 +0200 Subject: [PATCH 4/7] Match review context on Bugzilla metadata and related revisions Extend review-context matching with Bugzilla component lookup for patches that have an associated bug, failing closed when metadata is unavailable. Also implement fetch_revision actions for Phabricator revisions and GitHub commits so rules can include trusted related diffs as review context. --- bugbug/tools/code_review/review_context.py | 158 +++++++++++++++++++-- tests/test_code_review.py | 130 +++++++++++++++++ 2 files changed, 277 insertions(+), 11 deletions(-) diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py index c92287920c..39791c70a1 100644 --- a/bugbug/tools/code_review/review_context.py +++ b/bugbug/tools/code_review/review_context.py @@ -9,9 +9,11 @@ against the rules, and pre-loads the referenced content. """ +import asyncio import fnmatch import hashlib import json +import re import time from dataclasses import dataclass from logging import getLogger @@ -31,20 +33,27 @@ AllPredicate, AnyFilePredicate, AnyPredicate, + BugzillaPredicate, + FetchRevisionAction, FilePredicate, LoadFileAction, NotPredicate, + PatchPredicate, Predicate, ReviewContextConfig, ReviewContextValidationError, + ReviewPredicate, Rule, RuleAction, parse_review_context_toml, tomllib, ) +from bugbug.tools.core.connection import get_http_client logger = getLogger(__name__) +_PHAB_RE = re.compile(r"D(\d+)$") + _review_context_cache: dict[ tuple[str, str, str], tuple[float, "ReviewContextConfig"] ] = {} @@ -141,6 +150,29 @@ def parse_diff_files(diff: str) -> set[str]: return files +async def get_bug_component(bug_id: int) -> str | None: + """Return 'Product::Component' for the given Bugzilla bug, or None on failure.""" + from libmozdata.bugzilla import Bugzilla + + def _fetch() -> str | None: + bugs: list[dict] = [] + Bugzilla( + bug_id, + include_fields=["product", "component"], + bughandler=lambda bug, data: data.append(bug), + bugdata=bugs, + ).get_data().wait() + if not bugs: + return None + return f"{bugs[0]['product']}::{bugs[0]['component']}" + + try: + return await asyncio.get_event_loop().run_in_executor(None, _fetch) + except Exception: + logger.warning("Could not fetch component for bug %s", bug_id) + return None + + def rule_matches( rule: Rule, changed_files: set[str], bug_component: str | None = None ) -> bool: @@ -169,7 +201,21 @@ def _file_matches(predicate: FilePredicate, path: str) -> bool: return True -def _unsupported_metadata_predicate() -> bool: +def _bugzilla_matches( + predicate: BugzillaPredicate, bug_component: str | None = None +) -> bool: + if predicate.product or predicate.keywords or predicate.severity: + return False + if predicate.component: + return bug_component is not None and bug_component in predicate.component + return False + + +def _review_matches(predicate: ReviewPredicate) -> bool: + return False + + +def _patch_matches(predicate: PatchPredicate) -> bool: return False @@ -197,8 +243,12 @@ def predicate_matches( return bool(changed_files) and all( _file_matches(file_predicate, f) for f in changed_files ) - case _: - return _unsupported_metadata_predicate() + case BugzillaPredicate(): + return _bugzilla_matches(predicate, bug_component) + case ReviewPredicate(): + return _review_matches(predicate) + case PatchPredicate(): + return _patch_matches(predicate) raise TypeError(f"Unknown predicate type: {type(predicate).__name__}") @@ -210,6 +260,13 @@ def _action_key(action: RuleAction) -> tuple: action.branch or "", action.path, ) + if isinstance(action, FetchRevisionAction): + return ( + "fetch_revision", + action.revision or "", + action.repo or "", + action.hash or "", + ) raise ValueError(f"Unknown action type: {action.type!r}") @@ -234,6 +291,68 @@ def _action_to_content( return ExternalContent(name=path, url=url, description=path) +async def _fetch_revision( + action: FetchRevisionAction, +) -> tuple[str, str, Literal["phabricator_revision", "github_commit"], str, str] | None: + """Fetch the diff of another revision, either from Phabricator or GitHub.""" + revision_str = action.revision or "" + repo = action.repo or "" + commit_hash = action.hash or "" + + if revision_str: + m = _PHAB_RE.match(revision_str.strip()) + if not m: + logger.warning("fetch_revision: cannot parse revision %r", revision_str) + return None + rev_num = int(m.group(1)) + try: + from bugbug.tools.core.platforms.phabricator import get_phabricator_client + + def _load(): + phab = get_phabricator_client() + revision = phab.load_revision(rev_id=rev_num) + diff_id = revision["fields"]["diffID"] + return phab.load_raw_diff(diff_id) + + content = await asyncio.get_event_loop().run_in_executor(None, _load) + name = f"Revision D{rev_num}" + return ( + name, + content, + "phabricator_revision", + name, + "phabricator_revision", + ) + except Exception: + logger.warning("fetch_revision: failed to load Phabricator D%s", rev_num) + return None + + if repo and commit_hash: + try: + response = await get_http_client().get( + f"https://api.github.com/repos/{repo}/commits/{commit_hash}", + headers={"Accept": "application/vnd.github.diff"}, + timeout=30, + ) + response.raise_for_status() + source = f"{repo}@{commit_hash}" + return ( + f"{repo}@{commit_hash[:12]}", + response.text, + "github_commit", + source, + "github_repo_content", + ) + except httpx.HTTPError: + logger.warning("fetch_revision: failed to load %s@%s", repo, commit_hash) + return None + + logger.warning( + "fetch_revision: action has neither revision nor repo+hash: %s", action + ) + return None + + def _merge_rules(base: list[Rule], extra_toml: str) -> list[Rule]: """Merge extra rules into the base list. @@ -323,17 +442,29 @@ async def execute_actions( ) -> list[ExternalContentItem]: """Execute a list of actions and return loaded external content. - file actions resolve to an ExternalContent URL and fetch the file content. - content_overrides bypasses the network fetch for matching names, allowing - callers to inject test content. + fetch_revision actions fetch the raw diff of a Phabricator revision or + GitHub commit. file actions resolve to an ExternalContent URL and + fetch the file content. content_overrides bypasses the network fetch for + matching names, allowing callers to inject test content. """ results: list[ExternalContentItem] = [] for matched_action in actions: action = matched_action.action - if not isinstance(action, LoadFileAction): - logger.error( - "Unsupported review context action %s", _action_to_dict(action) - ) + if isinstance(action, FetchRevisionAction): + result = await _fetch_revision(action) + if result: + name, body, source_type, source, trust_reason = result + results.append( + ExternalContentItem.create( + name=name, + body=body, + source_type=source_type, + source=source, + action=action, + matched_rules=matched_action.matched_rules, + trust_reason=trust_reason, + ) + ) continue try: item = _action_to_content( @@ -390,6 +521,7 @@ async def load_external_content_for_diff( review_context_repo: str, review_context_branch: str = "main", review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH, + bug_id: int | None = None, extra_context_toml: str | None = None, content_overrides: dict[str, str] | None = None, ) -> list[ExternalContentItem]: @@ -420,8 +552,12 @@ async def load_external_content_for_diff( ) return [] + bug_component: str | None = None + if bug_id is not None: + bug_component = await get_bug_component(bug_id) + try: - actions = collect_actions(diff, config, extra_context_toml=extra_context_toml) + actions = collect_actions(diff, config, bug_component, extra_context_toml) except (tomllib.TOMLDecodeError, ReviewContextValidationError): logger.exception("Could not parse extra review context") return [] diff --git a/tests/test_code_review.py b/tests/test_code_review.py index e13d75bec6..f8eac7d462 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -24,6 +24,7 @@ _merge_rules, external_content_manifest, format_external_content, + get_bug_component, github_repo_allowed, load_external_content_for_diff, parse_diff_files, @@ -657,6 +658,35 @@ def test_rule_matches_extension_only(): assert not rule_matches(rule, {"dom/webidl/Foo.js"}) +def test_rule_bugzilla_component_fails_closed_without_component(): + config = parse_review_context_toml(_RULES_TOML) + rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") + assert not rule_matches(rule, set(), bug_component=None) + + +def test_rule_bugzilla_component_matches(): + config = parse_review_context_toml(_RULES_TOML) + rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") + assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio") + assert not rule_matches(rule, set(), bug_component="Core::Layout") + + +@pytest.mark.asyncio +async def test_get_bug_component(): + with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls: + instance = MagicMock() + bugzilla_cls.return_value = instance + + def get_data(): + handler = bugzilla_cls.call_args.kwargs["bughandler"] + data = bugzilla_cls.call_args.kwargs["bugdata"] + handler({"product": "Core", "component": "DOM: Web Audio"}, data) + return MagicMock(wait=MagicMock()) + + instance.get_data.side_effect = get_data + assert await get_bug_component(123) == "Core::DOM: Web Audio" + + def test_parse_review_context_toml_rejects_missing_when(): toml = """ version = 1 @@ -1026,3 +1056,103 @@ async def test_external_content_manifest_and_prompt_body(): assert "" in prompt assert '' in prompt assert "Guideline body" in prompt + + +@pytest.mark.asyncio +async def test_extra_context_toml_appended(): + review_context_repo = "mozilla-firefox/firefox" + base_rules = """ +version = 1 +[[rules]] +name = "Base" +when = { any_file = { ext = [".js"] } } +load = [{ type = "file", path = "base.md" }] +""" + extra_rules = """ +version = 1 +[[rules]] +name = "Extra" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "extra.md" }] +""" + rules_response = MagicMock() + rules_response.text = base_rules + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "extra" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra_rules, + ) + + assert [result.name for result in results] == ["extra.md"] + + +@pytest.mark.asyncio +async def test_extra_context_toml_replaces_by_name(): + review_context_repo = "mozilla-firefox/firefox" + extra_rules = """ +version = 1 +[[rules]] +name = "Audio/Video C++" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "replacement.md" }] +""" + rules_response = MagicMock() + rules_response.text = _RULES_TOML + rules_response.raise_for_status = MagicMock() + content_response = MagicMock() + content_response.text = "replacement" + content_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(side_effect=[rules_response, content_response]) + + with patch.object(data_types, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra_rules, + ) + + assert [result.name for result in results] == ["replacement.md"] + + +@pytest.mark.asyncio +async def test_load_external_content_fetches_phabricator_revision(): + rules = """ +version = 1 +[[rules]] +name = "Revision" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "fetch_revision", revision = "D123" }] +""" + review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() + rules_response.text = rules + rules_response.raise_for_status = MagicMock() + client = MagicMock() + client.get = AsyncMock(return_value=rules_response) + + phabricator = MagicMock() + phabricator.load_revision.return_value = {"fields": {"diffID": 456}} + phabricator.load_raw_diff.return_value = "diff content" + + with ( + patch.object(data_types, "get_http_client", return_value=client), + patch( + "bugbug.tools.core.platforms.phabricator.get_phabricator_client", + return_value=phabricator, + ), + ): + results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + + assert len(results) == 1 + assert results[0].name == "Revision D123" + assert results[0].body == "diff content" + assert results[0].source_type == "phabricator_revision" From 501e515ec53638ea2fefca8b181770f6833f502b Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Thu, 4 Jun 2026 15:52:19 +0000 Subject: [PATCH 5/7] Inject matched review context into code review agent Thread review_context_repo, review_context_branch, extra_context_toml, and content overrides through the CodeReviewTool. When matching rules load content, the prompt receives the formatted external context and the tool response records the content manifest for auditing. --- bugbug/tools/code_review/agent.py | 49 ++++++++++++++++++++++++++--- bugbug/tools/code_review/prompts.py | 1 + 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index cc51c71abc..6de20ea685 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -182,25 +182,30 @@ def create(cls, **kwargs): def count_tokens(self, text): return len(self._tokenizer.encode(text)) - def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str: + def generate_initial_prompt( + self, patch: Patch, patch_summary: str, external_context: str = "" + ) -> str: created_before = patch.date_created if self.is_experiment_env else None return FIRST_MESSAGE_TEMPLATE.format( patch=format_patch_set(patch.patch_set), patch_summarization=patch_summary, + external_context=external_context, comment_examples=self._get_comment_examples(patch, created_before), approved_examples=self._get_generated_examples(patch, created_before), ) async def generate_review_comments( - self, patch: Patch, patch_summary: str + self, patch: Patch, patch_summary: str, external_context: str = "" ) -> list[GeneratedReviewComment]: try: async for chunk in self.agent.astream( { "messages": [ HumanMessage( - self.generate_initial_prompt(patch, patch_summary) + self.generate_initial_prompt( + patch, patch_summary, external_context + ) ), ] }, @@ -214,14 +219,47 @@ async def generate_review_comments( return result["structured_response"].comments - async def run(self, patch: Patch) -> CodeReviewToolResponse: + async def run( + self, + patch: Patch, + review_context_repo: Optional[str] = None, + review_context_branch: str = "main", + extra_context_toml: Optional[str] = None, + content_overrides: Optional[dict[str, str]] = None, + ) -> CodeReviewToolResponse: if self.count_tokens(patch.raw_diff) > 21000: raise LargeDiffError("The diff is too large") patch_summary = self.patch_summarizer.run(patch) + external_context = "" + external_content_manifest = [] + if review_context_repo: + from bugbug.tools.code_review.review_context import ( + external_content_manifest as build_external_content_manifest, + ) + from bugbug.tools.code_review.review_context import ( + format_external_content, + load_external_content_for_diff, + ) + + bug_id = getattr(patch, "bug_id", None) + content_items = await load_external_content_for_diff( + patch.raw_diff, + review_context_repo, + review_context_branch=review_context_branch, + bug_id=bug_id, + extra_context_toml=extra_context_toml, + content_overrides=content_overrides, + ) + if content_items: + external_context = format_external_content(content_items) + external_content_manifest = build_external_content_manifest( + content_items + ) + unfiltered_suggestions = await self.generate_review_comments( - patch, patch_summary + patch, patch_summary, external_context ) if not unfiltered_suggestions: logger.info("No suggestions were generated") @@ -238,6 +276,7 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse: details={ "model": self._agent_model_name, "num_unfiltered_suggestions": len(unfiltered_suggestions), + "external_content": external_content_manifest, }, ) diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py index bf4073d474..a5787e4110 100644 --- a/bugbug/tools/code_review/prompts.py +++ b/bugbug/tools/code_review/prompts.py @@ -63,6 +63,7 @@ {patch_summarization} +{external_context} Here are examples of good code review comments to guide your style and approach: From 2a16d6ee0d7711f79a41612edb3cd3d254c28ec7 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Mon, 1 Jun 2026 16:52:06 +0200 Subject: [PATCH 6/7] Document rule-based code review context Add developer and rule-author documentation for review-context.toml, including validation, matching semantics, trust policy, metadata predicates, fetch_revision, testing overrides, and prompt injection. The example TOML demonstrates the intended full rule language separately from the subset currently active in the loader, and the docs examples are covered by tests to keep them valid. --- docs/code-review-context-example.toml | 232 +++++++++++ docs/code-review-skills.md | 395 +++++++++++++++++++ tests/test_code_review.py | 540 +++++++++++++++----------- 3 files changed, 941 insertions(+), 226 deletions(-) create mode 100644 docs/code-review-context-example.toml create mode 100644 docs/code-review-skills.md diff --git a/docs/code-review-context-example.toml b/docs/code-review-context-example.toml new file mode 100644 index 0000000000..b6083d87d1 --- /dev/null +++ b/docs/code-review-context-example.toml @@ -0,0 +1,232 @@ +# Proposed review-context.toml external content rules. +# +# This is a design sketch for the next rule language, not the format currently +# accepted by the loader. +# +# This file is intentionally verbose. It shows the major language features and +# comments on what each one does. + +# Version is required so the parser can reject files written for a future +# incompatible format. +version = 1 + +# GitHub loads are restricted. The current review-context repo is always +# allowed. Entries ending in `/` allow every repo under that organization; +# entries in `org/repo` form allow a single repo. +[policy.github] +allowed_repos = [ + "mozilla/", + "web-platform-tests/wpt", + "whatwg/html", +] + +# Definitions are reusable predicate fragments. They are expanded structurally, +# as if the referenced table had been written inline. The parser should reject +# unknown refs, cycles, and refs to the wrong predicate kind. +[definitions.files.media_impl] +include = ["dom/media/**"] +exclude = [ + "dom/media/test/**", + "dom/media/gtest/**", + "dom/media/webaudio/test/**", + "dom/media/**/Generated*.cpp", +] +ext = [".cpp", ".h", ".mm"] + +[definitions.files.frontend_ui] +include = [ + "browser/components/**", + "browser/base/content/**", + "toolkit/components/**", + "toolkit/content/**", +] +exclude = [ + "browser/components/**/test/**", + "toolkit/components/**/test/**", + "**/*.snap", +] +ext = [".js", ".mjs", ".sys.mjs", ".html", ".xhtml", ".css", ".scss"] + +[definitions.files.docs] +ext = [".md", ".rst"] + +# Metadata predicates can be reusable too. A predicate can only reference a +# definition from its own namespace: `bugzilla` refs `definitions.bugzilla.*`, +# `review` refs `definitions.review.*`, and `patch` refs +# `definitions.patch.*`. +[definitions.bugzilla.dom_api] +component = ["Core::DOM: Core & HTML", "Core::DOM: Events"] + +[definitions.review.frontend_reviewers] +# `reviewer` matches both blocking and non-blocking reviewers. +reviewer = ["frontend-reviewers"] + +[definitions.review.dom_reviewers] +reviewer = ["dom-reviewers", "webidl-reviewers"] + +[definitions.review.blocking_media_reviewers] +blocking_reviewer = ["padenot", "media-reviewers"] + +[definitions.patch.backport_repositories] +is_backport = true +repository = ["firefox-beta", "firefox-release", "firefox-esr"] + +# `priority` is optional. It defaults to 0 and only controls ordering among +# loaded external content blocks. Higher priority loads first. Ties preserve +# declaration order. All external content is injected before the patch diff. +[[rules]] +name = "Media: implementation" +description = "Load media architecture and realtime-safety guidance for C++ media changes." +owners = ["media-reviewers"] +priority = 80 + +# `all` means every child predicate must match. +# `any_file` means at least one changed file must match the file predicate. +# The file predicate uses the reusable definition above. +when = { all = [ + { any_file = { ref = "files.media_impl" } }, +] } + +# `kind` is an optional free-form audit/prompt hint. Load failures are +# non-fatal: the loader logs an error and continues with remaining loads. +load = [ + { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" }, + { type = "file", repo = "mozilla/cubeb", branch = "main", path = "docs/realtime-programming.md", kind = "policy" }, +] + + +[[rules]] +name = "Firefox frontend: JS, HTML, CSS" +description = "Load frontend conventions for browser chrome, toolkit UI, and component styles." +owners = ["frontend-reviewers"] +priority = 60 + +# `any` means at least one child predicate must match. +# This rule fires for frontend UI files when a frontend reviewer/group is +# assigned. +when = { all = [ + { any_file = { ref = "files.frontend_ui" } }, + { review = { ref = "review.frontend_reviewers" } }, +] } + +load = [ + { type = "file", path = ".claude/review/firefox-frontend.md", kind = "style-guide" }, + { type = "file", path = "browser/docs/frontend/performance.md", kind = "performance" }, +] + + +[[rules]] +name = "DOM API implementation" +description = "Load WebIDL and DOM binding guidance for changes that expose or implement web APIs." +owners = ["dom-reviewers"] +priority = 90 + +# Boolean composition can be nested. This rule fires for: +# - any WebIDL file under dom/, +# - C++/header changes under dom/ when Bugzilla says this is a DOM bug, or +# - DOM binding implementation files when a DOM/WebIDL reviewer is assigned. +when = { any = [ + { any_file = { include = ["dom/**"], ext = [".webidl"] } }, + { all = [ + { any_file = { include = ["dom/**"], ext = [".cpp", ".h"] } }, + { bugzilla = { ref = "bugzilla.dom_api" } }, + ] }, + { all = [ + { any_file = { include = ["dom/bindings/**"], ext = [".cpp", ".h", ".py"] } }, + { review = { ref = "review.dom_reviewers" } }, + ] }, +] } + +load = [ + { type = "file", path = ".claude/review/dom-api.md", kind = "api-contract" }, + { type = "file", path = "dom/docs/webidl-bindings.md", kind = "reference" }, +] + + +[[rules]] +name = "Docs-only change" +description = "Load writing and API documentation guidance when every changed file is documentation." +owners = ["docs-reviewers"] + +# `all_files` means every changed file must match the file predicate. This is +# useful for patch-shape rules such as docs-only, tests-only, or generated-only. +# This rule intentionally omits priority; it defaults to 0. +when = { all_files = { ref = "files.docs" } } + +load = [ + { type = "file", path = ".claude/review/docs.md", kind = "style-guide" }, +] + + +[[rules]] +name = "Security-sensitive DOM changes" +description = "Load security guidance for non-test security-relevant DOM changes." +owners = ["sec-reviewers", "dom-reviewers"] +priority = 100 + +# `not` negates its child predicate. This rule fires when some DOM security +# file changed, but the patch is not test-only. +when = { all = [ + { any_file = { include = ["dom/security/**"], ext = [".cpp", ".h", ".webidl"] } }, + { not = { all_files = { include = ["dom/security/test/**"] } } }, +] } + +load = [ + { type = "file", path = ".claude/review/security-sensitive.md", kind = "policy" }, +] + + +[[rules]] +name = "Backport risk checklist" +description = "Load release-branch guidance for patches targeting a backport repository or branch." +owners = ["release-managers"] +priority = 70 + +# `patch.repository` and `patch.is_backport` come from trusted patch metadata. +# For GitHub entrypoints, repository is org/project. For Phabricator reviews, +# repository is the target Phabricator repository exposed by the Mozilla MCP. +when = { all = [ + { patch = { ref = "patch.backport_repositories" } }, +] } + +load = [ + { type = "file", path = ".claude/review/backport-risk.md", kind = "release-policy" }, +] + + +[[rules]] +name = "Blocking reviewer guidance" +description = "Load reviewer-specific expectations when a blocking media reviewer is assigned." +owners = ["media-reviewers"] +priority = 85 + +# Review identities are Phabricator emails or review groups. +# `blocking_reviewer` narrows to reviewers whose approval is required before +# landing. Those reviewers also match ordinary `reviewer` predicates. +when = { all = [ + { any_file = { ref = "files.media_impl" } }, + { review = { ref = "review.blocking_media_reviewers" } }, +] } + +# Dedupe is global per review. If another matched rule already loaded +# media-architecture.md, this action records this rule in the manifest but does +# not fetch or inject the same source a second time. +load = [ + { type = "file", path = ".claude/review/media-architecture.md", kind = "architecture" }, + { type = "file", path = ".claude/review/media-reviewer-checklist.md", kind = "checklist" }, +] + + +[[rules]] +name = "Related revision context" +description = "Load another reviewed diff when a patch depends on behavior from a known revision." +owners = ["media-reviewers"] + +# `fetch_revision` injects a trusted diff as context. Phabricator revisions are +# fetched through the platform client/MCP path; GitHub commits use repo+hash. +when = { any_file = { include = ["dom/media/mediacontrol/**"] } } + +load = [ + { type = "fetch_revision", revision = "D303229", kind = "related-diff" }, + { type = "fetch_revision", repo = "mozilla/cubeb", hash = "0123456789abcdef0123456789abcdef01234567", kind = "related-diff" }, +] diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md new file mode 100644 index 0000000000..ab0cbfd9ae --- /dev/null +++ b/docs/code-review-skills.md @@ -0,0 +1,395 @@ +# Code Review Skills + +The code review agent can load external knowledge at review time, injecting it +as context before the LLM sees the patch. This lets domain experts encode their +knowledge once, in a form the agent uses automatically whenever it reviews +relevant code. + +## How it works + +When the agent reviews a patch, it: + +1. Fetches `review-context.toml` from the root of a GitHub repository passed to + the review entrypoint as `review_context_repo` (and optionally `review_context_branch`). +2. Parses the changed files from the diff. +3. Matches each rule against the changed files (and optionally the patch's + associated Bugzilla component, fetched from BMO). +4. For each matched rule, fetches the referenced skill or documentation files. +5. Injects the content as `` blocks inside an `` + section in the review prompt, before the patch. + +Pass the GitHub repository containing `review-context.toml` to `patch_review`, +using `org/project` syntax. `review_context_branch` defaults to `main`; pass another +branch when reviewing a backport or another release branch. + +``` +/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \ + review_context_repo="mozilla-firefox/firefox" \ + review_context_branch="main" +``` + +--- + +## For bugbug developers + +### Key files + +| File | Purpose | +| --------------------------------------------------- | ------------------------------------------------------------------ | +| `bugbug/tools/code_review/review_context_schema.py` | Stdlib-only rule schema and validator CLI | +| `bugbug/tools/code_review/review_context.py` | Rule engine: parses rules, matches files, loads external content | +| `bugbug/tools/code_review/data_types.py` | `Skill` model with async HTTP fetch and frontmatter stripping | +| `bugbug/tools/code_review/agent.py` | Calls `load_external_content_for_diff` in `run()`, formats context | +| `bugbug/tools/code_review/prompts/system.md` | System prompt template | +| `bugbug/tools/code_review/prompts/first_message.md` | First user message template (contains `{external_context}` slot) | + +### Prompt templates + +Both `system.md` and `first_message.md` are plain Markdown files loaded at +import time. Edit them directly — no Python changes needed. They use Python +`str.format()` placeholders (`{target_software}`, `{patch}`, +`{external_context}`, etc.). + +The `{external_context}` slot in `first_message.md` is filled by +`format_external_content()` from `review_context.py` when rules match. It is an +empty string otherwise, so no blank section appears in the prompt. + +Loaded content is preceded by an `` block. The +manifest excludes the body text and records each item's name, source, source +type, matched rules, trust reason, byte count, and SHA-256 digest. The same +manifest is also returned in `CodeReviewToolResponse.details["external_content"]`. + +All currently injected external content is trusted: + +- `file` content is fetched from a human-reviewed GitHub repository. +- `fetch_revision` content comes from configured review platforms. +- GitHub `file` loads are restricted by the repository allow-list in + `review-context.toml`; the repository containing the rules file is always + allowed. + +### Caching + +The parsed rules file is cached in-process for a short TTL, keyed by +`review_context_repo`, `review_context_branch`, and rules path. External content +is fetched for each review so the audit manifest reflects exactly what was +injected for that review. The shared `httpx.AsyncClient` from +`get_http_client()` handles connection reuse. + +### Metadata predicates + +Implemented via `libmozdata.Bugzilla` with `include_fields=["product", "component"]`. +The patch's associated bug ID is read from `patch.bug_id` (available on +`PhabricatorPatch`; other patch types pass `None`, which fails closed). The +component string compared against the rule is `"Product::Component"`. + +The schema accepts `bugzilla`, `review`, and `patch` predicates. Runtime +matching currently implements `bugzilla.component`; `bugzilla.product`, +`bugzilla.keywords`, `bugzilla.severity`, `review.*`, and `patch.*` are parsed +and validated but fail closed until trusted metadata is wired into the matcher. + +### `fetch_revision` + +Fetches the raw diff of another revision for use as context. Supports: + +- **Phabricator**: `{ type = "fetch_revision", revision = "D12345" }` — calls + the Phabricator API via `get_phabricator_client()`. +- **GitHub**: `{ type = "fetch_revision", repo = "org/repo", hash = "abc123" }` + — uses the GitHub API with `Accept: application/vnd.github.diff`. + +--- + +## For Mozilla developers: authoring skills and rules + +### Overview + +You add two things to your repository: + +- One or more **skill or documentation files** — Markdown or RST documents + with expert knowledge, guidelines, or relevant specs. These can be existing + docs already in the tree, or new files written specifically as review guides. +- Entries in **`review-context.toml`** at the repo root — rules that say which + files to load when certain code paths are touched. + +The review agent picks these up automatically on the next review of a matching +patch. + +### Skill file format + +Skill files are Markdown (`.md`); documentation files referenced as context can +also be RST (`.rst`). All formats accept optional YAML frontmatter. The +frontmatter is stripped before injection; only the body reaches the LLM. See an +[existing example on Searchfox](https://searchfox.org/mozilla-central/source/.claude/skills/firefox-desktop-frontend/SKILL.md). + +```markdown +--- +name: dom-audio +description: Web Audio API review guidance +--- + +## Web Audio review checklist + +- `AudioContext` must not be created on a background thread. +- `AudioNode` lifetimes are graph-managed; do not hold raw pointers. +- Prefer `MediaStreamTrack` over raw PCM where the spec allows it. +``` + +Conventional location for dedicated skill files: + +``` +.claude/skills//SKILL.md +``` + +### `review-context.toml` format + +`review-context.toml` is parsed as TOML and then validated against the rule +schema before any rule is matched. Unknown rule fields, unknown action types, +and malformed `fetch_revision` actions are rejected. + +You can validate a rules file without running a review: + +``` +bugbug-validate-review-context review-context.toml +``` + +The validator implementation lives in +`bugbug/tools/code_review/review_context_schema.py` and intentionally uses only the +Python standard library. A target repository can either run the bugbug CLI or +copy that single file into its own tests to validate `review-context.toml`. +Complete TOML examples in this document are marked as `toml review-context` and +validated by bugbug's test suite to keep the docs and parser from drifting. +For a complete validated rules file, see `docs/code-review-context-example.toml`. + +```toml review-context +version = 1 + +[[rules]] +name = "DOM: Web Audio C++" +when = { any_file = { include = ["dom/media/webaudio/**"], ext = [".cpp", ".h"] } } +load = [ + { type = "file", path = ".claude/skills/dom-audio/SKILL.md" }, +] +``` + +Multiple rules can fire for a single patch. Actions are deduplicated across +rules, so the same file is never fetched twice. + +All loaded external content is injected before the patch diff. `priority` is +optional and only affects ordering among loaded external content blocks: +matched rules are ordered by descending priority, defaulting to `0`; ties keep +declaration order. Load entries keep their order within a rule. If the same +resolved source is requested more than once, it is loaded once at its first +ordered position and the audit manifest records every matched rule that +requested it. + +Load failures are non-fatal. The loader logs an error and continues with the +remaining loads. + +### Rule matching + +Each rule has a required `when` predicate. Boolean predicates compose matching: +`all` requires every child to match, `any` requires at least one child to match, +and `not` negates its child. + +Path matching is defined over repository-relative paths from the patch, not +over local VCS checkout paths. Paths are normalized to POSIX-style separators +(`/`) with no leading `./`. File predicates use Python `fnmatch.fnmatchcase`, so +matching is case-sensitive on every platform, including Windows and macOS. That +keeps rule behavior tied to Git paths rather than host filesystem casing. + +The supported glob syntax is Python `fnmatchcase`: `*`, `?`, and character +classes such as `[ch]` and `[!0-9]`. In Python `fnmatchcase`, `*` can match +`/`, so patterns like `dom/media/**` match everything below that prefix. + +File quantifiers are explicit: + +```toml +# At least one changed file must match all predicates in the object. +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } + +# Every changed file must match all predicates in the object. +when = { all_files = { ext = [".md", ".rst"] } } +``` + +`any_file` is the normal implementation-code case. `all_files` is for +patch-shape rules such as docs-only, tests-only, or generated-only changes. + +### Reusable predicates + +Named predicate definitions let you reuse path sets or metadata groups across +rules. Definitions are referenced explicitly with `ref` and expanded +structurally, as if the referenced table had been written inline. + +```toml review-context +version = 1 + +[definitions.files.media_impl] +include = ["dom/media/**"] +exclude = ["dom/media/test/**", "dom/media/gtest/**"] +ext = [".cpp", ".h", ".mm"] + +[[rules]] +name = "Media: implementation" +when = { any_file = { ref = "files.media_impl" } } +load = [ + { type = "file", path = ".claude/skills/dom-media/SKILL.md" }, +] +``` + +Definitions are validated like inline predicates. References are namespace +checked: `files.*` refs are valid only in file predicates, `bugzilla.*` only in +Bugzilla predicates, `review.*` only in review predicates, and `patch.*` only +in patch predicates. Unknown references and references combined with inline +fields are schema errors. + +### Metadata trigger fields + +File predicates are separate from external metadata predicates. The parser +validates these field names and their value types. At runtime, only +`bugzilla.component` currently affects matching; the other fields are accepted +by the schema for forward-compatible rule files but fail closed. + +| Namespace | Field | Type | Runtime behavior | +| ---------- | ------------------- | --------------- | ------------------------------------------------ | +| `bugzilla` | `component` | list of strings | Matches exact `Product::Component` from Bugzilla | +| `bugzilla` | `product` | list of strings | Parsed and validated; currently fails closed | +| `bugzilla` | `keywords` | list of strings | Parsed and validated; currently fails closed | +| `bugzilla` | `severity` | list of strings | Parsed and validated; currently fails closed | +| `review` | `author` | list of strings | Parsed and validated; currently fails closed | +| `review` | `reviewer` | list of strings | Parsed and validated; currently fails closed | +| `review` | `blocking_reviewer` | list of strings | Parsed and validated; currently fails closed | +| `patch` | `repository` | list of strings | Parsed and validated; currently fails closed | +| `patch` | `is_backport` | boolean | Parsed and validated; currently fails closed | + +When review metadata matching is wired up, `reviewer` should match both +blocking and non-blocking reviewers. Use `blocking_reviewer` only when the rule +specifically needs a reviewer whose approval is required before landing. + +Example — load a skill when C++ or HTML files change under `dom/media/` (which +catches both implementation files and their tests): + +```toml review-context +version = 1 + +[[rules]] +name = "DOM: Media C++ and tests" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h", ".html"] } } +load = [ + { type = "file", path = ".claude/skills/dom-media/SKILL.md" }, +] +``` + +### Action types + +#### `file` — fetch a file from a GitHub repository + +```toml +# File from the same repo and branch as review-context.toml (default) +{ type = "file", path = ".claude/skills/dom-audio/SKILL.md" } + +# File from another repo (e.g., cubeb's real-time programming guidelines) +{ type = "file", path = ".claude/skills/real-time-programming/SKILL.md", + repo = "mozilla/cubeb" } + +# Explicit branch +{ type = "file", path = ".claude/skills/dom-audio/SKILL.md", + repo = "mozilla-firefox/firefox", branch = "main" } +``` + +Constructs a raw GitHub URL: +`https://raw.githubusercontent.com/{repo}/refs/heads/{branch}/{path}`. +When `repo` is omitted, the repo is the `review_context_repo` passed to the review +entrypoint. Same-repo content uses `review_context_branch`; cross-repo content defaults +to `main` unless the action specifies `branch`. + +Any Markdown or RST file in the repository can be referenced — skill files, +architecture docs, READMEs, spec notes, etc. + +`kind` is optional on `file` and `fetch_revision` actions. It is a free-form +audit hint recorded in the action object in the external content manifest; the +loader does not interpret it. + +#### GitHub repository allow-list + +GitHub loads are restricted to keep review context auditable: + +- The repository containing `review-context.toml` is always allowed. +- Entries ending in `/`, such as `mozilla/`, allow every repo under that + organization. +- Entries in `org/repo` form allow a single repo. + +```toml +[policy.github] +allowed_repos = [ + "mozilla/", + "web-platform-tests/wpt", + "whatwg/html", +] +``` + +Repository names are normalized to lowercase before checking the policy. If a +`load` action references a disallowed repository, the loader does not fetch it. +The failure is non-fatal: it logs an error, skips that load, and continues with +the remaining loads. Skipped loads are not included in the external content +manifest because no content was injected. + +#### `fetch_revision` — diff of another revision as context + +Injects the raw diff of a related revision — useful for providing context about +a stack parent or a related patch. + +```toml +# Phabricator revision +{ type = "fetch_revision", revision = "D12345" } + +# GitHub commit (use a git commit hash — Firefox moved from Mercurial to GitHub) +{ type = "fetch_revision", repo = "padenot/cubeb", hash = "abc123def456" } +``` + +### Testing your rules and skills + +Pass your work-in-progress rules and skill files directly to `patch_review` +via the MCP. Rules are merged by name into the fetched `review-context.toml` +(replacing a rule with the same name, or appending if new). Skill content is +injected directly, bypassing the network fetch for matching names. + +From Claude Code, trigger a review with your local files: + +``` +/patch_review patch_url="https://phabricator.services.mozilla.com/D295935" \ + review_context_repo="mozilla-firefox/firefox" \ + extra_context_toml="$(cat review-context.toml)" \ + content_overrides='{".claude/skills/dom-audio/SKILL.md": "skill content here"}' +``` + +`content_overrides` is a JSON object (as a string). For multi-line skill files, +build the JSON separately: + +```bash +python3 -c "import json,sys; print(json.dumps({'.claude/skills/dom-audio/SKILL.md': open('.claude/skills/dom-audio/SKILL.md').read()}))" +``` + +Then paste the output as the `content_overrides` value. + +This simulates the state as if your changes had already landed — no push +required. If a skill is missing, verify the rule conditions match the patch's +changed files (`any_file` / `all_files` against the actual `+++ b/` paths in +the diff). + +### Gotchas + +**Rules are evaluated against the post-patch file list.** Paths in file predicates +should match the `+++ b/` filenames in the diff. + +**Glob patterns use `fnmatchcase` semantics.** `dom/media/**` matches +`dom/media/foo/bar.cpp`. + +**Frontmatter is stripped automatically.** You can put metadata (name, +description, owner) in frontmatter without it polluting the injected context. + +**Load failures are non-fatal.** The review proceeds without that content, and +the failure is logged at ERROR level and captured by Sentry. Check the URL +manually if content you expect is not appearing. + +**Bugzilla component rules require the patch to have an associated bug.** On +non-Phabricator patches or patches without a bug link, Bugzilla component rules +never match. diff --git a/tests/test_code_review.py b/tests/test_code_review.py index f8eac7d462..7c9e22258e 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -1,6 +1,8 @@ import asyncio import logging import os +import re +from pathlib import Path from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch @@ -28,14 +30,15 @@ github_repo_allowed, load_external_content_for_diff, parse_diff_files, + parse_review_context_toml, rule_matches, ) from bugbug.tools.code_review.review_context_schema import ( AnyFilePredicate, + BugzillaPredicate, FilePredicate, ReviewContextValidationError, Rule, - parse_review_context_toml, ) from bugbug.tools.code_review.review_context_schema import ( main as validate_review_context_main, @@ -72,6 +75,17 @@ async def fetch(path): return fetch +def extract_review_context_examples(markdown: str) -> list[tuple[int, str]]: + examples = [] + fence_re = re.compile(r"^```(?P[^\n]*)\n(?P.*?)^```", re.M | re.S) + for match in fence_re.finditer(markdown): + info = match.group("info").split() + if info == ["toml", "review-context"]: + line = markdown.count("\n", 0, match.start()) + 1 + examples.append((line, match.group("body"))) + return examples + + # --------------------------------------------------------------------------- # strip_diff_prefix # --------------------------------------------------------------------------- @@ -550,6 +564,13 @@ async def test_search_identifier_accepts_double_encoded_tests_value(): assert client.search.await_args.kwargs["tests"] == "only" +@pytest.fixture(autouse=True) +def clear_review_context_cache(): + review_context._review_context_cache.clear() + yield + review_context._review_context_cache.clear() + + @pytest.mark.asyncio async def test_external_content_load_caches(): item = ExternalContent( @@ -617,13 +638,6 @@ async def test_external_content_load_caches(): """ -@pytest.fixture(autouse=True) -def clear_review_context_cache(): - review_context._review_context_cache.clear() - yield - review_context._review_context_cache.clear() - - def test_parse_diff_files(): files = parse_diff_files(_DIFF_MEDIA) assert files == {"dom/media/Foo.cpp"} @@ -655,81 +669,79 @@ def test_rule_matches_extension_only(): load=[], ) assert rule_matches(rule, {"dom/webidl/Foo.webidl"}) - assert not rule_matches(rule, {"dom/webidl/Foo.js"}) - - -def test_rule_bugzilla_component_fails_closed_without_component(): - config = parse_review_context_toml(_RULES_TOML) - rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") - assert not rule_matches(rule, set(), bug_component=None) - - -def test_rule_bugzilla_component_matches(): - config = parse_review_context_toml(_RULES_TOML) - rule = next(rule for rule in config.rules if rule.name == "Bugzilla component only") - assert rule_matches(rule, set(), bug_component="Core::DOM: Web Audio") - assert not rule_matches(rule, set(), bug_component="Core::Layout") - - -@pytest.mark.asyncio -async def test_get_bug_component(): - with patch("libmozdata.bugzilla.Bugzilla") as bugzilla_cls: - instance = MagicMock() - bugzilla_cls.return_value = instance - - def get_data(): - handler = bugzilla_cls.call_args.kwargs["bughandler"] - data = bugzilla_cls.call_args.kwargs["bugdata"] - handler({"product": "Core", "component": "DOM: Web Audio"}, data) - return MagicMock(wait=MagicMock()) - - instance.get_data.side_effect = get_data - assert await get_bug_component(123) == "Core::DOM: Web Audio" + assert not rule_matches(rule, {"dom/media/Foo.cpp"}) def test_parse_review_context_toml_rejects_missing_when(): toml = """ version = 1 + [[rules]] -name = "Broken" -load = [{ type = "file", path = "x.md" }] +name = "Bad rule" +load = [{ type = "file", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="rules\\[0\\].when"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) def test_parse_review_context_toml_rejects_unknown_action_type(): toml = """ version = 1 + [[rules]] -name = "Broken" +name = "Bad rule" when = { any_file = { ext = [".cpp"] } } -load = [{ type = "url", path = "x.md" }] +load = [{ type = "unknown", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="unknown action type"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) def test_parse_review_context_toml_rejects_unknown_rule_field(): toml = """ version = 1 + [[rules]] -name = "Broken" -bogus = true +name = "Bad rule" when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "x.md" }] +match_unknown = ["x"] +load = [{ type = "file", path = "skills/guide.md" }] """ - with pytest.raises(ReviewContextValidationError, match="unknown field"): + with pytest.raises(ReviewContextValidationError): parse_review_context_toml(toml) +def test_parse_review_context_example_file(): + repo_root = Path(__file__).resolve().parent.parent + example = (repo_root / "docs/code-review-context-example.toml").read_text() + config = parse_review_context_toml(example) + assert config.version == 1 + assert len(config.rules) >= 1 + assert "whatwg/html" in config.policy.github.allowed_repos + + +def test_parse_review_context_examples_from_docs(): + repo_root = Path(__file__).resolve().parent.parent + docs = (repo_root / "docs/code-review-skills.md").read_text() + examples = extract_review_context_examples(docs) + assert examples + + for line, example in examples: + try: + config = parse_review_context_toml(example) + except ReviewContextValidationError as exc: + pytest.fail(f"docs/code-review-skills.md:{line}: {exc}") + assert config.rules, f"docs/code-review-skills.md:{line}: expected rules" + + def test_validate_review_context_main(tmp_path, capsys): review_context_path = tmp_path / "review-context.toml" review_context_path.write_text(_RULES_TOML) assert validate_review_context_main([str(review_context_path)]) == 0 + captured = capsys.readouterr() - assert "valid" in captured.out + assert "valid (4 rule(s))" in captured.out def test_validate_review_context_main_failure(tmp_path, capsys): @@ -737,63 +749,120 @@ def test_validate_review_context_main_failure(tmp_path, capsys): review_context_path.write_text( """ version = 1 + [[rules]] -name = "Broken" -load = [{ type = "file", path = "x.md" }] +name = "Bad rule" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "unknown" }] """ ) assert validate_review_context_main([str(review_context_path)]) == 1 + captured = capsys.readouterr() assert "invalid" in captured.err + assert "unknown action type" in captured.err + + +def test_rule_bugzilla_component_fails_closed_without_component(): + rule = Rule( + name="test", + when=BugzillaPredicate(component=["Core::DOM: Web Audio"]), + load=[], + ) + assert not rule_matches(rule, {"dom/media/Foo.cpp"}, bug_component=None) + + +def test_rule_bugzilla_component_matches(): + rule = Rule( + name="test", + when=BugzillaPredicate(component=["Core::DOM: Web Audio"]), + load=[], + ) + assert rule_matches( + rule, {"dom/media/Foo.cpp"}, bug_component="Core::DOM: Web Audio" + ) + assert not rule_matches( + rule, {"dom/media/Foo.cpp"}, bug_component="Core::Networking" + ) + + +@pytest.mark.asyncio +async def test_get_bug_component(): + def fake_bugzilla(bug_id, include_fields, bughandler, bugdata): + bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata) + mock = MagicMock() + mock.get_data.return_value.wait = MagicMock() + return mock + + with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla): + component = await get_bug_component(12345) + + assert component == "Core::DOM: Web Audio" @pytest.mark.asyncio async def test_load_external_content_for_diff_file_load(): review_context_repo = "mozilla-firefox/firefox" + content_body = "---\nname: dom-media\n---\nAudio guidelines.\n" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + content_response = MagicMock() - content_response.text = "---\nname: dom-media\n---\nMedia review guidelines\n" + content_response.text = content_body content_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(side_effect=[rules_response, content_response]) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, review_context_branch="release" - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, review_context_branch="release" + ) assert len(results) == 1 - assert results[0].name == ".claude/skills/dom-media.md" - assert results[0].body == "Media review guidelines\n" - assert results[0].source_type == "github_file" - assert results[0].matched_rules == ["Audio/Video C++"] - client.get.assert_any_await( - "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/review-context.toml", - timeout=30, + item = results[0] + assert item.name == ".claude/skills/dom-media.md" + assert item.body == "Audio guidelines.\n" + assert item.source_type == "github_file" + assert item.trusted + assert item.trust_reason == "github_repo_content" + assert item.matched_rules == ["Audio/Video C++"] + assert item.bytes == len("Audio guidelines.\n".encode()) + assert item.sha256 + assert client.get.await_args_list[0].args[0] == ( + "https://raw.githubusercontent.com/" + "mozilla-firefox/firefox/refs/heads/release/review-context.toml" ) - client.get.assert_any_await( - "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/release/.claude/skills/dom-media.md", - timeout=30, + assert client.get.await_args_list[1].args[0] == ( + "https://raw.githubusercontent.com/" + "mozilla-firefox/firefox/refs/heads/release/" + ".claude/skills/dom-media.md" ) @pytest.mark.asyncio async def test_load_external_content_for_diff_no_match(): review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - diff = """diff --git a/README.txt b/README.txt\n--- a/README.txt\n+++ b/README.txt\n@@ -1 +1 @@\n-a\n+b\n""" + diff = "diff --git a/build/Makefile b/build/Makefile\n+++ b/build/Makefile\n+new\n" + with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff(diff, review_context_repo) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff(diff, review_context_repo) assert results == [] + assert client.get.await_count == 1 @pytest.mark.asyncio @@ -802,224 +871,254 @@ async def test_load_external_content_for_diff_rules_fetch_failure(): client.get = AsyncMock(side_effect=httpx.ConnectError("boom")) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, "mozilla-firefox/firefox" - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, "mozilla-firefox/firefox" + ) assert results == [] @pytest.mark.asyncio async def test_load_external_content_deduplicates_actions(): - rules = """ + toml = """ version = 1 [[rules]] -name = "A" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/cpp.md" }] +name = "Rule A" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp"] } } +load = [{ type = "file", path = "skills/guide.md" }] [[rules]] -name = "B" -when = { any_file = { include = ["dom/media/**"] } } -load = [{ type = "file", path = ".claude/skills/cpp.md" }] +name = "Rule B" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", path = "skills/guide.md" }] """ review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() + content_response = MagicMock() - content_response.text = "C++ guidelines" + content_response.text = "body" content_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(side_effect=[rules_response, content_response]) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo + ) assert len(results) == 1 - assert results[0].matched_rules == ["A", "B"] + assert results[0].matched_rules == ["Rule A", "Rule B"] assert client.get.await_count == 2 @pytest.mark.asyncio async def test_load_external_content_orders_by_priority(): - rules = """ + toml = """ version = 1 [[rules]] name = "Low" -priority = 0 +priority = 1 when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/low.md" }] +load = [{ type = "file", path = "skills/low.md" }] [[rules]] name = "High" priority = 10 when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = ".claude/skills/high.md" }] +load = [{ type = "file", path = "skills/high.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() - low_response = MagicMock() - low_response.text = "low" - low_response.raise_for_status = MagicMock() - high_response = MagicMock() - high_response.text = "high" - high_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, high_response, low_response]) + client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={ + "skills/low.md": "low\n", + "skills/high.md": "high\n", + }, + ) - assert [r.name for r in results] == [ - ".claude/skills/high.md", - ".claude/skills/low.md", - ] + assert [item.name for item in results] == ["skills/high.md", "skills/low.md"] @pytest.mark.asyncio async def test_load_external_content_rejects_disallowed_github_repo(): - rules = """ + toml = """ version = 1 + [[rules]] -name = "Cross repo" -when = { any_file = { ext = [".webidl"] } } -load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +name = "External" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", repo = "whatwg/html", path = "review.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_WEBIDL, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={"review.md": "external\n"}, + ) assert results == [] - assert client.get.await_count == 1 @pytest.mark.asyncio async def test_load_external_content_allows_policy_prefix_repo(): - rules = """ + toml = """ version = 1 + [policy.github] allowed_repos = ["mozilla/"] + [[rules]] -name = "Cross repo" -when = { any_file = { ext = [".webidl"] } } -load = [{ type = "file", repo = "mozilla/cubeb", path = "REVIEWING.md" }] +name = "External" +when = { any_file = { ext = [".cpp"] } } +load = [{ type = "file", repo = "mozilla/cubeb", path = "review.md" }] """ review_context_repo = "mozilla-firefox/firefox" rules_response = MagicMock() - rules_response.text = rules + rules_response.text = toml rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "cubeb guidelines" - content_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) + client.get = AsyncMock(return_value=rules_response) with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_WEBIDL, - review_context_repo, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + content_overrides={"review.md": "external\n"}, + ) - assert len(results) == 1 - assert results[0].source.endswith("/mozilla/cubeb/refs/heads/main/REVIEWING.md") + assert [item.name for item in results] == ["review.md"] + + +# --- _merge_rules --- def test_merge_rules_appends_new(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules( - base.rules, - """ + ).rules + extra = """ version = 1 + [[rules]] name = "B" when = { any_file = { ext = [".js"] } } load = [{ type = "file", path = "b.md" }] -""", - ) - assert [rule.name for rule in merged] == ["A", "B"] +""" + merged = _merge_rules(base, extra) + assert len(merged) == 2 + assert merged[1].name == "B" def test_merge_rules_replaces_by_name(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules( - base.rules, - """ + ).rules + extra = """ version = 1 + [[rules]] name = "A" -when = { any_file = { ext = [".js"] } } -load = [{ type = "file", path = "replacement.md" }] -""", - ) +when = { any_file = { ext = [".cpp", ".h"] } } +load = [{ type = "file", path = "a.md" }] +""" + merged = _merge_rules(base, extra) assert len(merged) == 1 - assert merged[0].load[0].path == "replacement.md" + assert merged[0].when.predicate.ext == [".cpp", ".h"] def test_merge_rules_empty_extra(): base = parse_review_context_toml( """ version = 1 + [[rules]] name = "A" when = { any_file = { ext = [".cpp"] } } load = [{ type = "file", path = "a.md" }] """ - ) - merged = _merge_rules(base.rules, "version = 1\n") - assert merged == base.rules + ).rules + merged = _merge_rules(base, "version = 1\n") + assert len(merged) == 1 + assert merged[0].name == "A" + assert merged[0].when.predicate.ext == [".cpp"] + assert len(merged[0].load) == 1 + + merged = _merge_rules(base, "version = 1\n# comment\n") + assert len(merged) == 1 + assert merged[0].name == "A" + assert merged[0].when.predicate.ext == [".cpp"] + assert len(merged[0].load) == 1 + + +# --- content_overrides --- @pytest.mark.asyncio async def test_content_override_used_instead_of_fetch(): review_context_repo = "mozilla-firefox/firefox" + rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - overrides = {".claude/skills/dom-media.md": "Override body"} + + overrides = {".claude/skills/dom-media.md": "Overridden content.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, content_overrides=overrides - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) assert len(results) == 1 - assert results[0].body == "Override body" - assert client.get.await_count == 1 + assert results[0].name == ".claude/skills/dom-media.md" + assert results[0].body == "Overridden content.\n" + assert client.get.await_count == 1 # only the rules fetch, not the content @pytest.mark.asyncio @@ -1028,131 +1127,120 @@ async def test_external_content_manifest_and_prompt_body(): rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() + client = MagicMock() client.get = AsyncMock(return_value=rules_response) - overrides = {".claude/skills/dom-media.md": "Guideline body"} + + overrides = {".claude/skills/dom-media.md": "Audio guidelines.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, review_context_repo, content_overrides=overrides - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, review_context_repo, content_overrides=overrides + ) manifest = external_content_manifest(results) assert manifest == [ { "name": ".claude/skills/dom-media.md", "source_type": "github_file", - "source": "https://raw.githubusercontent.com/mozilla-firefox/firefox/refs/heads/main/.claude/skills/dom-media.md", - "action": {"type": "file", "path": ".claude/skills/dom-media.md"}, + "source": ( + "https://raw.githubusercontent.com/mozilla-firefox/firefox/" + "refs/heads/main/.claude/skills/dom-media.md" + ), + "action": { + "type": "file", + "path": ".claude/skills/dom-media.md", + }, "matched_rules": ["Audio/Video C++"], "trusted": True, "trust_reason": "github_repo_content", - "bytes": len("Guideline body".encode()), + "bytes": len("Audio guidelines.\n".encode()), "sha256": results[0].sha256, } ] - prompt = format_external_content(results) - assert "" in prompt - assert "" in prompt - assert '' in prompt - assert "Guideline body" in prompt + + prompt_content = format_external_content(results) + assert "" in prompt_content + assert "" in prompt_content + assert "Audio guidelines." in prompt_content @pytest.mark.asyncio async def test_extra_context_toml_appended(): review_context_repo = "mozilla-firefox/firefox" - base_rules = """ + extra = """ version = 1 + [[rules]] -name = "Base" +name = "Extra JS rule" when = { any_file = { ext = [".js"] } } -load = [{ type = "file", path = "base.md" }] +load = [{ type = "file", path = ".claude/skills/js.md" }] """ - extra_rules = """ + diff_js = "diff --git a/Foo.js b/Foo.js\n+++ b/Foo.js\n+new\n" + + # Use a base TOML without any .js rules so only the extra rule fires. + base_toml = """ version = 1 + [[rules]] -name = "Extra" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "extra.md" }] +name = "Audio/Video C++" +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [{ type = "file", path = ".claude/skills/dom-media.md" }] """ rules_response = MagicMock() - rules_response.text = base_rules + rules_response.text = base_toml rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "extra" - content_response.raise_for_status = MagicMock() + client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) + client.get = AsyncMock(return_value=rules_response) + + overrides = {".claude/skills/js.md": "JS guidelines.\n"} with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - extra_context_toml=extra_rules, - ) + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + diff_js, + review_context_repo, + extra_context_toml=extra, + content_overrides=overrides, + ) - assert [result.name for result in results] == ["extra.md"] + assert len(results) == 1 + assert results[0].name == ".claude/skills/js.md" + assert results[0].body == "JS guidelines.\n" @pytest.mark.asyncio async def test_extra_context_toml_replaces_by_name(): review_context_repo = "mozilla-firefox/firefox" - extra_rules = """ + # Replace the "Audio/Video C++" rule with a version that loads different content + extra = """ version = 1 + [[rules]] name = "Audio/Video C++" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "file", path = "replacement.md" }] +when = { any_file = { include = ["dom/media/**"], ext = [".cpp", ".h"] } } +load = [{ type = "file", path = ".claude/skills/dom-media-v2.md" }] """ rules_response = MagicMock() rules_response.text = _RULES_TOML rules_response.raise_for_status = MagicMock() - content_response = MagicMock() - content_response.text = "replacement" - content_response.raise_for_status = MagicMock() - client = MagicMock() - client.get = AsyncMock(side_effect=[rules_response, content_response]) - - with patch.object(data_types, "get_http_client", return_value=client): - results = await load_external_content_for_diff( - _DIFF_MEDIA, - review_context_repo, - extra_context_toml=extra_rules, - ) - - assert [result.name for result in results] == ["replacement.md"] - -@pytest.mark.asyncio -async def test_load_external_content_fetches_phabricator_revision(): - rules = """ -version = 1 -[[rules]] -name = "Revision" -when = { any_file = { ext = [".cpp"] } } -load = [{ type = "fetch_revision", revision = "D123" }] -""" - review_context_repo = "mozilla-firefox/firefox" - rules_response = MagicMock() - rules_response.text = rules - rules_response.raise_for_status = MagicMock() client = MagicMock() client.get = AsyncMock(return_value=rules_response) - phabricator = MagicMock() - phabricator.load_revision.return_value = {"fields": {"diffID": 456}} - phabricator.load_raw_diff.return_value = "diff content" + overrides = {".claude/skills/dom-media-v2.md": "Updated guidelines.\n"} - with ( - patch.object(data_types, "get_http_client", return_value=client), - patch( - "bugbug.tools.core.platforms.phabricator.get_phabricator_client", - return_value=phabricator, - ), - ): - results = await load_external_content_for_diff(_DIFF_MEDIA, review_context_repo) + with patch.object(data_types, "get_http_client", return_value=client): + with patch.object(review_context, "get_http_client", return_value=client): + results = await load_external_content_for_diff( + _DIFF_MEDIA, + review_context_repo, + extra_context_toml=extra, + content_overrides=overrides, + ) assert len(results) == 1 - assert results[0].name == "Revision D123" - assert results[0].body == "diff content" - assert results[0].source_type == "phabricator_revision" + assert results[0].name == ".claude/skills/dom-media-v2.md" + assert results[0].body == "Updated guidelines.\n" From 6283994c5a933736e317cabd74388840ac8bac5e Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Thu, 11 Jun 2026 19:05:20 +0200 Subject: [PATCH 7/7] Address review comments --- bugbug/tools/code_review/agent.py | 72 ++++++++++------------ bugbug/tools/code_review/prompts.py | 3 + bugbug/tools/code_review/review_context.py | 56 +++++++++-------- bugbug/tools/core/platforms/base.py | 4 ++ bugbug/tools/core/platforms/phabricator.py | 24 ++++++++ tests/test_code_review.py | 20 +++++- 6 files changed, 110 insertions(+), 69 deletions(-) diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py index 6de20ea685..f04de3d860 100644 --- a/bugbug/tools/code_review/agent.py +++ b/bugbug/tools/code_review/agent.py @@ -70,6 +70,10 @@ def __init__( target_software: str = "Mozilla Firefox", todo_enabled: bool = True, skills: Optional[list[Skill]] = None, + review_context_repo: Optional[str] = None, + review_context_branch: str = "main", + extra_context_toml: Optional[str] = None, + content_overrides: Optional[dict[str, str]] = None, ) -> None: super().__init__() @@ -126,6 +130,11 @@ def __init__( self.verbose = verbose + self._review_context_repo = review_context_repo + self._review_context_branch = review_context_branch + self._extra_context_toml = extra_context_toml + self._content_overrides = content_overrides + @property def _agent_model_name(self) -> str: model = self._agent_model @@ -196,8 +205,23 @@ def generate_initial_prompt( ) async def generate_review_comments( - self, patch: Patch, patch_summary: str, external_context: str = "" - ) -> list[GeneratedReviewComment]: + self, patch: Patch, patch_summary: str + ) -> tuple[list[GeneratedReviewComment], list[dict]]: + external_context = "" + manifest: list[dict] = [] + if self._review_context_repo: + from bugbug.tools.code_review.review_context import ( + load_external_context_for_review, + ) + + external_context, manifest = await load_external_context_for_review( + patch, + self._review_context_repo, + review_context_branch=self._review_context_branch, + extra_context_toml=self._extra_context_toml, + content_overrides=self._content_overrides, + ) + try: async for chunk in self.agent.astream( { @@ -217,50 +241,18 @@ async def generate_review_comments( except GraphRecursionError as e: raise ModelResultError("The model could not complete the review") from e - return result["structured_response"].comments + return result["structured_response"].comments, manifest - async def run( - self, - patch: Patch, - review_context_repo: Optional[str] = None, - review_context_branch: str = "main", - extra_context_toml: Optional[str] = None, - content_overrides: Optional[dict[str, str]] = None, - ) -> CodeReviewToolResponse: + async def run(self, patch: Patch) -> CodeReviewToolResponse: if self.count_tokens(patch.raw_diff) > 21000: raise LargeDiffError("The diff is too large") patch_summary = self.patch_summarizer.run(patch) - external_context = "" - external_content_manifest = [] - if review_context_repo: - from bugbug.tools.code_review.review_context import ( - external_content_manifest as build_external_content_manifest, - ) - from bugbug.tools.code_review.review_context import ( - format_external_content, - load_external_content_for_diff, - ) - - bug_id = getattr(patch, "bug_id", None) - content_items = await load_external_content_for_diff( - patch.raw_diff, - review_context_repo, - review_context_branch=review_context_branch, - bug_id=bug_id, - extra_context_toml=extra_context_toml, - content_overrides=content_overrides, - ) - if content_items: - external_context = format_external_content(content_items) - external_content_manifest = build_external_content_manifest( - content_items - ) - - unfiltered_suggestions = await self.generate_review_comments( - patch, patch_summary, external_context - ) + ( + unfiltered_suggestions, + external_content_manifest, + ) = await self.generate_review_comments(patch, patch_summary) if not unfiltered_suggestions: logger.info("No suggestions were generated") diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py index a5787e4110..8d6d94b61f 100644 --- a/bugbug/tools/code_review/prompts.py +++ b/bugbug/tools/code_review/prompts.py @@ -63,7 +63,10 @@ {patch_summarization} + + {external_context} + Here are examples of good code review comments to guide your style and approach: diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py index 39791c70a1..fa34beb409 100644 --- a/bugbug/tools/code_review/review_context.py +++ b/bugbug/tools/code_review/review_context.py @@ -150,29 +150,6 @@ def parse_diff_files(diff: str) -> set[str]: return files -async def get_bug_component(bug_id: int) -> str | None: - """Return 'Product::Component' for the given Bugzilla bug, or None on failure.""" - from libmozdata.bugzilla import Bugzilla - - def _fetch() -> str | None: - bugs: list[dict] = [] - Bugzilla( - bug_id, - include_fields=["product", "component"], - bughandler=lambda bug, data: data.append(bug), - bugdata=bugs, - ).get_data().wait() - if not bugs: - return None - return f"{bugs[0]['product']}::{bugs[0]['component']}" - - try: - return await asyncio.get_event_loop().run_in_executor(None, _fetch) - except Exception: - logger.warning("Could not fetch component for bug %s", bug_id) - return None - - def rule_matches( rule: Rule, changed_files: set[str], bug_component: str | None = None ) -> bool: @@ -521,7 +498,7 @@ async def load_external_content_for_diff( review_context_repo: str, review_context_branch: str = "main", review_context_path: str = DEFAULT_REVIEW_CONTEXT_PATH, - bug_id: int | None = None, + patch=None, extra_context_toml: str | None = None, content_overrides: dict[str, str] | None = None, ) -> list[ExternalContentItem]: @@ -553,8 +530,8 @@ async def load_external_content_for_diff( return [] bug_component: str | None = None - if bug_id is not None: - bug_component = await get_bug_component(bug_id) + if patch is not None: + bug_component = await patch.bug_component() try: actions = collect_actions(diff, config, bug_component, extra_context_toml) @@ -589,3 +566,30 @@ def format_external_content(content_items: list[ExternalContentItem]) -> str: f"{content}\n" "" ) + + +async def load_external_context_for_review( + patch, + review_context_repo: str, + review_context_branch: str = "main", + extra_context_toml: str | None = None, + content_overrides: dict[str, str] | None = None, +) -> tuple[str, list[dict]]: + """Load and format external review context for a patch. + + Returns (formatted_context_str, manifest_list). Both are empty if no repo + is configured or no rules match. + """ + content_items = await load_external_content_for_diff( + patch.raw_diff, + review_context_repo, + review_context_branch=review_context_branch, + patch=patch, + extra_context_toml=extra_context_toml, + content_overrides=content_overrides, + ) + if not content_items: + return "", [] + return format_external_content(content_items), external_content_manifest( + content_items + ) diff --git a/bugbug/tools/core/platforms/base.py b/bugbug/tools/core/platforms/base.py index 72837d1088..ec89451715 100644 --- a/bugbug/tools/core/platforms/base.py +++ b/bugbug/tools/core/platforms/base.py @@ -73,6 +73,10 @@ def patch_url(self) -> str: """Return the URL of the patch.""" ... + async def bug_component(self) -> Optional[str]: + """Return 'Product::Component' for the associated bug, or None.""" + return None + @abstractmethod async def get_base_revision(self) -> Optional[str]: """Return the VCS revision the patch was written against, or None.""" diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py index 7ea7526b3a..b0725d294e 100644 --- a/bugbug/tools/core/platforms/phabricator.py +++ b/bugbug/tools/core/platforms/phabricator.py @@ -472,6 +472,30 @@ def _revision_metadata(self) -> dict: def bug(self) -> Bug: return Bug.get(self.bug_id) + async def bug_component(self) -> Optional[str]: + if not self.has_bug: + return None + import asyncio + + from libmozdata.bugzilla import Bugzilla + + def _fetch() -> Optional[str]: + bugs: list[dict] = [] + Bugzilla( + self.bug_id, + include_fields=["product", "component"], + bughandler=lambda bug, data: data.append(bug), + bugdata=bugs, + ).get_data().wait() + if not bugs: + return None + return f"{bugs[0]['product']}::{bugs[0]['component']}" + + try: + return await asyncio.get_event_loop().run_in_executor(None, _fetch) + except Exception: + return None + @property def bug_id(self) -> int: return int(self._revision_metadata["fields"]["bugzilla.bug-id"]) diff --git a/tests/test_code_review.py b/tests/test_code_review.py index 7c9e22258e..24009ac14e 100644 --- a/tests/test_code_review.py +++ b/tests/test_code_review.py @@ -26,7 +26,6 @@ _merge_rules, external_content_manifest, format_external_content, - get_bug_component, github_repo_allowed, load_external_content_for_diff, parse_diff_files, @@ -788,15 +787,30 @@ def test_rule_bugzilla_component_matches(): @pytest.mark.asyncio -async def test_get_bug_component(): +async def test_patch_bug_component(): + from bugbug.tools.core.platforms.phabricator import PhabricatorPatch + + class FakePatch(PhabricatorPatch): + def __init__(self): + pass + + @property + def has_bug(self): + return True + + @property + def bug_id(self): + return 12345 + def fake_bugzilla(bug_id, include_fields, bughandler, bugdata): bughandler({"product": "Core", "component": "DOM: Web Audio"}, bugdata) mock = MagicMock() mock.get_data.return_value.wait = MagicMock() return mock + fake_patch = FakePatch() with patch("libmozdata.bugzilla.Bugzilla", side_effect=fake_bugzilla): - component = await get_bug_component(12345) + component = await fake_patch.bug_component() assert component == "Core::DOM: Web Audio"