From b04f650cdee9051b10928a90c4caf99af46be771 Mon Sep 17 00:00:00 2001 From: buke Date: Mon, 15 Jun 2026 10:48:26 +0800 Subject: [PATCH 1/8] feat(catalog): enforce runtime checks and tighten peer dependency policy - add explicit error-coded runtime checks for moduleName consistency, tarball download/integrity verification, depends link integrity, and official pre-1.0 cli range consistency - tighten peer dependency policy by removing global foo allowance and keeping it only as a scoped temporary exception for @choysum-dev/core@0.0.0-20260614200130 - expose tarball verification limits in validate workflow for deterministic CI behavior --- .github/workflows/validate-catalog.yml | 2 + schemas/peer-dependencies-allowlist.json | 22 ++ scripts/build_catalog.py | 413 ++++++++++++++++++++++- 3 files changed, 436 insertions(+), 1 deletion(-) create mode 100644 schemas/peer-dependencies-allowlist.json diff --git a/.github/workflows/validate-catalog.yml b/.github/workflows/validate-catalog.yml index e0af533..4d6b276 100644 --- a/.github/workflows/validate-catalog.yml +++ b/.github/workflows/validate-catalog.yml @@ -19,6 +19,8 @@ jobs: CHOYSUM_NPM_FETCH_MAX_RETRIES: "3" CHOYSUM_NPM_FETCH_BACKOFF_SECONDS: "1.0" CHOYSUM_BUILD_CONCURRENCY: "5" + CHOYSUM_TARBALL_VERIFY_TIMEOUT_SECONDS: "30" + CHOYSUM_TARBALL_MAX_BYTES: "52428800" steps: - name: Checkout uses: actions/checkout@v6 diff --git a/schemas/peer-dependencies-allowlist.json b/schemas/peer-dependencies-allowlist.json new file mode 100644 index 0000000..0e32e86 --- /dev/null +++ b/schemas/peer-dependencies-allowlist.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://index.choysum.dev/v1/schema/peer-dependencies-allowlist.json", + "allowedPeerDependencies": [ + "@vicons/ionicons5", + "bcrypt", + "jsonwebtoken", + "naive-ui", + "pinia", + "vue", + "vue-router" + ], + "scopedPeerDependencyExceptions": [ + { + "name": "foo", + "moduleId": "core", + "package": "@choysum-dev/core", + "version": "0.0.0-20260614200130", + "reason": "Temporary compatibility exception for published pre-1.0 metadata; remove after core package metadata is corrected." + } + ] +} diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index ffe9090..e5c88cc 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -6,6 +6,7 @@ from __future__ import annotations import base64 +import binascii import concurrent.futures import http.client import hashlib @@ -61,8 +62,32 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: NPM_FETCH_MAX_RETRIES = read_int_env("CHOYSUM_NPM_FETCH_MAX_RETRIES", 3) NPM_FETCH_BACKOFF_SECONDS = read_float_env("CHOYSUM_NPM_FETCH_BACKOFF_SECONDS", 1.0) BUILD_CONCURRENCY = read_int_env("CHOYSUM_BUILD_CONCURRENCY", 5) +TARBALL_VERIFY_TIMEOUT_SECONDS = read_int_env("CHOYSUM_TARBALL_VERIFY_TIMEOUT_SECONDS", 30) +TARBALL_MAX_BYTES = read_int_env("CHOYSUM_TARBALL_MAX_BYTES", 50 * 1024 * 1024) +PEER_DEP_ALLOWLIST_PATH = SCHEMA_SRC / "peer-dependencies-allowlist.json" +OFFICIAL_PRE1_CLI_RANGE = ">=0.0.0-0 <0.0.0" RANGE_TOKEN_RE = re.compile(r"^(<=|>=|<|>)(.+)$") RANGE_OPERATORS = {"<", "<=", ">", ">="} +INTEGRITY_ALGORITHMS = { + "sha1": 20, + "sha256": 32, + "sha384": 48, + "sha512": 64, +} + +ERROR_MODULE_NAME_MISSING = "CATALOG_E_MODULE_NAME_MISSING" +ERROR_MODULE_NAME_MISMATCH = "CATALOG_E_MODULE_NAME_MISMATCH" +ERROR_INTEGRITY_FORMAT = "CATALOG_E_INTEGRITY_FORMAT" +ERROR_INTEGRITY_UNSUPPORTED_ALGORITHM = "CATALOG_E_INTEGRITY_UNSUPPORTED_ALGORITHM" +ERROR_INTEGRITY_MISMATCH = "CATALOG_E_INTEGRITY_MISMATCH" +ERROR_TARBALL_DOWNLOAD = "CATALOG_E_TARBALL_DOWNLOAD" +ERROR_TARBALL_TOO_LARGE = "CATALOG_E_TARBALL_TOO_LARGE" +ERROR_DEPENDS_INVALID_ID = "CATALOG_E_DEPENDS_INVALID_ID" +ERROR_DEPENDS_BROKEN_LINK = "CATALOG_E_DEPENDS_BROKEN_LINK" +ERROR_PEER_DEP_UNKNOWN = "CATALOG_E_PEER_DEP_UNKNOWN" +ERROR_PEER_DEP_ALLOWLIST_INVALID = "CATALOG_E_PEER_DEP_ALLOWLIST_INVALID" +ERROR_OFFICIAL_PRE1_CLI_RANGE = "CATALOG_E_OFFICIAL_PRE1_CLI_RANGE" +ERROR_MODULE_VERSION_INVALID = "CATALOG_E_MODULE_VERSION_INVALID" @dataclass(frozen=True) @@ -78,6 +103,23 @@ class Bound: version: SemVer inclusive: bool + +@dataclass(frozen=True) +class PeerDependencyException: + name: str + module_id: str + package: str + version: str + reason: str + + +def build_error(code: str, message: str) -> str: + return f"[{code}] {message}" + + +def value_error(code: str, message: str) -> ValueError: + return ValueError(build_error(code, message)) + def write_text(path: Path, content: str) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(content, encoding="utf-8") @@ -384,6 +426,362 @@ def resolve_tarball(dist_meta: dict[str, Any], package_name: str, version: str) ) +def validate_module_name( + choysum_meta: dict[str, Any], + module_id: str, + package_name: str, + version: str, +) -> None: + module_name = choysum_meta.get("moduleName") + if not isinstance(module_name, str) or not module_name.strip(): + raise value_error( + ERROR_MODULE_NAME_MISSING, + f"Module '{module_id}' version '{version}' is missing required field " + f"'choysum.moduleName' (package: '{package_name}').", + ) + + normalized = module_name.strip() + if normalized != module_id: + raise value_error( + ERROR_MODULE_NAME_MISMATCH, + f"Module '{module_id}' version '{version}' has choysum.moduleName " + f"'{normalized}', expected '{module_id}' (package: '{package_name}').", + ) + + +def parse_integrity_value(integrity: str, package_name: str, version: str) -> tuple[str, bytes]: + tokens = integrity.strip().split() + if not tokens: + raise value_error( + ERROR_INTEGRITY_FORMAT, + f"Package '{package_name}' version '{version}' has an empty integrity value.", + ) + + algorithm, sep, digest_b64 = tokens[0].partition("-") + if not sep or not algorithm or not digest_b64: + raise value_error( + ERROR_INTEGRITY_FORMAT, + f"Package '{package_name}' version '{version}' has invalid integrity " + f"'{integrity}'. Expected '-'.", + ) + + normalized_algorithm = algorithm.lower() + expected_length = INTEGRITY_ALGORITHMS.get(normalized_algorithm) + if expected_length is None: + raise value_error( + ERROR_INTEGRITY_UNSUPPORTED_ALGORITHM, + f"Package '{package_name}' version '{version}' uses unsupported integrity " + f"algorithm '{algorithm}'.", + ) + + try: + digest = base64.b64decode(digest_b64, validate=True) + except (ValueError, binascii.Error) as exc: + raise value_error( + ERROR_INTEGRITY_FORMAT, + f"Package '{package_name}' version '{version}' has non-base64 integrity digest.", + ) from exc + + if len(digest) != expected_length: + raise value_error( + ERROR_INTEGRITY_FORMAT, + f"Package '{package_name}' version '{version}' has integrity digest length " + f"{len(digest)}, expected {expected_length} for {normalized_algorithm}.", + ) + + return normalized_algorithm, digest + + +def verify_tarball_integrity( + tarball_url: str, + integrity: str, + package_name: str, + version: str, +) -> None: + algorithm, expected_digest = parse_integrity_value(integrity, package_name, version) + hasher = hashlib.new(algorithm) + req = urllib.request.Request( + tarball_url, + headers={"User-Agent": "Choysum-Catalog-Builder/1.0"}, + ) + total_bytes = 0 + + try: + with urllib.request.urlopen(req, timeout=TARBALL_VERIFY_TIMEOUT_SECONDS) as response: + for chunk in iter(lambda: response.read(65536), b""): + total_bytes += len(chunk) + if total_bytes > TARBALL_MAX_BYTES: + raise value_error( + ERROR_TARBALL_TOO_LARGE, + f"Package '{package_name}' version '{version}' tarball exceeds " + f"max size {TARBALL_MAX_BYTES} bytes.", + ) + hasher.update(chunk) + except urllib.error.HTTPError as exc: + raise RuntimeError( + build_error( + ERROR_TARBALL_DOWNLOAD, + f"Failed to download tarball for package '{package_name}' version '{version}' " + f"from '{tarball_url}' (status: {exc.code}).", + ) + ) from exc + except ( + urllib.error.URLError, + http.client.HTTPException, + socket.timeout, + TimeoutError, + ConnectionError, + ) as exc: + raise RuntimeError( + build_error( + ERROR_TARBALL_DOWNLOAD, + f"Failed to download tarball for package '{package_name}' version '{version}' " + f"from '{tarball_url}': {exc}", + ) + ) from exc + + actual_digest = hasher.digest() + if actual_digest != expected_digest: + expected_b64 = base64.b64encode(expected_digest).decode("ascii") + actual_b64 = base64.b64encode(actual_digest).decode("ascii") + raise value_error( + ERROR_INTEGRITY_MISMATCH, + f"Package '{package_name}' version '{version}' tarball integrity mismatch " + f"for {algorithm}: expected '{expected_b64}', got '{actual_b64}'.", + ) + + +def validate_official_pre1_cli_range( + module_id: str, + package_name: str, + version: str, + trust: Any, + normalized_cli_range: str, +) -> None: + if trust != "official": + return + + try: + parsed_version = parse_semver(version) + except ValueError as exc: + raise value_error( + ERROR_MODULE_VERSION_INVALID, + f"Official module '{module_id}' has invalid version key '{version}' " + f"(package: '{package_name}').", + ) from exc + + if parsed_version.major == 0 and parsed_version.minor == 0 and parsed_version.patch == 0: + if normalized_cli_range != OFFICIAL_PRE1_CLI_RANGE: + raise value_error( + ERROR_OFFICIAL_PRE1_CLI_RANGE, + f"Official module '{module_id}' version '{version}' must use " + f"choysum.cli '{OFFICIAL_PRE1_CLI_RANGE}', got '{normalized_cli_range}' " + f"(package: '{package_name}').", + ) + + +def load_peer_dependency_allowlist() -> tuple[set[str], dict[tuple[str, str, str, str], str]]: + if not PEER_DEP_ALLOWLIST_PATH.is_file(): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + "Missing peer dependency allowlist file: " + f"{PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)}", + ) + ) + + payload = load_json(PEER_DEP_ALLOWLIST_PATH) + if not isinstance(payload, dict): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " + "must be a JSON object.", + ) + ) + + allowed_raw = payload.get("allowedPeerDependencies") + if not isinstance(allowed_raw, list): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " + "must contain 'allowedPeerDependencies' as an array.", + ) + ) + + allowed: set[str] = set() + for item in allowed_raw: + if not isinstance(item, str) or not item.strip(): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " + f"contains invalid dependency name: {item!r}.", + ) + ) + allowed.add(item.strip()) + + if not allowed: + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} is empty.", + ) + ) + + exceptions_raw = payload.get("scopedPeerDependencyExceptions", []) + if not isinstance(exceptions_raw, list): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} must contain " + "'scopedPeerDependencyExceptions' as an array when present.", + ) + ) + + exceptions: dict[tuple[str, str, str, str], str] = {} + for item in exceptions_raw: + if not isinstance(item, dict): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " + f"contains non-object scoped exception: {item!r}.", + ) + ) + + exception = PeerDependencyException( + name=str(item.get("name", "")).strip(), + module_id=str(item.get("moduleId", "")).strip(), + package=str(item.get("package", "")).strip(), + version=str(item.get("version", "")).strip(), + reason=str(item.get("reason", "")).strip(), + ) + if ( + not exception.name + or not exception.module_id + or not exception.package + or not exception.version + or not exception.reason + ): + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} has an " + f"invalid scoped exception: {item!r}.", + ) + ) + + if exception.name in allowed: + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Scoped exception for '{exception.name}' is redundant because it is " + "already globally allowed.", + ) + ) + + key = (exception.name, exception.module_id, exception.package, exception.version) + if key in exceptions: + raise RuntimeError( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Duplicate scoped peer dependency exception detected for '{exception.name}' " + f"on module '{exception.module_id}' version '{exception.version}'.", + ) + ) + exceptions[key] = exception.reason + + return allowed, exceptions + + +def validate_runtime_contracts( + modules: dict[str, dict[str, Any]], + allowed_peer_deps: set[str], + scoped_exceptions: dict[tuple[str, str, str, str], str], +) -> None: + known_modules = set(modules.keys()) + errors: list[str] = [] + used_scoped_exceptions: set[tuple[str, str, str, str]] = set() + + for module_id in sorted(modules.keys()): + module_payload = modules[module_id] + package_name = module_payload.get("package") + package_name_text = package_name if isinstance(package_name, str) else "" + versions = module_payload.get("versions") + if not isinstance(versions, dict): + continue + + for version in sorted(versions.keys()): + version_payload = versions[version] + if not isinstance(version_payload, dict): + continue + + depends = version_payload.get("depends") + if isinstance(depends, list): + for dep in depends: + if not isinstance(dep, str) or not dep.strip(): + errors.append( + build_error( + ERROR_DEPENDS_INVALID_ID, + f"Module '{module_id}' version '{version}' has invalid " + f"depends entry {dep!r} (package: '{package_name}').", + ) + ) + continue + if dep not in known_modules: + errors.append( + build_error( + ERROR_DEPENDS_BROKEN_LINK, + f"Module '{module_id}' version '{version}' depends on " + f"unknown module '{dep}' (package: '{package_name}').", + ) + ) + + peer_deps = version_payload.get("peerDependencies") + if isinstance(peer_deps, dict): + for peer_name in sorted(peer_deps.keys()): + if peer_name in allowed_peer_deps: + continue + + scoped_key = (peer_name, module_id, package_name_text, version) + if scoped_key in scoped_exceptions: + used_scoped_exceptions.add(scoped_key) + continue + + reason_hint = "" + if peer_name in {item[0] for item in scoped_exceptions.keys()}: + reason_hint = ( + " Add or update a scopedPeerDependencyExceptions entry " + "if this is an intentional temporary compatibility case." + ) + + if peer_name not in allowed_peer_deps: + errors.append( + build_error( + ERROR_PEER_DEP_UNKNOWN, + f"Module '{module_id}' version '{version}' has unsupported " + f"peer dependency '{peer_name}' (package: '{package_name_text}')." + f"{reason_hint}", + ) + ) + + stale_exceptions = sorted(set(scoped_exceptions.keys()) - used_scoped_exceptions) + for name, module_id, package_name_text, version in stale_exceptions: + errors.append( + build_error( + ERROR_PEER_DEP_ALLOWLIST_INVALID, + f"Scoped exception for '{name}' on module '{module_id}' version '{version}' " + f"(package: '{package_name_text}') is stale and should be removed.", + ) + ) + + if errors: + details = "\n".join(f" - {err}" for err in errors) + raise RuntimeError("Runtime contract validation failed:\n" + details) + + def fetch_npm_meta(package_name: str) -> dict: quoted_name = urllib.parse.quote(package_name, safe="@") url = f"https://registry.npmjs.org/{quoted_name}" @@ -433,6 +831,7 @@ def process_module(entry_file: Path) -> tuple[str, dict[str, Any], dict[str, int package_name = entry.get("package") if not isinstance(package_name, str) or not package_name.strip(): raise ValueError(f"Invalid or missing 'package' field in {entry_file}") + trust = entry.get("trust") print(f"Fetching NPM metadata for {package_name} (module: {module_id})...") npm_data = fetch_npm_meta(package_name) @@ -448,6 +847,8 @@ def process_module(entry_file: Path) -> tuple[str, dict[str, Any], dict[str, int choysum_meta = v_data.get("choysum") if not isinstance(choysum_meta, dict): choysum_meta = {} + validate_module_name(choysum_meta, module_id, package_name, ver) + dist_meta = v_data.get("dist") if not isinstance(dist_meta, dict): dist_meta = {} @@ -463,6 +864,7 @@ def process_module(entry_file: Path) -> tuple[str, dict[str, Any], dict[str, int peer_deps = {} integrity = resolve_integrity(dist_meta, package_name, ver) + verify_tarball_integrity(tarball_url, integrity, package_name, ver) normalized_cli_range, cli_major = resolve_choysum_cli_range( choysum_meta=choysum_meta, @@ -470,6 +872,13 @@ def process_module(entry_file: Path) -> tuple[str, dict[str, Any], dict[str, int package_name=package_name, version=ver, ) + validate_official_pre1_cli_range( + module_id=module_id, + package_name=package_name, + version=ver, + trust=trust, + normalized_cli_range=normalized_cli_range, + ) v_entry = { "tarball": tarball_url, @@ -492,7 +901,7 @@ def process_module(entry_file: Path) -> tuple[str, dict[str, Any], dict[str, int return module_id, { "moduleId": module_id, "package": package_name, - "trust": entry.get("trust"), + "trust": trust, "maintainers": entry.get("maintainers", []), "versions": versions_out }, version_major_map_out @@ -573,6 +982,8 @@ def write_index_artifacts(index_dir: Path, payload: dict[str, Any]) -> tuple[Pat def build() -> None: modules, module_version_major_map = collect_modules() + allowed_peer_deps, scoped_exceptions = load_peer_dependency_allowlist() + validate_runtime_contracts(modules, allowed_peer_deps, scoped_exceptions) generated_at = utc_now_iso() if DIST_ROOT.is_symlink(): From ec5af63644cc2fbffbc74bbd8385585e65bfc456 Mon Sep 17 00:00:00 2001 From: buke Date: Mon, 15 Jun 2026 11:06:30 +0800 Subject: [PATCH 2/8] refactor(catalog): remove peer dependency allowlist gate - stop enforcing peerDependencies allowlist and scoped exceptions during catalog build - keep depends link validation while allowing any peerDependencies from module metadata - delete obsolete schemas/peer-dependencies-allowlist.json configuration file --- schemas/peer-dependencies-allowlist.json | 22 --- scripts/build_catalog.py | 177 +---------------------- 2 files changed, 2 insertions(+), 197 deletions(-) delete mode 100644 schemas/peer-dependencies-allowlist.json diff --git a/schemas/peer-dependencies-allowlist.json b/schemas/peer-dependencies-allowlist.json deleted file mode 100644 index 0e32e86..0000000 --- a/schemas/peer-dependencies-allowlist.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://index.choysum.dev/v1/schema/peer-dependencies-allowlist.json", - "allowedPeerDependencies": [ - "@vicons/ionicons5", - "bcrypt", - "jsonwebtoken", - "naive-ui", - "pinia", - "vue", - "vue-router" - ], - "scopedPeerDependencyExceptions": [ - { - "name": "foo", - "moduleId": "core", - "package": "@choysum-dev/core", - "version": "0.0.0-20260614200130", - "reason": "Temporary compatibility exception for published pre-1.0 metadata; remove after core package metadata is corrected." - } - ] -} diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index e5c88cc..d1193bf 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -64,7 +64,6 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: BUILD_CONCURRENCY = read_int_env("CHOYSUM_BUILD_CONCURRENCY", 5) TARBALL_VERIFY_TIMEOUT_SECONDS = read_int_env("CHOYSUM_TARBALL_VERIFY_TIMEOUT_SECONDS", 30) TARBALL_MAX_BYTES = read_int_env("CHOYSUM_TARBALL_MAX_BYTES", 50 * 1024 * 1024) -PEER_DEP_ALLOWLIST_PATH = SCHEMA_SRC / "peer-dependencies-allowlist.json" OFFICIAL_PRE1_CLI_RANGE = ">=0.0.0-0 <0.0.0" RANGE_TOKEN_RE = re.compile(r"^(<=|>=|<|>)(.+)$") RANGE_OPERATORS = {"<", "<=", ">", ">="} @@ -84,8 +83,6 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: ERROR_TARBALL_TOO_LARGE = "CATALOG_E_TARBALL_TOO_LARGE" ERROR_DEPENDS_INVALID_ID = "CATALOG_E_DEPENDS_INVALID_ID" ERROR_DEPENDS_BROKEN_LINK = "CATALOG_E_DEPENDS_BROKEN_LINK" -ERROR_PEER_DEP_UNKNOWN = "CATALOG_E_PEER_DEP_UNKNOWN" -ERROR_PEER_DEP_ALLOWLIST_INVALID = "CATALOG_E_PEER_DEP_ALLOWLIST_INVALID" ERROR_OFFICIAL_PRE1_CLI_RANGE = "CATALOG_E_OFFICIAL_PRE1_CLI_RANGE" ERROR_MODULE_VERSION_INVALID = "CATALOG_E_MODULE_VERSION_INVALID" @@ -104,15 +101,6 @@ class Bound: inclusive: bool -@dataclass(frozen=True) -class PeerDependencyException: - name: str - module_id: str - package: str - version: str - reason: str - - def build_error(code: str, message: str) -> str: return f"[{code}] {message}" @@ -580,135 +568,13 @@ def validate_official_pre1_cli_range( ) -def load_peer_dependency_allowlist() -> tuple[set[str], dict[tuple[str, str, str, str], str]]: - if not PEER_DEP_ALLOWLIST_PATH.is_file(): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - "Missing peer dependency allowlist file: " - f"{PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)}", - ) - ) - - payload = load_json(PEER_DEP_ALLOWLIST_PATH) - if not isinstance(payload, dict): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " - "must be a JSON object.", - ) - ) - - allowed_raw = payload.get("allowedPeerDependencies") - if not isinstance(allowed_raw, list): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " - "must contain 'allowedPeerDependencies' as an array.", - ) - ) - - allowed: set[str] = set() - for item in allowed_raw: - if not isinstance(item, str) or not item.strip(): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " - f"contains invalid dependency name: {item!r}.", - ) - ) - allowed.add(item.strip()) - - if not allowed: - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} is empty.", - ) - ) - - exceptions_raw = payload.get("scopedPeerDependencyExceptions", []) - if not isinstance(exceptions_raw, list): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} must contain " - "'scopedPeerDependencyExceptions' as an array when present.", - ) - ) - - exceptions: dict[tuple[str, str, str, str], str] = {} - for item in exceptions_raw: - if not isinstance(item, dict): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} " - f"contains non-object scoped exception: {item!r}.", - ) - ) - - exception = PeerDependencyException( - name=str(item.get("name", "")).strip(), - module_id=str(item.get("moduleId", "")).strip(), - package=str(item.get("package", "")).strip(), - version=str(item.get("version", "")).strip(), - reason=str(item.get("reason", "")).strip(), - ) - if ( - not exception.name - or not exception.module_id - or not exception.package - or not exception.version - or not exception.reason - ): - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Allowlist file {PEER_DEP_ALLOWLIST_PATH.relative_to(ROOT)} has an " - f"invalid scoped exception: {item!r}.", - ) - ) - - if exception.name in allowed: - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Scoped exception for '{exception.name}' is redundant because it is " - "already globally allowed.", - ) - ) - - key = (exception.name, exception.module_id, exception.package, exception.version) - if key in exceptions: - raise RuntimeError( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Duplicate scoped peer dependency exception detected for '{exception.name}' " - f"on module '{exception.module_id}' version '{exception.version}'.", - ) - ) - exceptions[key] = exception.reason - - return allowed, exceptions - - -def validate_runtime_contracts( - modules: dict[str, dict[str, Any]], - allowed_peer_deps: set[str], - scoped_exceptions: dict[tuple[str, str, str, str], str], -) -> None: +def validate_runtime_contracts(modules: dict[str, dict[str, Any]]) -> None: known_modules = set(modules.keys()) errors: list[str] = [] - used_scoped_exceptions: set[tuple[str, str, str, str]] = set() for module_id in sorted(modules.keys()): module_payload = modules[module_id] package_name = module_payload.get("package") - package_name_text = package_name if isinstance(package_name, str) else "" versions = module_payload.get("versions") if not isinstance(versions, dict): continue @@ -739,44 +605,6 @@ def validate_runtime_contracts( ) ) - peer_deps = version_payload.get("peerDependencies") - if isinstance(peer_deps, dict): - for peer_name in sorted(peer_deps.keys()): - if peer_name in allowed_peer_deps: - continue - - scoped_key = (peer_name, module_id, package_name_text, version) - if scoped_key in scoped_exceptions: - used_scoped_exceptions.add(scoped_key) - continue - - reason_hint = "" - if peer_name in {item[0] for item in scoped_exceptions.keys()}: - reason_hint = ( - " Add or update a scopedPeerDependencyExceptions entry " - "if this is an intentional temporary compatibility case." - ) - - if peer_name not in allowed_peer_deps: - errors.append( - build_error( - ERROR_PEER_DEP_UNKNOWN, - f"Module '{module_id}' version '{version}' has unsupported " - f"peer dependency '{peer_name}' (package: '{package_name_text}')." - f"{reason_hint}", - ) - ) - - stale_exceptions = sorted(set(scoped_exceptions.keys()) - used_scoped_exceptions) - for name, module_id, package_name_text, version in stale_exceptions: - errors.append( - build_error( - ERROR_PEER_DEP_ALLOWLIST_INVALID, - f"Scoped exception for '{name}' on module '{module_id}' version '{version}' " - f"(package: '{package_name_text}') is stale and should be removed.", - ) - ) - if errors: details = "\n".join(f" - {err}" for err in errors) raise RuntimeError("Runtime contract validation failed:\n" + details) @@ -982,8 +810,7 @@ def write_index_artifacts(index_dir: Path, payload: dict[str, Any]) -> tuple[Pat def build() -> None: modules, module_version_major_map = collect_modules() - allowed_peer_deps, scoped_exceptions = load_peer_dependency_allowlist() - validate_runtime_contracts(modules, allowed_peer_deps, scoped_exceptions) + validate_runtime_contracts(modules) generated_at = utc_now_iso() if DIST_ROOT.is_symlink(): From 3c29a65cbc02cb9b24163437947a48fabeca9df9 Mon Sep 17 00:00:00 2001 From: buke Date: Mon, 15 Jun 2026 11:18:23 +0800 Subject: [PATCH 3/8] fix(catalog): address PR #21 review findings - validate tarball URL scheme before download to prevent non-http(s) fetches - support multi-hash integrity values and choose the strongest valid supported algorithm - fail fast on oversized tarballs using Content-Length and enforce duplicate/self depends checks --- scripts/build_catalog.py | 117 +++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 28 deletions(-) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index d1193bf..23da766 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -73,6 +73,13 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: "sha384": 48, "sha512": 64, } +INTEGRITY_ALGORITHM_PRIORITY = { + "sha1": 1, + "sha256": 2, + "sha384": 3, + "sha512": 4, +} +ALLOWED_TARBALL_SCHEMES = {"https", "http"} ERROR_MODULE_NAME_MISSING = "CATALOG_E_MODULE_NAME_MISSING" ERROR_MODULE_NAME_MISMATCH = "CATALOG_E_MODULE_NAME_MISMATCH" @@ -81,8 +88,11 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: ERROR_INTEGRITY_MISMATCH = "CATALOG_E_INTEGRITY_MISMATCH" ERROR_TARBALL_DOWNLOAD = "CATALOG_E_TARBALL_DOWNLOAD" ERROR_TARBALL_TOO_LARGE = "CATALOG_E_TARBALL_TOO_LARGE" +ERROR_TARBALL_URL_SCHEME = "CATALOG_E_TARBALL_URL_SCHEME" ERROR_DEPENDS_INVALID_ID = "CATALOG_E_DEPENDS_INVALID_ID" ERROR_DEPENDS_BROKEN_LINK = "CATALOG_E_DEPENDS_BROKEN_LINK" +ERROR_DEPENDS_DUPLICATE = "CATALOG_E_DEPENDS_DUPLICATE" +ERROR_DEPENDS_SELF_REFERENCE = "CATALOG_E_DEPENDS_SELF_REFERENCE" ERROR_OFFICIAL_PRE1_CLI_RANGE = "CATALOG_E_OFFICIAL_PRE1_CLI_RANGE" ERROR_MODULE_VERSION_INVALID = "CATALOG_E_MODULE_VERSION_INVALID" @@ -445,39 +455,48 @@ def parse_integrity_value(integrity: str, package_name: str, version: str) -> tu f"Package '{package_name}' version '{version}' has an empty integrity value.", ) - algorithm, sep, digest_b64 = tokens[0].partition("-") - if not sep or not algorithm or not digest_b64: - raise value_error( - ERROR_INTEGRITY_FORMAT, - f"Package '{package_name}' version '{version}' has invalid integrity " - f"'{integrity}'. Expected '-'.", - ) + candidates: list[tuple[str, bytes]] = [] + has_supported_algorithm = False - normalized_algorithm = algorithm.lower() - expected_length = INTEGRITY_ALGORITHMS.get(normalized_algorithm) - if expected_length is None: - raise value_error( - ERROR_INTEGRITY_UNSUPPORTED_ALGORITHM, - f"Package '{package_name}' version '{version}' uses unsupported integrity " - f"algorithm '{algorithm}'.", - ) + for token in tokens: + algorithm, sep, digest_b64 = token.partition("-") + if not sep or not algorithm or not digest_b64: + continue - try: - digest = base64.b64decode(digest_b64, validate=True) - except (ValueError, binascii.Error) as exc: - raise value_error( - ERROR_INTEGRITY_FORMAT, - f"Package '{package_name}' version '{version}' has non-base64 integrity digest.", - ) from exc + normalized_algorithm = algorithm.lower() + expected_length = INTEGRITY_ALGORITHMS.get(normalized_algorithm) + if expected_length is None: + continue + has_supported_algorithm = True + + try: + digest = base64.b64decode(digest_b64, validate=True) + except (ValueError, binascii.Error): + continue + + if len(digest) != expected_length: + continue + candidates.append((normalized_algorithm, digest)) + + if not candidates: + if not has_supported_algorithm: + raise value_error( + ERROR_INTEGRITY_UNSUPPORTED_ALGORITHM, + f"Package '{package_name}' version '{version}' has no supported integrity " + f"algorithms in '{integrity}'.", + ) - if len(digest) != expected_length: raise value_error( ERROR_INTEGRITY_FORMAT, - f"Package '{package_name}' version '{version}' has integrity digest length " - f"{len(digest)}, expected {expected_length} for {normalized_algorithm}.", + f"Package '{package_name}' version '{version}' has no valid integrity digest " + f"in '{integrity}'.", ) - return normalized_algorithm, digest + candidates.sort( + key=lambda item: INTEGRITY_ALGORITHM_PRIORITY[item[0]], + reverse=True, + ) + return candidates[0] def verify_tarball_integrity( @@ -486,6 +505,14 @@ def verify_tarball_integrity( package_name: str, version: str, ) -> None: + parsed_url = urllib.parse.urlparse(tarball_url) + if parsed_url.scheme.lower() not in ALLOWED_TARBALL_SCHEMES or not parsed_url.netloc: + raise value_error( + ERROR_TARBALL_URL_SCHEME, + f"Package '{package_name}' version '{version}' has disallowed tarball URL " + f"scheme in '{tarball_url}'. Allowed schemes: {sorted(ALLOWED_TARBALL_SCHEMES)}.", + ) + algorithm, expected_digest = parse_integrity_value(integrity, package_name, version) hasher = hashlib.new(algorithm) req = urllib.request.Request( @@ -496,6 +523,19 @@ def verify_tarball_integrity( try: with urllib.request.urlopen(req, timeout=TARBALL_VERIFY_TIMEOUT_SECONDS) as response: + content_length_header = response.headers.get("Content-Length") + if content_length_header: + try: + content_length = int(content_length_header) + except ValueError: + content_length = -1 + if content_length > TARBALL_MAX_BYTES: + raise value_error( + ERROR_TARBALL_TOO_LARGE, + f"Package '{package_name}' version '{version}' tarball content-length " + f"{content_length} exceeds max size {TARBALL_MAX_BYTES} bytes.", + ) + for chunk in iter(lambda: response.read(65536), b""): total_bytes += len(chunk) if total_bytes > TARBALL_MAX_BYTES: @@ -586,6 +626,7 @@ def validate_runtime_contracts(modules: dict[str, dict[str, Any]]) -> None: depends = version_payload.get("depends") if isinstance(depends, list): + seen_deps: set[str] = set() for dep in depends: if not isinstance(dep, str) or not dep.strip(): errors.append( @@ -596,12 +637,32 @@ def validate_runtime_contracts(modules: dict[str, dict[str, Any]]) -> None: ) ) continue - if dep not in known_modules: + normalized_dep = dep.strip() + if normalized_dep == module_id: + errors.append( + build_error( + ERROR_DEPENDS_SELF_REFERENCE, + f"Module '{module_id}' version '{version}' depends on itself " + f"(package: '{package_name}').", + ) + ) + continue + if normalized_dep in seen_deps: + errors.append( + build_error( + ERROR_DEPENDS_DUPLICATE, + f"Module '{module_id}' version '{version}' has duplicate depends " + f"entry '{normalized_dep}' (package: '{package_name}').", + ) + ) + continue + seen_deps.add(normalized_dep) + if normalized_dep not in known_modules: errors.append( build_error( ERROR_DEPENDS_BROKEN_LINK, f"Module '{module_id}' version '{version}' depends on " - f"unknown module '{dep}' (package: '{package_name}').", + f"unknown module '{normalized_dep}' (package: '{package_name}').", ) ) From ffeea7ea8ad3ebfb270ce5ceb56eb93ab1d514cf Mon Sep 17 00:00:00 2001 From: buke Date: Mon, 15 Jun 2026 11:28:33 +0800 Subject: [PATCH 4/8] perf(catalog): add optional tarball cache verification - add optional CHOYSUM_CACHE_DIR support to reuse previously verified tarballs by integrity hash - keep strict integrity/size/scheme checks while reducing redundant downloads on repeated builds - ensure temporary cache files are atomically promoted on success and cleaned on failure --- scripts/build_catalog.py | 109 +++++++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 10 deletions(-) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index 23da766..b28883b 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -80,6 +80,7 @@ def read_float_env(name: str, default: float, minimum: float = 0.0) -> float: "sha512": 4, } ALLOWED_TARBALL_SCHEMES = {"https", "http"} +TARBALL_CACHE_DIR_ENV = "CHOYSUM_CACHE_DIR" ERROR_MODULE_NAME_MISSING = "CATALOG_E_MODULE_NAME_MISSING" ERROR_MODULE_NAME_MISMATCH = "CATALOG_E_MODULE_NAME_MISMATCH" @@ -499,6 +500,55 @@ def parse_integrity_value(integrity: str, package_name: str, version: str) -> tu return candidates[0] +def resolve_tarball_cache_file(algorithm: str, expected_digest: bytes) -> Path | None: + cache_dir_raw = os.getenv(TARBALL_CACHE_DIR_ENV) + if not cache_dir_raw: + return None + + cache_dir = Path(cache_dir_raw).expanduser() + try: + cache_dir.mkdir(parents=True, exist_ok=True) + except OSError: + return None + + return cache_dir / f"{algorithm}-{expected_digest.hex()}.tar" + + +def verify_cached_tarball( + cache_file: Path, + algorithm: str, + expected_digest: bytes, +) -> bool: + if not cache_file.is_file(): + return False + + hasher = hashlib.new(algorithm) + total_bytes = 0 + + try: + with cache_file.open("rb") as handle: + for chunk in iter(lambda: handle.read(65536), b""): + total_bytes += len(chunk) + if total_bytes > TARBALL_MAX_BYTES: + try: + cache_file.unlink() + except OSError: + pass + return False + hasher.update(chunk) + except OSError: + return False + + if hasher.digest() == expected_digest: + return True + + try: + cache_file.unlink() + except OSError: + pass + return False + + def verify_tarball_integrity( tarball_url: str, integrity: str, @@ -514,12 +564,28 @@ def verify_tarball_integrity( ) algorithm, expected_digest = parse_integrity_value(integrity, package_name, version) + cache_file = resolve_tarball_cache_file(algorithm, expected_digest) + if cache_file is not None and verify_cached_tarball(cache_file, algorithm, expected_digest): + return + hasher = hashlib.new(algorithm) req = urllib.request.Request( tarball_url, headers={"User-Agent": "Choysum-Catalog-Builder/1.0"}, ) total_bytes = 0 + temp_cache_file: Path | None = None + cache_handle = None + completed = False + + if cache_file is not None: + temp_cache_file = cache_file.with_name( + f"{cache_file.name}.tmp-{os.getpid()}-{time.time_ns()}" + ) + try: + cache_handle = temp_cache_file.open("wb") + except OSError: + temp_cache_file = None try: with urllib.request.urlopen(req, timeout=TARBALL_VERIFY_TIMEOUT_SECONDS) as response: @@ -545,6 +611,19 @@ def verify_tarball_integrity( f"max size {TARBALL_MAX_BYTES} bytes.", ) hasher.update(chunk) + if cache_handle is not None: + cache_handle.write(chunk) + + actual_digest = hasher.digest() + if actual_digest != expected_digest: + expected_b64 = base64.b64encode(expected_digest).decode("ascii") + actual_b64 = base64.b64encode(actual_digest).decode("ascii") + raise value_error( + ERROR_INTEGRITY_MISMATCH, + f"Package '{package_name}' version '{version}' tarball integrity mismatch " + f"for {algorithm}: expected '{expected_b64}', got '{actual_b64}'.", + ) + completed = True except urllib.error.HTTPError as exc: raise RuntimeError( build_error( @@ -567,16 +646,26 @@ def verify_tarball_integrity( f"from '{tarball_url}': {exc}", ) ) from exc - - actual_digest = hasher.digest() - if actual_digest != expected_digest: - expected_b64 = base64.b64encode(expected_digest).decode("ascii") - actual_b64 = base64.b64encode(actual_digest).decode("ascii") - raise value_error( - ERROR_INTEGRITY_MISMATCH, - f"Package '{package_name}' version '{version}' tarball integrity mismatch " - f"for {algorithm}: expected '{expected_b64}', got '{actual_b64}'.", - ) + finally: + if cache_handle is not None: + try: + cache_handle.close() + except OSError: + pass + if temp_cache_file is not None and temp_cache_file.exists(): + if completed and cache_file is not None: + try: + temp_cache_file.replace(cache_file) + except OSError: + try: + temp_cache_file.unlink() + except OSError: + pass + else: + try: + temp_cache_file.unlink() + except OSError: + pass def validate_official_pre1_cli_range( From ada0337407831968ee4e97f3eb6dadd4614cf9ac Mon Sep 17 00:00:00 2001 From: buke Date: Mon, 15 Jun 2026 13:04:17 +0800 Subject: [PATCH 5/8] fix(catalog): clarify best-effort cache cleanup - add explicit comments in verify_cached_tarball for non-fatal OSError cleanup branches - keep existing behavior unchanged while satisfying github-code-quality empty-except guidance --- scripts/build_catalog.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index b28883b..3327e62 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -533,6 +533,7 @@ def verify_cached_tarball( try: cache_file.unlink() except OSError: + # Cache cleanup is best-effort; deletion failure is non-fatal. pass return False hasher.update(chunk) @@ -545,6 +546,7 @@ def verify_cached_tarball( try: cache_file.unlink() except OSError: + # Cache cleanup is best-effort; deletion failure is non-fatal. pass return False From e90a3f7bb0ac66ca4b490ca1958105c546b6ce3e Mon Sep 17 00:00:00 2001 From: Brian Wang Date: Mon, 15 Jun 2026 13:08:59 +0800 Subject: [PATCH 6/8] Potential fix for pull request finding 'Empty except' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --- scripts/build_catalog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index 3327e62..3a83043 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -653,6 +653,7 @@ def verify_tarball_integrity( try: cache_handle.close() except OSError: + # Cache write handle cleanup is best-effort; close failure is non-fatal. pass if temp_cache_file is not None and temp_cache_file.exists(): if completed and cache_file is not None: From 25ec3ec3ff3e10e3b6ba7bf9a4d91ff2d37c8546 Mon Sep 17 00:00:00 2001 From: Brian Wang Date: Mon, 15 Jun 2026 13:09:11 +0800 Subject: [PATCH 7/8] Potential fix for pull request finding 'Empty except' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --- scripts/build_catalog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index 3a83043..6df71af 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -663,6 +663,7 @@ def verify_tarball_integrity( try: temp_cache_file.unlink() except OSError: + # Temp cache cleanup is best-effort; deletion failure is non-fatal. pass else: try: From c9c72b66eb88faa7b6154664fcd13c5736bf7096 Mon Sep 17 00:00:00 2001 From: Brian Wang Date: Mon, 15 Jun 2026 13:09:19 +0800 Subject: [PATCH 8/8] Potential fix for pull request finding 'Empty except' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --- scripts/build_catalog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_catalog.py b/scripts/build_catalog.py index 6df71af..9fdc368 100755 --- a/scripts/build_catalog.py +++ b/scripts/build_catalog.py @@ -669,6 +669,7 @@ def verify_tarball_integrity( try: temp_cache_file.unlink() except OSError: + # Temp cache cleanup is best-effort; deletion failure is non-fatal. pass