diff --git a/Cargo.lock b/Cargo.lock index 2b9c8e7..53fec52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,18 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-compression" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -287,6 +299,23 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -344,6 +373,7 @@ version = "1.8.8" dependencies = [ "chrono", "clap", + "corgea", "dirs", "env_logger", "git2", @@ -359,6 +389,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1625,6 +1656,7 @@ version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64", "bytes", "cookie", @@ -1651,6 +1683,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-util", "tower", "tower-http", "tower-service", @@ -1760,6 +1793,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index d60edad..f4e4818 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,12 +5,25 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "corgea" +path = "src/main.rs" + +[features] +# Compiles the in-crate vuln-api test stub (`vuln_api_stub`). Enabled for all +# test builds via the self dev-dependency below; never part of release builds. +test-stub = [] + +[dev-dependencies] +corgea = { path = ".", features = ["test-stub"] } + [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" reqwest = { version = "0.12.23", default-features = false, features = [ "blocking", "cookies", + "gzip", "json", "multipart", "native-tls", @@ -19,6 +32,7 @@ reqwest = { version = "0.12.23", default-features = false, features = [ toml = "0.8.8" log = "0.4" env_logger = "0.11" +semver = "1" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" serde_derive = "1.0.195" diff --git a/README.md b/README.md index 03b116d..e3aec87 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ pip install corgea-cli You can get the latest binaries for your OS from https://github.com/Corgea/cli/releases. ### Setup -Once the binary is installed, login with your token from the Corgea app. +Once the binary is installed, login with your token from the Corgea app to enable +authenticated enforcement and private Corgea intelligence. ``` corgea login ``` @@ -50,6 +51,19 @@ corgea deps policy init --exist-ok # write starter policy, or keep existing See [Dependency Scanning (CLI)](https://docs.corgea.app/cli/deps) for the full flag and exit-code reference. 
+## Install Wrappers + +`corgea pip|npm|yarn|pnpm|uv ` runs package-manager install commands +through Corgea's install gate. Baseline CVE checks need no token: known vulnerable +or malicious package versions block, while vuln-api lookup outages warn and +continue in public fail-open mode. + +Logging in enables authenticated enforcement against the default Corgea vuln-api, +including fail-closed behavior for lookup failures and any private Corgea +intelligence. A custom `CORGEA_VULN_API_URL` is public by default, even when +`CORGEA_TOKEN` exists. Set `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` to send +the token to a custom URL and make lookup failures fail closed. + ## Development Setup ### Prerequisites diff --git a/harness b/harness index 84b5076..8b430d9 100755 --- a/harness +++ b/harness @@ -260,6 +260,12 @@ $output" } cmd_pre_commit() { + # git exports GIT_DIR/GIT_INDEX_FILE/… to hooks. From a linked + # worktree GIT_DIR is absolute, so any `git init`/`git add` a test + # spawns in a tempdir would resolve to the shared gitdir and + # corrupt the real repo. Scrub the hook env before running tests. + unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE GIT_OBJECT_DIRECTORY \ + GIT_COMMON_DIR GIT_PREFIX local staged; staged="$(staged_rs_files)" if [ -z "$staged" ]; then printf "No staged Rust files — skipping checks\n" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index f23293f..be46be0 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -109,6 +109,138 @@ corgea setup-hooks --default-config # Default: secrets + PII, fail on Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypass with `git commit --no-verify`. +### Install Wrappers — `corgea pip|npm|yarn|pnpm|uv ` + +Run a package manager through Corgea's install gate. 
Install commands with named +targets are resolved against the public registry first, then gated twice: a version +published within `--threshold` (default `2d`) blocks (exit 1), and each resolved +version is checked against Corgea's vuln-api. Baseline public CVE checks need no +token: known-vulnerable or malicious versions block, but vuln-api lookup outages +warn and continue because public mode is fail-open. A Corgea token on the default +vuln-api enables authenticated enforcement and private Corgea intelligence; in +that mode, verdict lookup failures also block (fail-closed). Everything else +passes through with the package manager's own exit code. Git/URL/path specs +(including `pip install .`, PEP 508 `name @ url` direct references, and npm +GitHub shorthand `user/repo`) are noted, never blocked. The install verb is +found behind global flags (`npm --loglevel silent install x` is still gated). +Bare `npm install` (zero specs, project `package.json` found like npm finds it +— nearest ancestor) is gated too: the full lockfile-resolved tree is verdicted, +so a vulnerable lockfile blocks. `npm ci` (and aliases) is gated from the +project lockfile directly, like `uv sync`. Bare `yarn` (with or without the +`install` verb) and bare `pnpm` installs have no safe dry-run; they run +unchecked after a stderr note (`note: bare ' ' is not gated …`). +`-r requirements.txt` files get a printed note when the tree pass doesn't +cover them. + +Wrapper flags (`--force`, `--no-fail`, `--json`, `-t`) are read between the +manager name and the install verb (`corgea npm --force install x`); flags +after the verb belong to the package manager and are forwarded untouched. + +Blocked findings steer to the fix: each advisory line shows `fixed in ` (or +`no fixed version known`). When every advisory on a package has a fix, the gate +prints `→ safe version: @` — the highest fix covering every advisory. 
+ +The vuln check covers the **full would-install set** where the manager has a safe +resolver, not just the named targets: `pip` and `npm` resolve the complete tree +(named + transitive) via a safe dry-run (`pip install --dry-run …`; an isolated +`npm install --package-lock-only` in a temp dir, never touching your lockfile), and +`uv pip install` / `uv add` / `uv pip sync` resolve theirs via `uv pip compile`; +every resolved package is verdicted, so a flagged **transitive** dependency blocks +the install too. `uv sync` is gated from `uv.lock` (found like uv finds it — +nearest ancestor). `yarn` and `pnpm` have no safe dry-run, so they verify the +named targets only and print +`warning: transitive dependencies not checked (…); only named packages were verified.` +The same warning is emitted (and the gate falls back to named-only) whenever a +dry-run fails or an npm flag redirects the project root (`--prefix`, `-g`). +Verdict requests run in a bounded pool (8 parallel). + +```bash +corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip +corgea npm install axios@^1.0.0 # same gate for npm ranges +corgea pip --no-fail install newpkg # demote a recency block to a warning (vuln blocks still apply) +corgea pip --force install badpkg # print findings but install anyway (overrides every block) +corgea pip --json install newpkg # machine-readable per-target report incl. verdicts +corgea pip list # non-install subcommands pass straight through +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable blocks or authenticated unverifiable blocks. | +| `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. 
Also bypasses the wrong-package-manager and PEP 668 refusals, and unparsable-lockfile refusals on `uv sync`/`npm ci`. | +| `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. Stdout carries only the report; the package manager's output moves to stderr. | + +`--json` adds `verdict_mode` (`"public"` or `"authenticated"`) and a +`tree` object: `null` when no tree pass ran; otherwise `mode` is `"full"` (transitive +checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a `transitive[]` +array of `{name, version, verdict}` for packages beyond the named targets. Vulnerable +`verdict` objects carry a `remediation` field: the safe version covering every advisory, +or `null` when any advisory has no known fix. + +Recency gating and baseline CVE checks need no token. The default vuln-api uses +`CORGEA_TOKEN` when present. A custom `CORGEA_VULN_API_URL` is public by default, even +when `CORGEA_TOKEN` exists; set `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` to send +the token to that custom URL and make lookup failures fail closed. Overrides for +testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, `CORGEA_VULN_API_URL`. + +#### Limitations + +The gate is a wrapper, not an enforcement boundary. By design it cannot catch: + +- **Direct invocation** — running the package manager itself (`pip`, `npm`, + `python -m pip`) skips the gate entirely. +- **Custom indexes/registries** — `--index-url`, `--registry`, and `.npmrc`/ + `pip.conf` overrides change where packages resolve from. The gate still + verdicts each `name@version`, but it cannot vouch that a substituted + registry serves the same artifact those advisories describe. +- **Ungated managers** — bare `yarn`/`pnpm` installs run unchecked (see the + bare-install note above); only their named targets are verified. + +Hard enforcement needs org-level controls — lockfile review, registry +allow-listing — alongside the wrapper. 
+ +#### Testing the gate + +The staging vuln-api (`https://cve-worker-staging.corgea.workers.dev`) serves +deterministic verdicts for dogfooding and is currently the default endpoint, so +with `CORGEA_TOKEN` set it runs authenticated with no extra setup. The explicit +`CORGEA_VULN_API_URL` + `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` below keep +that true even if the default endpoint moves (a custom URL is public-mode unless +the opt-in is set). Known-vulnerable targets: + +| Ecosystem | Target | Verdict | +|-----------|--------|---------| +| npm | `axios@0.21.0` | vulnerable — fixed in 0.21.2 | +| npm | `minimist@0.0.8` | vulnerable — fixed in 1.2.2 | +| npm | `node-fetch@2.6.0` | vulnerable — fixed in 2.6.7 | +| PyPI | `mezzanine==6.0.0` | vulnerable — no fixed version known | + +Verify the gate end-to-end: + +```bash +CORGEA_TOKEN=dogfood-dummy \ +CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev \ +CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1 \ +corgea npm install axios@0.21.0 +``` + +Expected output (exit code 1; nothing is installed): + +``` +Pre-checking `npm install axios@0.21.0` (threshold 2d) + 1 ok, 0 recent, 1 vulnerable, 0 unverifiable, 0 skipped, 0 errors + tree: 2 packages resolved, 1 transitive checked + ✗ axios@0.21.0 → axios@0.21.0 known vulnerable: + CVE-2021-3749 (high) — fixed in 0.21.2 + CVE-2020-28168 (medium) — fixed in 0.21.1 + → safe version: axios@0.21.2 +Refusing to run install. Pass --force to proceed despite findings. +``` + +Caveat: the staging PyPI seed covers recent CVEs only. Decade-old classics +(`pyyaml==5.1`, `django==2.2`) return clean **by design** — a clean verdict on +those does not mean the gate is broken. 
+ ### Deps — `corgea deps ` diff --git a/src/authorize.rs b/src/authorize.rs index 7271cf9..80fbbcf 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -94,7 +94,8 @@ pub fn run(scope: Option, url: Option) -> Result<(), Box Result> { // Try a more reliable approach - start from a higher range that's less likely to be used let search_ranges = vec![ - (start_port, start_port + 50), + // Saturate: a start port near u16::MAX must clamp, not overflow. + (start_port, start_port.saturating_add(50)), (9000, 9100), (8000, 8100), (7000, 7100), @@ -632,7 +633,16 @@ mod tests { assert!(!port_is_available(port)); drop(listener); - assert!(port_is_available(port)); + // The freed port returns to the OS ephemeral pool, where a parallel + // test's `bind(":0")` can snatch it before the re-check — so accept + // any of several freshly freed ports reading available. The chain is + // lazy: fresh ports are only reserved after a collision. + assert!( + std::iter::once(port) + .chain((0..4).map(|_| reserve_ephemeral_port())) + .any(port_is_available), + "five consecutive freed ports all read unavailable" + ); } #[test] diff --git a/src/config.rs b/src/config.rs index 257a483..2c9287c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,8 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::{env, fs, io}; +pub const DEFAULT_VULN_API_URL: &str = "https://cve-worker-staging.corgea.workers.dev"; + #[derive(Serialize, Deserialize, Clone)] pub struct Config { pub(crate) url: String, @@ -101,3 +103,38 @@ impl Config { self.debug } } + +/// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, +/// then the public default. Pure env/constant — no config file field. 
+pub fn vuln_api_url() -> String { + crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) + .trim() + .trim_end_matches('/') + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + /// All `vuln_api_url` cases in one test fn: the env-var cases + /// mutate process-global state, so they must not run concurrently + /// with each other under the parallel test harness. + #[test] + fn vuln_api_url_resolution_order() { + env::remove_var("CORGEA_VULN_API_URL"); + + // Default when the env var is unset. + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); + + // Env var wins; whitespace and trailing slash trimmed. + env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); + assert_eq!(vuln_api_url(), "https://env.example.com"); + + // Empty / whitespace-only env var is treated as unset. + env::set_var("CORGEA_VULN_API_URL", " "); + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); + env::remove_var("CORGEA_VULN_API_URL"); + } +} diff --git a/src/deps/detect.rs b/src/deps/detect.rs index bf3636c..bedbfc3 100644 --- a/src/deps/detect.rs +++ b/src/deps/detect.rs @@ -16,10 +16,6 @@ pub enum DepFileKind { MavenPom, GradleBuild, GradleLockfile, - GoMod, - GoSum, - CargoManifest, - CargoLock, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -61,7 +57,7 @@ fn detect_recursive(dir: &Path, out: &mut Vec) { let name = file_name.to_string_lossy(); if path.is_dir() { - if SKIP_DIRS.iter().any(|s| name == *s) { + if should_skip_dir(&name) { continue; } detect_recursive(&path, out); @@ -89,10 +85,6 @@ fn classify_file(path: &Path) -> Option { "pom.xml" => (DepFileKind::MavenPom, Ecosystem::Maven), "build.gradle" | "build.gradle.kts" => (DepFileKind::GradleBuild, Ecosystem::Maven), "gradle.lockfile" => (DepFileKind::GradleLockfile, Ecosystem::Maven), - "go.mod" => (DepFileKind::GoMod, Ecosystem::Go), - "go.sum" => (DepFileKind::GoSum, Ecosystem::Go), - "Cargo.toml" => (DepFileKind::CargoManifest, 
Ecosystem::Cargo), - "Cargo.lock" => (DepFileKind::CargoLock, Ecosystem::Cargo), _ => return None, }; Some(DetectedFile { @@ -101,3 +93,7 @@ fn classify_file(path: &Path) -> Option { ecosystem: kind_eco.1, }) } + +fn should_skip_dir(name: &str) -> bool { + name.starts_with('.') || SKIP_DIRS.contains(&name) +} diff --git a/src/deps/ecosystems/maven.rs b/src/deps/ecosystems/maven.rs index a5d2469..bf1b064 100644 --- a/src/deps/ecosystems/maven.rs +++ b/src/deps/ecosystems/maven.rs @@ -1,5 +1,8 @@ use std::path::Path; +use quick_xml::events::Event; +use quick_xml::reader::Reader; + use crate::deps::detect::DepFileKind; use crate::deps::ecosystems::classify_constraint; use crate::deps::ecosystems::evaluate::{ @@ -33,6 +36,22 @@ struct MavenDep { scope: Scope, } +#[derive(Default)] +struct PartialMavenDep { + group: String, + artifact: String, + version: String, + scope: String, +} + +#[derive(Clone, Copy)] +enum MavenField { + Group, + Artifact, + Version, + Scope, +} + fn scan_maven_pom(ctx: &mut ScanContext<'_>, dir: &Path, pom_path: &Path) -> Result<(), DepsError> { let rel = pom_path .strip_prefix(ctx.root) @@ -51,7 +70,10 @@ fn scan_maven_pom(ctx: &mut ScanContext<'_>, dir: &Path, pom_path: &Path) -> Res dep001(ctx.findings, ctx.policy, &rel, "Maven"); - let deps = parse_pom_dependencies(&content)?; + // Name the offending pom — a monorepo scan dies on the first malformed + // one, and a path-less error gives nothing to act on. 
+ let deps = parse_pom_dependencies(&content) + .map_err(|e| DepsError(format!("parse XML {}: {}", pom_path.display(), e.0)))?; for dep in deps { let name = dep.artifact.clone(); let declared = dep.version.clone(); @@ -90,44 +112,116 @@ fn scan_maven_pom(ctx: &mut ScanContext<'_>, dir: &Path, pom_path: &Path) -> Res } fn parse_pom_dependencies(content: &str) -> Result, DepsError> { - Ok(parse_pom_regex(content)) -} - -fn parse_pom_regex(content: &str) -> Vec { + let mut reader = Reader::from_str(content); + reader.config_mut().trim_text(true); + let mut buf = Vec::new(); let mut deps = Vec::new(); - let dep_blocks: Vec<&str> = content.split("").skip(1).collect(); - for block in dep_blocks { - let group = extract_xml_tag(block, "groupId"); - let artifact = extract_xml_tag(block, "artifactId"); - let version = extract_xml_tag(block, "version"); - let scope = extract_xml_tag(block, "scope"); - if artifact.is_empty() { - continue; + let mut current: Option = None; + let mut dep_depth = 0usize; + let mut field: Option = None; + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) => { + let name = e.name(); + let tag = local_xml_name(name.as_ref()); + if current.is_none() && tag == b"dependency" { + current = Some(PartialMavenDep::default()); + dep_depth = 1; + field = None; + } else if current.is_some() { + if dep_depth == 1 { + field = maven_field_from_tag(tag); + } + dep_depth += 1; + } + } + Ok(Event::Text(text)) => { + if dep_depth == 2 { + let value = text + .unescape() + .map(|value| value.trim().to_string()) + .unwrap_or_else(|_| { + String::from_utf8_lossy(text.as_ref()).trim().to_string() + }); + append_maven_field(current.as_mut(), field, &value); + } + } + // `` — CDATA is element text. 
+ Ok(Event::CData(text)) => { + if dep_depth == 2 { + let value = String::from_utf8_lossy(text.as_ref()).trim().to_string(); + append_maven_field(current.as_mut(), field, &value); + } + } + Ok(Event::End(ref e)) => { + if current.is_some() { + let name = e.name(); + let tag = local_xml_name(name.as_ref()); + if dep_depth == 2 { + field = None; + } + if tag == b"dependency" && dep_depth == 1 { + let dep = current.take().expect("dependency started"); + if !dep.artifact.is_empty() { + deps.push(MavenDep { + group: dep.group, + artifact: dep.artifact, + version: dep.version, + scope: if dep.scope == "test" { + Scope::Development + } else { + Scope::Production + }, + }); + } + field = None; + } else { + dep_depth = dep_depth.saturating_sub(1); + } + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(DepsError(e.to_string())), + _ => {} } - deps.push(MavenDep { - group, - artifact: artifact.clone(), - version: version.clone(), - scope: if scope == "test" { - Scope::Development - } else { - Scope::Production - }, - }); + buf.clear(); } - deps + + Ok(deps) } -fn extract_xml_tag(block: &str, tag: &str) -> String { - let open = format!("<{tag}>"); - let close = format!(""); - if let Some(start) = block.find(&open) { - let rest = &block[start + open.len()..]; - if let Some(end) = rest.find(&close) { - return rest[..end].trim().to_string(); - } +fn local_xml_name(name: &[u8]) -> &[u8] { + name.rsplit(|b| *b == b':').next().unwrap_or(name) +} + +fn maven_field_from_tag(tag: &[u8]) -> Option { + match tag { + b"groupId" => Some(MavenField::Group), + b"artifactId" => Some(MavenField::Artifact), + b"version" => Some(MavenField::Version), + b"scope" => Some(MavenField::Scope), + _ => None, + } +} + +/// Append a text/CDATA segment to the active field. Appending (rather than +/// assigning) keeps values split by inline comments intact: +/// `1.2` is `1.2`, not `2`. 
+fn append_maven_field(dep: Option<&mut PartialMavenDep>, field: Option, value: &str) { + if value.is_empty() { + return; } - String::new() + let (Some(dep), Some(field)) = (dep, field) else { + return; + }; + let slot = match field { + MavenField::Group => &mut dep.group, + MavenField::Artifact => &mut dep.artifact, + MavenField::Version => &mut dep.version, + MavenField::Scope => &mut dep.scope, + }; + slot.push_str(value); } fn scan_gradle(ctx: &mut ScanContext<'_>, dir: &Path, gradle_path: &Path) -> Result<(), DepsError> { diff --git a/src/deps/ecosystems/npm.rs b/src/deps/ecosystems/npm.rs index edbc7cf..1fd8d91 100644 --- a/src/deps/ecosystems/npm.rs +++ b/src/deps/ecosystems/npm.rs @@ -312,7 +312,11 @@ fn parse_npm_lock(path: &Path) -> Result, DepsError Ok(out) } -fn package_name_from_lock_key(key: &str) -> &str { +/// Package name from a lockfile `packages` key: the path after the last +/// `node_modules/` (or the whole key), truncated to one component — two for +/// scoped names. Also shared with the install gate's lockfile parse +/// (`precheck::tree`). +pub(crate) fn package_name_from_lock_key(key: &str) -> &str { let package_path = key .rsplit_once("node_modules/") .map(|(_, name)| name) diff --git a/src/deps/ecosystems/pypi.rs b/src/deps/ecosystems/pypi.rs index 062f13c..0f5fa77 100644 --- a/src/deps/ecosystems/pypi.rs +++ b/src/deps/ecosystems/pypi.rs @@ -367,7 +367,10 @@ fn exact_version_from_declared(name: &str, declared: &str) -> Option { Some(declared.trim_start_matches('=').trim().to_string()) } -fn normalize_pypi_name(name: &str) -> String { +/// PEP 503 name normalization: lowercase, runs of `-`/`_`/`.` collapse to `-`. +/// Also used by the install gate (`precheck`) so both features share one +/// canonical pypi name form. 
+pub(crate) fn normalize_pypi_name(name: &str) -> String { let mut out = String::new(); let mut last_was_separator = false; for c in name.trim().chars() { diff --git a/src/deps/tests/detect_tests.rs b/src/deps/tests/detect_tests.rs index b4ee1aa..c9fbfcd 100644 --- a/src/deps/tests/detect_tests.rs +++ b/src/deps/tests/detect_tests.rs @@ -45,6 +45,6 @@ fn detect_finds_gradle_files() { } #[test] -fn detect_finds_go_mod_smoke() { - assert!(kinds("go-mod-smoke").contains(&DepFileKind::GoMod)); +fn detect_ignores_unsupported_go_files() { + assert!(kinds("go-mod-smoke").is_empty()); } diff --git a/src/deps/tests/maven_tests.rs b/src/deps/tests/maven_tests.rs index 6d6390d..b881f87 100644 --- a/src/deps/tests/maven_tests.rs +++ b/src/deps/tests/maven_tests.rs @@ -54,7 +54,9 @@ fn gradle_classify_latest_release_is_unbounded() { } use super::common::scan_fixture; -use crate::deps::model::{PackageId, Severity}; +use crate::deps::model::{PackageId, Scope, Severity}; +use crate::deps::policy::Policy; +use crate::deps::scan; #[test] fn maven_graph_lists_all_direct_dependencies() { @@ -127,3 +129,84 @@ fn maven_snapshot_is_dep021_high() { "recommendation should name SNAPSHOT" ); } + +#[test] +fn maven_parser_accepts_namespace_prefixes_and_attributes() { + let tmp = tempfile::TempDir::new().expect("temp dir"); + std::fs::write( + tmp.path().join("pom.xml"), + r#" + + + + org.example + demo-lib + 1.2.3 + test + + + ignored + ignored-artifact + + + + + +"#, + ) + .expect("write pom"); + + let inv = scan(tmp.path(), &Policy::default()).expect("scan"); + let node = inv.node("demo-lib").expect("demo-lib node"); + assert_eq!(node.version(), Some("1.2.3")); + assert_eq!(node.scope(), Scope::Development); + assert!(inv.node("ignored-artifact").is_none()); +} + +#[test] +fn maven_parser_reads_cdata_and_comment_split_versions() { + let tmp = tempfile::TempDir::new().expect("temp dir"); + std::fs::write( + tmp.path().join("pom.xml"), + r#" + + + + org.example + cdata-lib + + + + org.example 
+ split-lib + 1.2 + + + +"#, + ) + .expect("write pom"); + + let inv = scan(tmp.path(), &Policy::default()).expect("scan"); + let cdata = inv.node("cdata-lib").expect("cdata-lib node"); + assert_eq!(cdata.version(), Some("1.2.3")); + let split = inv.node("split-lib").expect("split-lib node"); + assert_eq!(split.version(), Some("1.2")); +} + +#[test] +fn maven_parse_error_names_the_pom() { + let tmp = tempfile::TempDir::new().expect("temp dir"); + std::fs::write( + tmp.path().join("pom.xml"), + "\nga1", + ) + .expect("write pom"); + + let err = scan(tmp.path(), &Policy::default()).expect_err("malformed pom must error"); + assert!( + err.0.contains("pom.xml"), + "error should name the file: {}", + err.0 + ); +} diff --git a/src/deps/tests/robustness_tests.rs b/src/deps/tests/robustness_tests.rs index e18aac0..92c9fb3 100644 --- a/src/deps/tests/robustness_tests.rs +++ b/src/deps/tests/robustness_tests.rs @@ -103,3 +103,25 @@ fn robust_scan_skips_node_modules() { .components() .any(|c| { c.as_os_str() == "node_modules" }))); } + +#[test] +fn robust_scan_skips_hidden_tooling_dirs() { + use std::fs; + let tmp = tempfile::TempDir::new().expect("temp dir"); + fs::write( + tmp.path().join("package.json"), + r#"{"name":"x","version":"1.0.0","dependencies":{}}"#, + ) + .unwrap(); + let hidden = tmp + .path() + .join(".claude/worktrees/agent/tests/fixtures/malformed"); + fs::create_dir_all(&hidden).unwrap(); + fs::write(hidden.join("package-lock.json"), "{").unwrap(); + + let files = crate::deps::detect::detect_dependency_files(tmp.path()); + assert!(files + .iter() + .all(|f| !f.path.components().any(|c| { c.as_os_str() == ".claude" }))); + assert!(scan(tmp.path(), &Policy::default()).is_ok()); +} diff --git a/src/lib.rs b/src/lib.rs index 49bc6d0..498e83d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,13 @@ pub mod deps; +pub mod precheck; +pub mod verify_deps; +// Also declared in the binary crate (src/main.rs); re-declared here so library modules +// (e.g. 
vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that +// compiles cleanly in both crates. +mod log; +pub mod vuln_api; +// Test-only HTTP stub for the vuln-api. Gated out of release builds; the +// `test-stub` feature is enabled for every test build by the self +// dev-dependency in Cargo.toml, so integration tests can use it too. +#[cfg(any(test, feature = "test-stub"))] +pub mod vuln_api_stub; diff --git a/src/main.rs b/src/main.rs index 442c5a1..6d5f0c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,6 +199,16 @@ enum Commands { #[command(subcommand)] command: corgea::deps::run::DepsSubcommand, }, + /// Wrap `npm` commands: verify install targets' publish recency, then run npm. + Npm(InstallWrapArgs), + /// Wrap `yarn` commands: verify install targets' publish recency, then run yarn. + Yarn(InstallWrapArgs), + /// Wrap `pnpm` commands: verify install targets' publish recency, then run pnpm. + Pnpm(InstallWrapArgs), + /// Wrap `pip` commands: verify install targets' publish recency, then run pip. + Pip(InstallWrapArgs), + /// Wrap `uv` commands: verify install targets' publish recency, then run uv. + Uv(InstallWrapArgs), } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -221,6 +231,93 @@ impl FromStr for Scanner { } } +/// Shared flags for the install-wrapper subcommands (`corgea npm|yarn|pnpm|pip|uv`). +#[derive(clap::Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + value_parser = corgea::verify_deps::parse_threshold, + help = "Recency threshold. Resolved versions younger than this are blocked. e.g. '2d', '12h'." + )] + threshold: std::time::Duration, + + #[arg( + long, + help = "Demote a recency block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Proceed with the install despite vulnerable, unverifiable, or recent findings. Findings are still printed." 
+ )] + force: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, +} + +fn install_wrap_options( + args: &InstallWrapArgs, + config: &Config, +) -> corgea::precheck::PrecheckOptions { + let token = config.get_token(); + let token = token.trim(); + let base_url = config::vuln_api_url(); + let custom_vuln_api_url = base_url != config::DEFAULT_VULN_API_URL; + let send_token_to_custom = + utils::generic::get_env_var_if_exists("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") + .is_some_and(|v| v.trim() == "1"); + let mode = select_verdict_mode(token, custom_vuln_api_url, send_token_to_custom); + let verdict = Some(corgea::precheck::VerdictConfig { + base_url, + mode, + public_login_hint: token.is_empty(), + }); + corgea::precheck::PrecheckOptions { + threshold: args.threshold, + no_fail: args.no_fail, + force: args.force, + json: args.json, + verdict, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn select_verdict_mode( + token: &str, + custom_vuln_api_url: bool, + send_token_to_custom: bool, +) -> corgea::precheck::VerdictMode { + if !token.is_empty() && (!custom_vuln_api_url || send_token_to_custom) { + corgea::precheck::VerdictMode::Authenticated { + token: token.to_string(), + } + } else { + corgea::precheck::VerdictMode::Public + } +} + +fn run_install_wrap_command( + manager: corgea::precheck::PackageManager, + args: &InstallWrapArgs, + config: &Config, +) { + let code = + corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args, config)); + std::process::exit(code); +} + /// Initialize the global logger. 
/// /// `CORGEA_DEBUG=1` (env var or config file) raises the default verbosity to @@ -504,7 +601,29 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } + // Install wrappers: no hard auth gate. Public CVE checks run without a + // token; a token on the default service enables authenticated fail-closed + // enforcement. + Some(Commands::Npm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args, &corgea_config) + } + Some(Commands::Yarn(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Yarn, args, &corgea_config) + } + Some(Commands::Pnpm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pnpm, args, &corgea_config) + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args, &corgea_config) + } + Some(Commands::Uv(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Uv, args, &corgea_config) + } None => { + if let Some(message) = corgea::precheck::pip3_alias_message(&cli.args) { + eprintln!("{message}"); + std::process::exit(1); + } utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); println!(); @@ -523,4 +642,27 @@ mod tests { assert_eq!(default_log_level(2), "info"); // only ==1 means debug assert_eq!(default_log_level(-1), "info"); } + + #[test] + fn verdict_mode_selection_matrix() { + use corgea::precheck::VerdictMode; + + assert_eq!( + select_verdict_mode("token", false, false), + VerdictMode::Authenticated { + token: "token".to_string() + } + ); + assert_eq!(select_verdict_mode("", false, false), VerdictMode::Public); + assert_eq!( + select_verdict_mode("token", true, false), + VerdictMode::Public + ); + assert_eq!( + select_verdict_mode("token", true, true), + VerdictMode::Authenticated { + token: "token".to_string() + } + ); + } } diff --git a/src/precheck/detect.rs 
b/src/precheck/detect.rs new file mode 100644 index 0000000..3411b46 --- /dev/null +++ b/src/precheck/detect.rs @@ -0,0 +1,319 @@ +//! Package-manager/project detection: wrong-manager and +//! externally-managed-pip (PEP 668) guidance messages. + +use std::ffi::OsString; +use std::path::Path; +use std::process::Command; + +use super::{corgea_cmd, parse, PackageManager}; + +pub(super) fn wrong_package_manager_message( + manager: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + let cwd = &std::env::current_dir().ok()?; + let expected = match manager { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + let expected = detect_node_manager_from(cwd)?; + (expected != manager).then_some(expected)? + } + PackageManager::Pip if detect_uv_project_from(cwd) => PackageManager::Uv, + PackageManager::Uv if detect_pip_project_from(cwd) => PackageManager::Pip, + _ => return None, + }; + + let suggestion = suggested_install_command(expected, rest, parsed); + Some(format!( + "error: this project appears to use {}, but you ran {}.\nDid you mean `{suggestion}`?", + expected.binary_name(), + manager.binary_name() + )) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ProjectManagerDetection { + None, + Ambiguous, + Found(PackageManager), +} + +fn detect_node_manager_from(start: &Path) -> Option { + for dir in start.ancestors() { + match detect_node_manager_in_dir(dir) { + ProjectManagerDetection::Found(manager) => return Some(manager), + ProjectManagerDetection::Ambiguous => return None, + ProjectManagerDetection::None => {} + } + // A `package.json` marks the project root (npm/yarn/pnpm scope + // their own discovery the same way). A project with no manager + // indicators of its own must not inherit a stray ancestor lockfile + // — that would hard-refuse installs in every fresh project under it. 
+ if dir.join("package.json").is_file() { + return None; + } + } + None +} + +fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { + match package_json_manager(dir) { + ProjectManagerDetection::None => {} + found => return found, + } + + let mut found = Vec::new(); + if dir.join("pnpm-lock.yaml").is_file() { + found.push(PackageManager::Pnpm); + } + if dir.join("yarn.lock").is_file() { + found.push(PackageManager::Yarn); + } + if dir.join("package-lock.json").is_file() || dir.join("npm-shrinkwrap.json").is_file() { + found.push(PackageManager::Npm); + } + + match found.as_slice() { + [] => ProjectManagerDetection::None, + [manager] => ProjectManagerDetection::Found(*manager), + _ => ProjectManagerDetection::Ambiguous, + } +} + +/// `packageManager`-field detection. Missing/unparsable `package.json` and a +/// missing field both fall through to lockfile detection (`None`). +fn package_json_manager(dir: &Path) -> ProjectManagerDetection { + let json: Option = std::fs::read_to_string(dir.join("package.json")) + .ok() + .and_then(|raw| serde_json::from_str(&raw).ok()); + let Some(package_manager) = json + .as_ref() + .and_then(|j| j.get("packageManager")) + .and_then(|v| v.as_str()) + else { + return ProjectManagerDetection::None; + }; + parse_node_package_manager(package_manager) + .map(ProjectManagerDetection::Found) + .unwrap_or(ProjectManagerDetection::Ambiguous) +} + +fn parse_node_package_manager(raw: &str) -> Option { + let name = raw.trim().split('@').next().unwrap_or("").trim(); + match name { + "npm" => Some(PackageManager::Npm), + "yarn" => Some(PackageManager::Yarn), + "pnpm" => Some(PackageManager::Pnpm), + _ => None, + } +} + +/// Walk up looking for `uv.lock`, but stop at the nearest Python project +/// boundary (a `pyproject.toml` or requirements file without a `uv.lock` +/// beside it) — symmetric with [`detect_pip_project_from`], so a stray +/// `~/uv.lock` can't condemn every pip project beneath it. 
/// Reports whether `start` sits inside a uv project.
///
/// Each ancestor (starting with `start` itself) is inspected in order: a
/// `uv.lock` answers `true`; a `pyproject.toml` or a requirements file with no
/// `uv.lock` beside it answers `false`. Running out of ancestors is `false`.
fn detect_uv_project_from(start: &Path) -> bool {
    start
        .ancestors()
        .find_map(|dir| {
            if dir.join("uv.lock").is_file() {
                Some(true)
            } else if dir.join("pyproject.toml").is_file() || has_requirements_file(dir) {
                Some(false)
            } else {
                // No marker here: keep climbing.
                None
            }
        })
        .unwrap_or(false)
}

/// Reports whether `start` sits inside a requirements-file (pip) project.
///
/// Climbs the ancestors of `start`; the first directory holding a
/// `pyproject.toml` or `uv.lock` ends the search with `false` (that directory's
/// own requirements files are not consulted). A requirements file found before
/// such a directory answers `true`.
fn detect_pip_project_from(start: &Path) -> bool {
    for dir in start.ancestors() {
        if dir.join("pyproject.toml").is_file() || dir.join("uv.lock").is_file() {
            return false;
        }
        if has_requirements_file(dir) {
            return true;
        }
    }
    false
}

/// True when `dir` directly contains a regular file named like a requirements
/// file: `requirements*.txt`, `requirements*.in`, or `*-requirements.txt`.
/// An unreadable or missing directory counts as "no".
fn has_requirements_file(dir: &Path) -> bool {
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return false,
    };
    for entry in listing.flatten() {
        if !entry.path().is_file() {
            continue;
        }
        let raw_name = entry.file_name();
        let file_name = raw_name.to_string_lossy();
        let prefixed = file_name.starts_with("requirements")
            && (file_name.ends_with(".txt") || file_name.ends_with(".in"));
        if prefixed || file_name.ends_with("-requirements.txt") {
            return true;
        }
    }
    false
}
/// True when the pip arguments already opt out of the externally-managed
/// check: `--break-system-packages`, or one of the location flags
/// (`-t`, `--target`, `--prefix`, `--root`) given either as a bare flag
/// (value in the next argument) or in `flag=value` form.
fn pip_install_overrides_external_management(args: &[String]) -> bool {
    const VALUE_FLAGS: [&str; 4] = ["-t", "--target", "--prefix", "--root"];
    args.iter().any(|arg| {
        arg == "--break-system-packages"
            || VALUE_FLAGS.iter().any(|flag| {
                // `strip_prefix` matches both spellings without building a
                // temporary `"{flag}="` string for every comparison. A longer
                // flag that merely shares the prefix (`--target-dir`) is
                // rejected because its remainder does not start with `=`.
                arg.strip_prefix(*flag)
                    .is_some_and(|tail| tail.is_empty() || tail.starts_with('='))
            })
    })
}
/// Extracts the interpreter command from the `#!` line of the script at `path`.
///
/// Returns the interpreter followed by any arguments written on the shebang
/// line. A leading `env` (bare or as a path ending in `/env`) is dropped, along
/// with an immediately following `-S`.
///
/// Returns `None` when the file cannot be opened or read, the first line is not
/// a UTF-8 `#!` line, the line is longer than the bounded prefix that is read,
/// or the resulting executable name does not contain `python`.
///
/// Only a bounded prefix of the file is read and only the first line is
/// decoded, so a launcher whose shebang is followed by non-UTF-8 payload is
/// still recognised.
fn python_interpreter_from_shebang(path: &Path) -> Option<Vec<OsString>> {
    use std::io::Read;

    // Upper bound on how much of the file is inspected for the first line.
    const MAX_SHEBANG_BYTES: usize = 4096;

    let mut head = Vec::with_capacity(256);
    std::fs::File::open(path)
        .ok()?
        // Widening usize -> u64 cast; the constant is small.
        .take(MAX_SHEBANG_BYTES as u64)
        .read_to_end(&mut head)
        .ok()?;

    let line_len = match head.iter().position(|&byte| byte == b'\n') {
        Some(newline) => newline,
        // Whole (short) file is a single line without a trailing newline.
        None if head.len() < MAX_SHEBANG_BYTES => head.len(),
        // First line was truncated by the read limit: refuse to guess.
        None => return None,
    };
    // `trim` also removes a trailing `\r` from CRLF files.
    let first = std::str::from_utf8(&head[..line_len])
        .ok()?
        .strip_prefix("#!")?
        .trim();

    let words: Vec<&str> = first.split_whitespace().collect();
    let mut command: &[&str] = &words;
    if let [launcher, after_env @ ..] = words.as_slice() {
        if *launcher == "env" || launcher.ends_with("/env") {
            command = after_env;
            if let ["-S", after_flag @ ..] = after_env {
                command = after_flag;
            }
        }
    }

    let executable = command.first()?;
    if !executable.contains("python") {
        return None;
    }
    Some(command.iter().map(|word| OsString::from(*word)).collect())
}
+ let root = tempfile::tempdir().expect("tempdir"); + touch(&root.path().join("package-lock.json")); + let project = root.path().join("newapp"); + std::fs::create_dir(&project).expect("mkdir"); + std::fs::write(project.join("package.json"), "{}").expect("write manifest"); + + assert_eq!(detect_node_manager_from(&project), None); + + // Without its own package.json the walk still reaches the ancestor. + let bare = root.path().join("scratch"); + std::fs::create_dir(&bare).expect("mkdir"); + assert_eq!(detect_node_manager_from(&bare), Some(PackageManager::Npm)); + } + + #[test] + fn uv_walk_stops_at_a_nearer_python_project() { + // A pip project (requirements/pyproject, no uv.lock) must not be + // blamed for a stray uv.lock further up. + let root = tempfile::tempdir().expect("tempdir"); + touch(&root.path().join("uv.lock")); + let pip_project = root.path().join("legacy"); + std::fs::create_dir(&pip_project).expect("mkdir"); + touch(&pip_project.join("requirements.txt")); + + assert!(!detect_uv_project_from(&pip_project)); + + // The uv root itself (uv.lock beside pyproject.toml) still detects. + touch(&root.path().join("pyproject.toml")); + assert!(detect_uv_project_from(root.path())); + + // And a plain subdirectory of the uv project still walks up to it. + let sub = root.path().join("src"); + std::fs::create_dir(&sub).expect("mkdir"); + assert!(detect_uv_project_from(&sub)); + } +} diff --git a/src/precheck/exec.rs b/src/precheck/exec.rs new file mode 100644 index 0000000..6aa3429 --- /dev/null +++ b/src/precheck/exec.rs @@ -0,0 +1,80 @@ +//! Resolve and exec the real package manager, forwarding args and exit codes. 
+ +use std::ffi::OsString; +use std::process::Command; + +use super::PackageManager; + +pub(super) fn exec_install_with_args( + manager: PackageManager, + subcommand: &str, + rest: &[String], + stdout_to_stderr: bool, +) -> i32 { + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command_with_stdio(manager.binary_name(), &full, stdout_to_stderr) +} + +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. +pub(super) fn resolve_binary(binary: &str) -> Result { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + +pub(super) fn exec_command(binary: &str, args: &[String]) -> i32 { + exec_command_with_stdio(binary, args, false) +} + +/// `stdout_to_stderr` keeps stdout machine-readable under `--json`: the +/// package manager's own output moves to stderr so stdout carries only the +/// Corgea report. 
+pub(super) fn exec_command_with_stdio( + binary: &str, + args: &[String], + stdout_to_stderr: bool, +) -> i32 { + let resolved = match resolve_binary(binary) { + Ok(p) => p, + Err(msg) => { + eprintln!("{msg}"); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + let mut command = Command::new(&resolved); + command.args(&os_args); + if stdout_to_stderr { + command.stdout(std::io::stderr()); + } + match command.status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + } +} diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..6953efa --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,930 @@ +//! Install wrappers: `corgea npm`, `corgea yarn`, `corgea pnpm`, `corgea pip`, `corgea uv`. +//! +//! Wraps an install command from a supported package manager, resolves what +//! the package manager *would* install against the public registry, and either +//! blocks the install or runs it transparently. +//! +//! Verification rule: a package is rejected if the resolved version +//! was published within `--threshold` (default `2d`). This mirrors +//! the `deps` flow but applies to the install-time set of +//! packages instead of the already-locked set. +//! +//! By default a "recent" finding makes the wrapper exit with status 1 +//! *without* running the install. Use `--no-fail` to demote this to a +//! warning (the install runs anyway). + +mod detect; +mod exec; +mod parse; +mod render; +mod tree; +mod uv; +mod verdict; + +#[cfg(test)] +mod test_support; + +use std::time::Duration; + +use chrono::Utc; + +/// Supported package managers. 
Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Yarn, + Pnpm, + Pip, + Uv, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Yarn => "yarn", + PackageManager::Pnpm => "pnpm", + PackageManager::Pip => "pip", + PackageManager::Uv => "uv", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add"), + PackageManager::Yarn => matches!(sub, "add" | "install"), + PackageManager::Pnpm => matches!(sub, "add" | "install" | "i"), + PackageManager::Pip => matches!(sub, "install"), + PackageManager::Uv => false, + } + } + + /// vuln-api ecosystem for this manager's registry. + pub fn ecosystem(self) -> crate::vuln_api::Ecosystem { + match self { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + crate::vuln_api::Ecosystem::Npm + } + PackageManager::Pip | PackageManager::Uv => crate::vuln_api::Ecosystem::Pypi, + } + } + + /// Canonical package name for dedup/matching across spec spellings — + /// the ecosystem's rule (`vuln_api::Ecosystem::normalize_name`). + /// + /// Invariant: request-time normalization is owned by the vuln-api + /// client (`vuln_api::check_package_version`); comparison sites + /// (`verdict::apply_verdicts` / tree dedup) normalize here. Parsers + /// and resolvers carry raw names. + pub fn normalize_name(self, name: &str) -> String { + self.ecosystem().normalize_name(name) + } +} + +/// Auth and failure policy for the vuln-api verdict pass. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VerdictMode { + /// No auth header; vuln-api lookup errors warn and fail open. 
+ Public, + /// Auth header sent; vuln-api lookup errors fail closed. + Authenticated { token: String }, +} + +impl VerdictMode { + fn auth_token(&self) -> Option<&str> { + match self { + VerdictMode::Public => None, + VerdictMode::Authenticated { token } => Some(token.as_str()), + } + } +} + +/// Connection details for the vuln-api verdict pass. +/// Public mode is still a verdict pass: known vulnerable/malicious verdicts +/// block, while lookup errors warn and continue. +#[derive(Debug, Clone)] +pub struct VerdictConfig { + pub base_url: String, + pub mode: VerdictMode, + /// Print the tokenless public-mode hint after a check is attempted. + pub public_login_hint: bool, +} + +/// Threat verdict for one resolved target. +#[derive(Debug, Clone)] +pub enum VerdictStatus { + /// vuln-api answered: no known advisories for this exact version. + Clean, + /// vuln-api answered: known vulnerable or malicious — blocks. + Vulnerable(Vec), + /// The verdict could not be obtained (network/5xx/auth/integrity). + /// Blocks only in authenticated mode. + Unverifiable(String), + /// Verdict never attempted. The constant reason (`NO_VERDICT_REASON`) + /// is attached at render time. + NotChecked, +} + +impl VerdictStatus { + /// Whether this verdict blocks the install: vulnerable always; + /// unverifiable only when the mode fails closed (authenticated). + /// The single definition of "blocking finding", shared by + /// `verdict::block_reason` and the refusal-blame test. + fn blocks(&self, fail_closed: bool) -> bool { + match self { + VerdictStatus::Vulnerable(_) => true, + VerdictStatus::Unverifiable(_) => fail_closed, + VerdictStatus::Clean | VerdictStatus::NotChecked => false, + } + } +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never block: print findings (recent, vulnerable, + /// unverifiable) and run the install anyway. 
+ pub force: bool, + pub json: bool, + /// `Some` ⇒ run the vuln-api verdict pass against this endpoint. + /// `None` is retained for tests and direct library callers that want + /// recency-only behavior. + pub verdict: Option, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. + pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly. The blocking recency condition is derived from + /// `age` against the report's threshold (`PrecheckReport::is_recent`). + Resolved { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + verdict: VerdictStatus, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +/// Why a tree-pass finding is in the would-install set. Drives the +/// provenance label so a package the user asked for (or already depends on) +/// is never mislabeled "(transitive)". +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TreeOrigin { + /// Pulled in as a dependency of something else. 
+ Transitive, + /// Explicitly requested (pip report `"requested"` — CLI arg or + /// requirements file; leftovers here come from `-r` files since named + /// CLI targets match a named outcome instead). + Requested, + /// Already a direct dependency in the project's `package.json`. + PreExisting, + /// Pinned by the project's lockfile (`uv sync` from `uv.lock`). + Locked, +} + +impl TreeOrigin { + fn label(self) -> &'static str { + match self { + TreeOrigin::Transitive => "(transitive)", + TreeOrigin::Requested => "(from requirements)", + TreeOrigin::PreExisting => "(already in package.json)", + TreeOrigin::Locked => "(locked)", + } + } + + fn json_name(self) -> &'static str { + match self { + TreeOrigin::Transitive => "transitive", + TreeOrigin::Requested => "requested", + TreeOrigin::PreExisting => "pre-existing", + TreeOrigin::Locked => "locked", + } + } +} + +/// Verdict for one package the tree pass resolved beyond the named targets. +#[derive(Debug)] +pub struct TreeOutcome { + pub name: String, + pub version: String, + pub origin: TreeOrigin, + pub verdict: VerdictStatus, +} + +/// Result of the tree pass. `PrecheckReport.tree` is `None` when the pass +/// never ran (named-only managers, or verdicts disabled). +#[derive(Debug)] +pub enum TreeReport { + /// The full would-install set was resolved and verdicted. + Full { + /// Distinct packages the dry-run resolved (named + transitive). + resolved_count: usize, + /// Verdicts for resolved packages beyond the named targets. + transitive: Vec, + }, + /// Resolution unavailable or failed — only named targets were verified. + NamedOnly { reason: String }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, + /// `None` ⇒ no tree pass ran. 
+ pub tree: Option, + /// True when the command named nothing — no CLI targets and no + /// requirements files — so everything the tree pass resolved predates + /// this command (bare `npm install`). Distinct from + /// `outcomes.is_empty()`: a requirements-only install also has no named + /// outcomes, but its resolved set IS added by the command. + pub bare_install: bool, +} + +impl PrecheckReport { + fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { + self.outcomes.iter().filter(|o| pred(o)).count() + } + /// True when this age is within the recency threshold (the blocking + /// condition). The single definition of "recent". + fn is_recent(&self, age: Duration) -> bool { + age < self.threshold + } + pub fn ok_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if !self.is_recent(*age))) + } + pub fn recent_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) + } + /// Every verdict in the report: named (resolved) outcomes, then + /// transitive tree findings. + fn verdicts(&self) -> impl Iterator { + self.named_verdicts().chain(self.tree_verdicts()) + } + /// Verdicts on the named targets this command adds. + fn named_verdicts(&self) -> impl Iterator { + self.outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => Some(verdict), + _ => None, + }) + } + /// Verdicts beyond the named targets (the resolved tree). + fn tree_verdicts(&self) -> impl Iterator { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => transitive.as_slice(), + Some(TreeReport::NamedOnly { .. 
}) | None => &[], + } + .iter() + .map(|o| &o.verdict) + } + pub fn vulnerable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + pub fn unverifiable_count(&self) -> usize { + self.verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } + /// Vulnerable findings beyond the named targets (the resolved tree). + pub fn tree_vulnerable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + /// Unverifiable findings beyond the named targets (the resolved tree). + pub fn tree_unverifiable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } + pub fn skipped_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) + } + pub fn error_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Error { .. })) + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. +pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if manager == PackageManager::Uv { + return uv::run_uv(cmd, opts); + } + + if cmd.is_empty() { + // Bare `yarn` IS `yarn install` — route it through the install + // path so the bare-install note prints instead of a silent exec. + if manager == PackageManager::Yarn { + let install = ["install".to_string()]; + return run_install(manager, &install, opts); + } + return exec::exec_command(manager.binary_name(), &[]); + } + + // The install verb may follow global flags (`npm --silent install x`); + // route on the first non-flag token so flags-before-verb can't slip + // past the gate ungated. 
+ let Some(verb_idx) = find_subcommand(manager, cmd) else { + return exec::exec_command(manager.binary_name(), cmd); + }; + let subcommand = &cmd[verb_idx]; + let rest_vec: Vec = cmd[..verb_idx] + .iter() + .chain(&cmd[verb_idx + 1..]) + .cloned() + .collect(); + let rest = rest_vec.as_slice(); + + if manager == PackageManager::Pip && subcommand == "add" { + return refuse_guard(&opts, unsupported_pip_add_message(rest), 1); + } + + // `npm ci` installs the lockfile exactly as written — gate it from the + // project lockfile like `uv sync` is gated from `uv.lock`. + if manager == PackageManager::Npm + && matches!( + subcommand.as_str(), + "ci" | "ic" | "clean-install" | "install-clean" | "isntall-clean" + ) + { + return run_npm_ci(subcommand, rest, opts); + } + + if !manager.is_install_subcommand(subcommand) { + // Non-install subcommand: transparent passthrough, args untouched. + return exec::exec_command(manager.binary_name(), cmd); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + return refuse_guard(&opts, format!("failed to parse install args: {}", e), 2); + } + }; + + // Project guards. `--force` (documented as overriding every block) is + // the escape hatch — a stray ancestor lockfile must not leave the + // command permanently refused. + if !opts.force { + if let Some(message) = detect::wrong_package_manager_message(manager, rest, &parsed) { + return refuse_guard(&opts, message, 1); + } + + if let Some(message) = detect::externally_managed_pip_message(manager, rest, &parsed) { + return refuse_guard(&opts, message, 1); + } + } + + let json = opts.json; + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec::exec_install_with_args(manager, subcommand, rest, json), + opts, + ) +} + +/// Index of the first non-flag token in `cmd` — the subcommand verb. 
/// Builds the suggested-command string shown in "Did you mean …" messages:
/// `corgea`, then `words`, then `rest`, joined by single spaces.
fn corgea_cmd(words: &[&str], rest: &[String]) -> String {
    std::iter::once("corgea")
        .chain(words.iter().copied())
        .chain(rest.iter().map(String::as_str))
        .collect::<Vec<&str>>()
        .join(" ")
}

/// When the first argument is `pip3`, returns the "unknown package manager"
/// error text suggesting the equivalent `corgea pip …` invocation; otherwise
/// (including for empty `args`) returns `None`.
pub fn pip3_alias_message(args: &[String]) -> Option<String> {
    let (first, rest) = args.split_first()?;
    if first.as_str() != "pip3" {
        return None;
    }
    let suggestion = corgea_cmd(&["pip"], rest);
    Some(format!(
        "error: unknown package manager `pip3`.\nDid you mean `{suggestion}`?"
    ))
}

/// Error text for `corgea pip add …`, pointing at `corgea pip install …`
/// with the same trailing arguments.
fn unsupported_pip_add_message(rest: &[String]) -> String {
    let suggestion = corgea_cmd(&["pip", "install"], rest);
    format!("error: pip does not support `add`.\nDid you mean `{suggestion}`?")
}
+fn report_and_exec( + report: &PrecheckReport, + opts: &PrecheckOptions, + exec: impl FnOnce() -> i32, +) -> i32 { + if opts.json { + render::print_json(report, opts); + } else { + render::print_text(report); + } + render::warn_public_lookup_failures(report, opts); + if let Some(reason) = verdict::block_reason(report, opts) { + if !opts.json { + render::print_refusal(reason); + } + return 1; + } + exec() +} + +/// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. +fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, +) -> i32 { + // With a verdict config, the tree pass resolves the full would-install + // set; `tree::covers_input` owns what each manager's resolver can chew on. + let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); + let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); + + if parsed.targets.is_empty() && !tree_eligible { + // Only a truly bare install gets the bare note. A `-r requirements.txt` + // install is covered by `requirements_note`. + if bare_install { + render::bare_install_note(manager, subcommand_label); + } + render::requirements_note(&parsed); + return exec(); + } + + // The named-target registry lookups and the tree dry-run are independent + // network/subprocess work — overlap them; verdicts need both. 
+ let now = Utc::now(); + let (mut outcomes, tree_resolution) = std::thread::scope(|s| { + let tree = tree_eligible.then(|| s.spawn(|| tree::resolve_tree(manager, rest, &parsed))); + let outcomes = verdict::verify_all(&parsed.targets, &opts, &now); + ( + outcomes, + tree.map(|handle| handle.join().expect("tree resolution thread panicked")), + ) + }); + + let tree = if let Some(resolution) = tree_resolution { + Some(run_tree_pass( + manager, + resolution, + &mut outcomes, + &parsed, + &opts, + &now, + )) + } else { + run_verdict_pass(manager, &mut outcomes, &opts); + None + }; + + // The mandatory loud warning when the tree pass fell back to named-only. + if let Some(TreeReport::NamedOnly { reason }) = &tree { + eprintln!( + "warning: transitive dependencies not checked ({reason}); only named packages were verified." + ); + } + // The requirements note only matters when the tree pass did *not* cover + // those files (fallback to named-only, or verdicts disabled). + if !matches!(&tree, Some(TreeReport::Full { .. })) { + render::requirements_note(&parsed); + } + if verdict::public_verdict(&opts).is_some_and(|cfg| cfg.public_login_hint) { + eprintln!( + "warning: using public CVE checks; login enables authenticated enforcement and private Corgea intelligence." + ); + } + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + tree, + bare_install, + }; + + report_and_exec(&report, &opts, exec) +} + +/// Gate a lockfile-pinned install (`uv sync`, `npm ci`): verdict every +/// locked package. Recency isn't checked — locked versions aren't newly +/// chosen by this command; the verdict pass is the gate. 
+fn run_locked_install( + manager: PackageManager, + subcommand: &str, + original_args: Vec, + lock: Result, String>, + opts: &PrecheckOptions, + exec: impl FnOnce() -> i32, +) -> i32 { + let Some(cfg) = &opts.verdict else { + // Direct callers may still disable verdicts completely. + return exec(); + }; + let jobs = match lock { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!( + "warning: cannot verify '{} {}' ({e}); proceeding under --force", + manager.binary_name(), + subcommand + ); + return exec(); + } + Err(e) => { + // The single documented bypass of the "all blocking goes through + // `verdict::block_reason`" invariant: an unparsable lockfile + // means there is no report to feed the predicate, so the gate + // refuses directly (--force above is the only escape). + eprintln!( + "error: cannot verify '{} {}': {e} (pass --force to proceed unchecked)", + manager.binary_name(), + subcommand + ); + return 1; + } + }; + + let resolved_count = jobs.len(); + let results = verdict::verdict_pool(jobs, cfg, manager); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager, + subcommand: subcommand.to_string(), + original_args, + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + report_and_exec(&report, opts, exec) +} + +/// `npm ci` (and aliases): installs the project lockfile exactly as +/// written, so the gate verdicts the lockfile-pinned set directly — no +/// dry-run needed. Without a project or lockfile npm errors on its own; +/// the gate just execs. 
+fn run_npm_ci(subcommand: &str, rest: &[String], opts: PrecheckOptions) -> i32 { + let json = opts.json; + let exec = || exec::exec_install_with_args(PackageManager::Npm, subcommand, rest, json); + + if opts.verdict.is_none() { + return exec(); + } + let Some(root) = tree::npm_project_root() else { + return exec(); + }; + let Some(lock_path) = ["package-lock.json", "npm-shrinkwrap.json"] + .iter() + .map(|n| root.join(n)) + .find(|p| p.is_file()) + else { + return exec(); + }; + + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read {}: {e}", lock_path.display())) + .and_then(|content| tree::parse_npm_lockfile(&content)); + run_locked_install( + PackageManager::Npm, + subcommand, + rest.to_vec(), + lock, + &opts, + exec, + ) +} + +/// One verdict job (`requested: true`) per named resolved target, in +/// outcome order. +fn resolved_jobs(outcomes: &[TargetOutcome]) -> impl Iterator + '_ { + outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { resolved, .. } => Some(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + requested: true, + }), + _ => None, + }) +} + +/// Verdict the resolved would-install set (`tree::resolve_tree`'s result). +/// On any resolution failure, fall back to the named-only verdict pass; the +/// caller renders the loud warning from the returned `NamedOnly` reason. +/// Only called when `opts.verdict.is_some()`. 
+fn run_tree_pass( + manager: PackageManager, + resolution: Result, String>, + outcomes: &mut Vec, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TreeReport { + let set = match resolution { + Ok(set) => set, + Err(reason) => { + outcomes.extend(requirements_fallback_outcomes(manager, parsed, opts, now)); + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { reason }; + } + }; + + // Dedup the dry-run set (npm lockfiles repeat the same name@version at + // multiple nested paths), then union in the named-resolved targets — a + // named target already installed is absent from the dry-run delta but + // must still be verdicted. + let norm = |n: &str| manager.normalize_name(n); + let mut seen = std::collections::HashSet::new(); + let mut jobs: Vec = Vec::with_capacity(set.len()); + for p in set { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + let resolved_count = jobs.len(); + for p in resolved_jobs(outcomes) { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + + // npm leftovers that are direct deps of the project manifest are + // pre-existing, not transitive. pip carries `requested` instead. 
+ let direct_deps = if manager == PackageManager::Npm { + tree::project_direct_deps() + } else { + Default::default() + }; + + let cfg = opts + .verdict + .as_ref() + .expect("tree pass requires verdict config"); + let results = verdict::verdict_pool(jobs, cfg, manager); + let transitive = verdict::apply_verdicts(manager, results, outcomes, &direct_deps); + TreeReport::Full { + resolved_count, + transitive, + } +} + +fn requirements_fallback_outcomes( + manager: PackageManager, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> Vec { + if !matches!(manager, PackageManager::Pip | PackageManager::Uv) + || parsed.requirements_files.is_empty() + { + return Vec::new(); + } + + let mut targets = Vec::new(); + let mut outcomes = Vec::new(); + for file in &parsed.requirements_files { + match parse::parse_requirement_file_targets(file) { + Ok(mut file_targets) => targets.append(&mut file_targets), + Err(error) => outcomes.push(TargetOutcome::Error { + target: InstallTarget { + name: file.display().to_string(), + display: file.display().to_string(), + kind: TargetKind::Unverifiable { + reason: "requirements file could not be read".to_string(), + }, + }, + error, + }), + } + } + + outcomes.extend(verdict::verify_all(&targets, opts, now)); + outcomes +} + +/// Vuln-api verdict pass over resolved targets, run through the bounded +/// worker pool. No-op without a `VerdictConfig` (direct recency-only callers). +/// Any client/call failure becomes `Unverifiable`; authenticated mode blocks +/// on that and public mode warns but continues. +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + // One job per resolved target, in outcome order; the pool preserves + // order, so verdicts zip straight back onto the resolved outcomes. 
+ let jobs: Vec = resolved_jobs(outcomes).collect(); + + let mut results = verdict::verdict_pool(jobs, cfg, manager).into_iter(); + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { verdict, .. } = o { + *verdict = match results.next() { + Some((_, v)) => v, + // Pool invariant broken — fail safe instead of panicking: + // Unverifiable blocks in authenticated mode and warns in + // public mode, same as a client failure. + None => VerdictStatus::Unverifiable( + "internal error: verdict pool returned fewer results than outcomes".to_string(), + ), + }; + } + } +} + +#[cfg(test)] +mod tests { + use super::test_support::*; + use super::*; + + #[test] + fn install_subcommand_recognition() { + assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Yarn.is_install_subcommand("add")); + assert!(PackageManager::Yarn.is_install_subcommand("install")); + + assert!(PackageManager::Pnpm.is_install_subcommand("add")); + assert!(PackageManager::Pnpm.is_install_subcommand("install")); + assert!(PackageManager::Pnpm.is_install_subcommand("i")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + /// Run `run_parsed_install` for `pip install ` with an exec + /// closure that records whether it ran (returning 42 instead of + /// spawning anything). 
+ fn gate_pip_install(args: &[&str], opts: PrecheckOptions) -> (i32, bool) { + let rest: Vec = args.iter().map(|s| s.to_string()).collect(); + let parsed = parse::parse_install_args(PackageManager::Pip, &rest).expect("parse"); + let mut exec_ran = false; + let code = run_parsed_install( + PackageManager::Pip, + "install", + &rest, + parsed, + || { + exec_ran = true; + 42 + }, + opts, + ); + (code, exec_ran) + } + + #[test] + fn unverifiable_target_skips_and_proceeds() { + // git+ spec → Skipped outcome, no registry hit, install proceeds. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn bare_install_passes_through_without_verification() { + // Bare `pip install` (no targets) → straight exec, no registry hit. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&[], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn requirements_files_note_then_exec() { + // `-r reqs.txt` alone → printed note, no verification, exec runs. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn ecosystem_mapping() { + use crate::vuln_api::Ecosystem; + assert_eq!(PackageManager::Pip.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Uv.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Npm.ecosystem(), Ecosystem::Npm); + assert_eq!(PackageManager::Yarn.ecosystem(), Ecosystem::Npm); + assert_eq!(PackageManager::Pnpm.ecosystem(), Ecosystem::Npm); + } + + #[test] + fn normalize_name_per_manager() { + // pypi: PEP 503 — lowercase, separator runs collapse to one `-`. 
+ assert_eq!( + PackageManager::Pip.normalize_name("Flask_Cors"), + "flask-cors" + ); + assert_eq!( + PackageManager::Uv.normalize_name("zope.interface"), + "zope-interface" + ); + assert_eq!(PackageManager::Pip.normalize_name("a__b"), "a-b"); + // npm names are case-sensitive and pass through verbatim. + assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..b1a29ed --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,1240 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::{Path, PathBuf}; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — requirements files are only noted + /// (not verified) by the baseline gate. + pub requirements_files: Vec, +} + +/// `uv pip install` argument list (everything after `pip install`). +pub fn parse_pip_install_args(args: &[String]) -> Result { + Ok(build_parsed_install( + extract_pip_positionals(args)?, + parse_pypi_spec, + )) +} + +/// `uv add` argument list (everything after `add`). 
+pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { + build_parsed_install( + extract_node_positionals(PackageManager::Uv, args), + parse_pypi_spec, + ) +} + +fn build_parsed_install( + positionals: PositionalSplit, + parse_spec: fn(&str) -> InstallTarget, +) -> ParsedInstall { + ParsedInstall { + targets: positionals + .specs + .iter() + .map(|raw| parse_spec(raw)) + .collect(), + requirements_files: positionals.requirements_files, + } +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => parse_pip_install_args(args), + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => Ok( + build_parsed_install(extract_node_positionals(manager, args), parse_npm_spec), + ), + PackageManager::Uv => unreachable!("uv uses classify_uv_command"), + } +} + +/// Best-effort extraction of registry-installable entries from pip +/// requirements files. This is a fallback for when pip's full dry-run cannot +/// resolve the tree. It deliberately skips file-level options and constraints, +/// while preserving URL/VCS/editable entries as unverifiable targets. +pub(super) fn parse_requirement_file_targets(path: &Path) -> Result, String> { + let mut seen = std::collections::HashSet::new(); + parse_requirement_file_targets_inner(path, &mut seen) +} + +fn parse_requirement_file_targets_inner( + path: &Path, + seen: &mut std::collections::HashSet, +) -> Result, String> { + let path_for_io = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .map_err(|e| format!("read {}: {e}", path.display()))? 
+ .join(path) + }; + let seen_key = std::fs::canonicalize(&path_for_io).unwrap_or_else(|_| path_for_io.clone()); + if !seen.insert(seen_key) { + return Ok(Vec::new()); + } + + let content = std::fs::read_to_string(&path_for_io) + .map_err(|e| format!("read {}: {e}", path.display()))?; + let base = path_for_io.parent().unwrap_or_else(|| Path::new(".")); + let mut targets = Vec::new(); + + for line in requirement_logical_lines(&content) { + match requirement_line_entry(&line) { + Some(RequirementLineEntry::Target(spec)) => targets.push(parse_pypi_spec(&spec)), + Some(RequirementLineEntry::Include(include)) => { + targets.extend(parse_requirement_file_targets_inner( + &base.join(include), + seen, + )?); + } + None => {} + } + } + + Ok(targets) +} + +enum RequirementLineEntry { + Target(String), + Include(PathBuf), +} + +fn requirement_logical_lines(content: &str) -> Vec { + let mut lines = Vec::new(); + let mut current = String::new(); + + for raw in content.lines() { + let trimmed = raw.trim_end(); + let (part, continued) = match trimmed.strip_suffix('\\') { + Some(part) => (part.trim_end(), true), + None => (trimmed, false), + }; + if !current.is_empty() { + current.push(' '); + } + current.push_str(part.trim()); + if !continued { + lines.push(std::mem::take(&mut current)); + } + } + + if !current.trim().is_empty() { + lines.push(current); + } + lines +} + +fn requirement_line_entry(line: &str) -> Option { + let line = strip_requirement_comment(line); + if line.is_empty() { + return None; + } + + if let Some(path) = requirement_flag_value(line, "-r", "--requirement") { + return Some(RequirementLineEntry::Include(PathBuf::from(path))); + } + if requirement_flag_value(line, "-c", "--constraint").is_some() { + return None; + } + if let Some(path) = requirement_flag_value(line, "-e", "--editable") { + return Some(RequirementLineEntry::Target(format!("-e {path}"))); + } + + if line.starts_with('-') { + return None; + } + + let spec = 
strip_inline_requirement_options(line); + (!spec.is_empty()).then(|| RequirementLineEntry::Target(spec.to_string())) +} + +fn strip_requirement_comment(line: &str) -> &str { + let trimmed = line.trim(); + if trimmed.starts_with('#') { + return ""; + } + [" #", "\t#"] + .iter() + .filter_map(|marker| trimmed.find(marker)) + .min() + .map_or(trimmed, |idx| trimmed[..idx].trim()) +} + +fn requirement_flag_value<'a>(line: &'a str, short: &str, long: &str) -> Option<&'a str> { + let mut parts = line.split_whitespace(); + let first = parts.next()?; + if first == short || first == long { + return parts.next(); + } + if let Some(value) = first.strip_prefix(&format!("{long}=")) { + return Some(value); + } + first + .strip_prefix(short) + .filter(|value| !value.is_empty() && !value.starts_with('-')) +} + +fn strip_inline_requirement_options(line: &str) -> &str { + [ + " --hash", + " --config-setting", + " --global-option", + " --install-option", + ] + .iter() + .filter_map(|marker| line.find(marker)) + .min() + .map_or(line.trim(), |idx| line[..idx].trim()) +} + +/// Install-shaped `uv` invocations we know how to verify. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UvCommand<'a> { + Passthrough, + PipInstall { + install_args: &'a [String], + }, + /// `uv pip sync reqs.txt` — installs exactly the given requirements + /// set; gated like `uv pip install -r reqs.txt`. + PipSync { + sync_args: &'a [String], + }, + Add { + add_args: &'a [String], + }, + /// `uv sync` — installs the locked project environment; gated from + /// `uv.lock`. (`uv lock` stays passthrough: it installs nothing.) 
+ Sync, +} + +pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> { + match cmd.first().map(String::as_str) { + Some("pip") if matches!(cmd.get(1).map(String::as_str), Some("install" | "i")) => { + UvCommand::PipInstall { + install_args: &cmd[2..], + } + } + Some("pip") if cmd.get(1).map(String::as_str) == Some("sync") => UvCommand::PipSync { + sync_args: &cmd[2..], + }, + Some("add") => UvCommand::Add { + add_args: &cmd[1..], + }, + Some("sync") => UvCommand::Sync, + _ => UvCommand::Passthrough, + } +} + +/// `uv pip sync` argument list: positionals are requirements files, not +/// package specs. +pub fn parse_pip_sync_args(args: &[String]) -> ParsedInstall { + let split = extract_node_positionals(PackageManager::Uv, args); + let mut requirements_files = split.requirements_files; + requirements_files.extend(split.specs.iter().map(PathBuf::from)); + ParsedInstall { + targets: Vec::new(), + requirements_files, + } +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Known install flags that take a separate value argument, per manager. +/// The fallback heuristic in [`skip_unknown_flag`] only skips URL/path-like +/// values, so a bare-word value (`-w my-workspace`) would otherwise parse — +/// and get verified or blocked — as a package spec. Not exhaustive; the +/// heuristic still backstops anything unlisted. The same letter can differ +/// by manager: npm's `-w ` takes a value, while pnpm's `-w` +/// (workspace-root) and yarn's `-W` are boolean. 
+pub(super) fn takes_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + | "--install-strategy" + | "--before" + | "--cpu" + | "--os" + | "--libc" + | "--otp" + | "--location" + | "--cache" + | "--script-shell" + | "--userconfig" + | "--globalconfig" + | "--depth" + ), + PackageManager::Pnpm => matches!( + flag, + "-C" | "--dir" + | "--filter" + | "--registry" + | "--reporter" + | "--loglevel" + | "--store-dir" + | "--virtual-store-dir" + | "--modules-dir" + | "--lockfile-dir" + ), + PackageManager::Yarn => matches!( + flag, + "--registry" + | "--modules-folder" + | "--cache-folder" + | "--mutex" + | "--network-timeout" + | "--network-concurrency" + | "--global-folder" + | "--link-folder" + | "--preferred-cache-folder" + ), + PackageManager::Uv => matches!( + flag, + "--group" + | "--extra" + | "--index" + | "--default-index" + | "--index-url" + | "--extra-index-url" + | "-f" + | "--find-links" + | "--index-strategy" + | "--keyring-provider" + | "--tag" + | "--branch" + | "--rev" + | "--package" + | "-c" + | "--constraints" + | "--constraint" + | "--overrides" + | "-p" + | "--python" + | "--resolution" + | "--prerelease" + | "--exclude-newer" + | "--directory" + | "--project" + | "--config-setting" + | "--link-mode" + ), + PackageManager::Pip => matches!( + flag, + "-i" | "--index-url" + | "--extra-index-url" + | "-f" + | "--find-links" + | "--platform" + | "--python-version" + | "--implementation" + | "--abi" + | "-t" + | "--target" + | "--prefix" + | "--root" + | "--src" + | "--upgrade-strategy" + | "--no-binary" + | "--only-binary" + | "--progress-bar" + | "--proxy" + | "--retries" + | "--timeout" + | "--exists-action" + | "--trusted-host" + | "--cert" + | "--client-cert" + | "--cache-dir" + | "--log" + | "--python" + | "--keyring-provider" + | "--report" + | "--use-feature" + | 
"--use-deprecated" + | "--config-settings" + | "-C" + | "--global-option" + | "--hash" + ), + } +} + +/// Strip flags from a npm/yarn/pnpm (or `uv add`) install argument list, +/// returning only the positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; known value-taking flags ([`takes_value`]) skip the +/// next token outright, anything else skips it only if it looks like a +/// value (a URL / path), never like a package spec. +fn extract_node_positionals(manager: PackageManager, args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + // `uv add -r reqs.txt` adds the file's entries as dependencies — + // track the file like pip's `-r` so the gate covers its contents. + if manager == PackageManager::Uv { + if matches!(a.as_str(), "-r" | "--requirements" | "--requirement") { + if let Some(path) = args.get(i + 1) { + out.requirements_files.push(PathBuf::from(path)); + } + i += 2; + continue; + } + if let Some(rest) = a + .strip_prefix("--requirements=") + .or_else(|| a.strip_prefix("--requirement=")) + { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + } + if !a.contains('=') && takes_value(manager, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// Advance past an unknown flag at `i`. 
`--flag=value` is self-contained; +/// otherwise peek at the next arg and skip it too if it doesn't look like +/// a package spec (contains `://` or is path-like) — see the heuristic +/// rationale on [`extract_node_positionals`]. +fn skip_unknown_flag(args: &[String], i: usize) -> usize { + if args[i].contains('=') { + return i + 1; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i + if next_is_value { 2 } else { 1 } +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. + let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Attached short-option forms (pip's optparse): `-rreqs.txt`, + // `-cfile`, `-e./path`. 
Missing these would silently skip the + // whole gate (`-rreqs.txt` would read as a boolean flag and the + // install would look bare). + if let Some(path) = attached_short_value(a, "-r") { + out.requirements_files.push(PathBuf::from(path)); + i += 1; + continue; + } + if attached_short_value(a, "-c").is_some() { + i += 1; + continue; + } + if let Some(path) = attached_short_value(a, "-e") { + out.specs.push(format!("-e {}", path)); + i += 1; + continue; + } + // Long-form `--requirement=foo.txt`. + if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if a.strip_prefix("--constraint=").is_some() { + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(PackageManager::Pip, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// `-rreqs.txt` → `reqs.txt`: the value attached directly to a short +/// option. `None` for the bare flag itself (handled by the exact-match +/// arms) and for long `--` forms. +fn attached_short_value<'a>(arg: &'a str, flag: &str) -> Option<&'a str> { + arg.strip_prefix(flag).filter(|rest| !rest.is_empty()) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. 
+fn parse_npm_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", + "git:", + "git@", + "github:", + "gist:", + "bitbucket:", + "gitlab:", + "ssh://", + "http://", + "https://", + "file:", + "./", + "../", + "/", + "~/", + "npm:", + "workspace:", + ]; + if let Some(p) = unverifiable_prefixes + .iter() + .find(|p| trimmed.starts_with(*p)) + { + let reason = match *p { + "npm:" => "npm: aliased dependency — registry verification skipped", + "workspace:" => "workspace: dependency — registry verification skipped", + _ => "spec is a URL/git/filesystem reference — registry verification skipped", + }; + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: reason.to_string(), + }, + }; + } + + // Bare `.` / `..` install the current/parent directory; `user/repo` + // (one `/`, not an `@scope/` name) is npm's GitHub shorthand. Neither + // is a registry package — resolving them would 404 and (in + // authenticated mode) block a command plain npm accepts. + if trimmed == "." || trimmed == ".." { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a filesystem path — registry verification skipped".to_string(), + }, + }; + } + if !trimmed.starts_with('@') && trimmed.contains('/') { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a GitHub shorthand or path — registry verification skipped" + .to_string(), + }, + }; + } + + // Find the version separator. Scoped names start with `@` and the + // version separator is the *next* `@` (if any). Unscoped names + // use the first `@`. 
+ let (name_part, spec_part): (&str, &str) = if let Some(rest) = trimmed.strip_prefix('@') { + match rest.find('@') { + Some(at_in_rest) => { + let split = 1 + at_in_rest; + (&trimmed[..split], &trimmed[split + 1..]) + } + None => (trimmed, ""), + } + } else { + match trimmed.find('@') { + Some(at) => (&trimmed[..at], &trimmed[at + 1..]), + None => (trimmed, ""), + } + }; + + let name = name_part.trim().to_string(); + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() || spec_str.eq_ignore_ascii_case("latest") { + TargetKind::Npm(NpmSpec::Latest) + } else if semver::Version::parse(spec_str).is_ok() { + TargetKind::Npm(NpmSpec::Exact(spec_str.to_string())) + } else if let Some(rest) = spec_str + .strip_prefix('v') + .filter(|rest| semver::Version::parse(rest).is_ok()) + { + // npm coerces a leading `v` (`pkg@v1.2.3` installs 1.2.3); without + // this it would read as a dist-tag and error. + TargetKind::Npm(NpmSpec::Exact(rest.to_string())) + } else if looks_like_npm_range(spec_str) { + TargetKind::Npm(NpmSpec::Range(spec_str.to_string())) + } else if is_npm_dist_tag(spec_str) { + TargetKind::Npm(NpmSpec::Tag(spec_str.to_string())) + } else { + TargetKind::Unverifiable { + reason: format!( + "could not classify version spec '{}' (not a valid semver, range, or dist-tag)", + spec_str + ), + } + }; + + InstallTarget { + name, + display, + kind, + } +} + +/// Loose check: does this spec look like an npm version range? +/// We accept anything that *starts* with a range metacharacter +/// (`^`, `~`, `>`, `<`, `=`, `*`) or with a digit (so `1.x`, `1.2.x`, +/// and bare ranges still resolve). Validation against the registry's +/// version list happens later inside the resolver. 
+fn looks_like_npm_range(s: &str) -> bool {
+    // Only the first character is inspected: a range operator, a wildcard,
+    // or a leading ASCII digit.
+    matches!(
+        s.chars().next(),
+        Some('^') | Some('~') | Some('>') | Some('<') | Some('=') | Some('*')
+    ) || s
+        .chars()
+        .next()
+        .map(|c| c.is_ascii_digit())
+        .unwrap_or(false)
+}
+
+/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`,
+/// `next`, `beta`, `alpha-1`). We reject anything that contains
+/// version-spec metacharacters.
+fn is_npm_dist_tag(s: &str) -> bool {
+    !s.is_empty()
+        && s.chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.')
+        && s.chars()
+            .next()
+            .map(|c| c.is_ascii_alphabetic())
+            .unwrap_or(false)
+}
+
+/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`,
+/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`.
+///
+/// `display` always keeps the raw input. For unverifiable specs `name` is the
+/// trimmed input; otherwise it is the bare name with extras removed.
+fn parse_pypi_spec(raw: &str) -> InstallTarget {
+    let display = raw.to_string();
+    let trimmed = raw.trim();
+
+    let unverifiable_prefixes = [
+        "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/",
+        "-e ", "-e=",
+    ];
+    if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) {
+        return InstallTarget {
+            name: trimmed.to_string(),
+            display,
+            kind: TargetKind::Unverifiable {
+                reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(),
+            },
+        };
+    }
+
+    // Strip the PEP 508 environment marker first — its comparison operators
+    // (`; python_version >= "3.7"`) must not be mistaken for version
+    // operators, which would split the name inside the marker.
+    let req_part = trimmed.split(';').next().unwrap_or(trimmed).trim();
+
+    // PEP 508 direct reference: `name @ https://…` — unverifiable like a
+    // bare URL (never a registry lookup, never a block).
+    if let Some((_, after_at)) = req_part.split_once('@') {
+        if after_at.contains("://") {
+            return InstallTarget {
+                name: trimmed.to_string(),
+                display,
+                kind: TargetKind::Unverifiable {
+                    reason: "spec is a PEP 508 direct reference (name @ url) — registry verification skipped".to_string(),
+                },
+            };
+        }
+    }
+
+    // Bare `.` / `..` and anything with a path separator install from the
+    // filesystem (`pip install .`), not the registry.
+    if req_part == "." || req_part == ".." || req_part.contains('/') || req_part.contains('\\') {
+        return InstallTarget {
+            name: trimmed.to_string(),
+            display,
+            kind: TargetKind::Unverifiable {
+                reason: "spec is a filesystem path — registry verification skipped".to_string(),
+            },
+        };
+    }
+
+    // Split at the leftmost specifier operator (`==`, `>=`, `<=`, `!=`,
+    // `~=`, `>`, `<`; PEP 440 also allows `===`). Only the index matters —
+    // the operator itself stays with the spec part.
+    let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"];
+    let split_at = separators.iter().filter_map(|sep| req_part.find(sep)).min();
+
+    let (name_part, spec_part): (&str, &str) = match split_at {
+        Some(idx) => (&req_part[..idx], &req_part[idx..]),
+        None => (req_part, ""),
+    };
+
+    // Strip extras: `requests[security]` -> `requests`.
+    let name_no_extras = name_part
+        .split_once('[')
+        .map_or(name_part, |(n, _)| n)
+        .trim();
+
+    let spec_str = spec_part.trim();
+
+    // `===` must be tested before `==`, which is a prefix of it.
+    let kind = if spec_str.is_empty() {
+        TargetKind::Pypi(PypiSpec::Latest)
+    } else if let Some(rest) = spec_str.strip_prefix("===") {
+        TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string()))
+    } else if let Some(rest) = spec_str.strip_prefix("==") {
+        let v = rest.trim();
+        if v.is_empty() {
+            TargetKind::Unverifiable {
+                reason: "empty `==` specifier".to_string(),
+            }
+        } else if v.contains('*') {
+            // Wildcard pin (`==1.4.*`) — a range, not a literal version;
+            // the resolver desugars it.
+            TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string()))
+        } else {
+            TargetKind::Pypi(PypiSpec::Exact(v.to_string()))
+        }
+    } else {
+        TargetKind::Pypi(PypiSpec::Specifier(spec_str.to_string()))
+    };
+
+    InstallTarget {
+        name: name_no_extras.to_string(),
+        display,
+        kind,
+    }
+}
+
+/// Bare PyPI name from a requirement line: stop at extras, operators,
+/// markers, or whitespace. Callers normalize when they need a comparison key.
+pub(super) fn pypi_name_part(spec: &str) -> &str {
+    let stop = |c: char| matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ');
+    let cut = spec.find(stop).unwrap_or(spec.len());
+    spec[..cut].trim()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn extracts_npm_positionals_skipping_flags() {
+        let args = vec![
+            "axios".to_string(),
+            "--save-dev".to_string(),
+            "@types/node@latest".to_string(),
+            "-D".to_string(),
+            "--registry".to_string(),
+            "https://example.com/registry".to_string(),
+            "lodash@^4.0.0".to_string(),
+        ];
+        let p = extract_node_positionals(PackageManager::Npm, &args);
+        assert_eq!(
+            p.specs,
+            vec![
+                "axios".to_string(),
+                "@types/node@latest".to_string(),
+                "lodash@^4.0.0".to_string(),
+            ]
+        );
+    }
+
+    #[test]
+    fn npm_workspace_flag_value_is_not_a_spec() {
+        // npm's `-w <name>` / `--workspace <name>` take a bare-word value;
+        // it must never be verified (or blocked) as a package spec.
+        for flag in ["-w", "--workspace"] {
+            let args = vec![
+                flag.to_string(),
+                "my-workspace".to_string(),
+                "lodash".to_string(),
+            ];
+            let p = extract_node_positionals(PackageManager::Npm, &args);
+            assert_eq!(p.specs, vec!["lodash".to_string()], "flag {flag}");
+        }
+        // `--workspace=name` is self-contained.
+        let args = vec!["--workspace=my-workspace".to_string(), "lodash".to_string()];
+        let p = extract_node_positionals(PackageManager::Npm, &args);
+        assert_eq!(p.specs, vec!["lodash".to_string()]);
+    }
+
+    #[test]
+    fn pnpm_and_yarn_boolean_workspace_flags_keep_the_spec() {
+        // pnpm's `-w` (--workspace-root) and yarn's `-W` are boolean —
+        // the next token is the package being installed.
+        let args = vec!["-w".to_string(), "lodash".to_string()];
+        let p = extract_node_positionals(PackageManager::Pnpm, &args);
+        assert_eq!(p.specs, vec!["lodash".to_string()]);
+
+        let args = vec!["-W".to_string(), "lodash".to_string()];
+        let p = extract_node_positionals(PackageManager::Yarn, &args);
+        assert_eq!(p.specs, vec!["lodash".to_string()]);
+
+        // pnpm's `--filter <selector>` does take a value.
+        let args = vec![
+            "--filter".to_string(),
+            "my-app".to_string(),
+            "lodash".to_string(),
+        ];
+        let p = extract_node_positionals(PackageManager::Pnpm, &args);
+        assert_eq!(p.specs, vec!["lodash".to_string()]);
+    }
+
+    #[test]
+    fn uv_add_group_flag_value_is_not_a_spec() {
+        let args = vec![
+            "--group".to_string(),
+            "dev".to_string(),
+            "requests".to_string(),
+        ];
+        let p = extract_node_positionals(PackageManager::Uv, &args);
+        assert_eq!(p.specs, vec!["requests".to_string()]);
+    }
+
+    #[test]
+    fn extracts_npm_positionals_after_double_dash() {
+        let args = vec![
+            "--save-dev".to_string(),
+            "--".to_string(),
+            "axios".to_string(),
+            "--this-is-positional-now".to_string(),
+        ];
+        let p = extract_node_positionals(PackageManager::Npm, &args);
+        assert_eq!(
+            p.specs,
+            vec!["axios".to_string(), "--this-is-positional-now".to_string()]
+        );
+    }
+
+    #[test]
+    fn parse_npm_spec_classifies() {
+        let cases = vec![
+            ("axios", NpmSpec::Latest),
+            ("axios@", NpmSpec::Latest),
+            ("axios@latest", NpmSpec::Latest),
+            ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())),
+            ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())),
+            ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())),
+            (
+                "axios@>=1.0.0 <2.0.0",
+                NpmSpec::Range(">=1.0.0 <2.0.0".to_string()),
+            ),
+            ("axios@next", NpmSpec::Tag("next".to_string())),
+            ("axios@beta", NpmSpec::Tag("beta".to_string())),
+            ("@types/node", NpmSpec::Latest),
+            ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())),
+            ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())),
+            ("@types/node@latest", NpmSpec::Latest),
+        ];
+        for (input, expected) in cases {
+            let target = parse_npm_spec(input);
+            match (&target.kind, &expected) {
+                (TargetKind::Npm(actual), expected) => {
+                    assert_eq!(actual, expected, "for input '{}'", input);
+                }
+                _ => panic!("unexpected kind for '{}'", input),
+            }
+        }
+    }
+
+    #[test]
+    fn parse_npm_spec_extracts_scoped_names() {
+        assert_eq!(parse_npm_spec("@types/node").name, "@types/node");
+        assert_eq!(parse_npm_spec("@types/node@20.10.5").name, "@types/node");
+        assert_eq!(parse_npm_spec("axios@1.2.3").name, "axios");
+        assert_eq!(parse_npm_spec("axios").name, "axios");
+    }
+
+    #[test]
+    fn parse_npm_spec_skips_unverifiable() {
+        let unverifiable = vec![
+            "git+https://github.com/x/y.git",
+            "git@github.com:x/y.git",
+            "github:expressjs/express",
+            "https://example.com/pkg.tgz",
+            "file:./local-pkg",
+            "./local-pkg",
+            "../sibling",
+            "/abs/path",
+            "npm:alias-of-other@1.0.0",
+            "workspace:*",
+            // GitHub shorthand and bare paths — registry lookups would 404.
+            "expressjs/express",
+            "user/repo#semver:^1.0.0",
+            ".",
+            "..",
+        ];
+        for u in unverifiable {
+            let t = parse_npm_spec(u);
+            assert!(
+                matches!(t.kind, TargetKind::Unverifiable { .. }),
+                "for '{}'",
+                u
+            );
+        }
+        // Scoped names keep their one `/` and stay verifiable.
+        assert!(matches!(
+            parse_npm_spec("@types/node").kind,
+            TargetKind::Npm(NpmSpec::Latest)
+        ));
+    }
+
+    #[test]
+    fn parse_npm_spec_coerces_leading_v() {
+        // npm installs `pkg@v1.2.3` as 1.2.3; a dist-tag reading would error.
+        let t = parse_npm_spec("axios@v1.2.3");
+        assert!(
+            matches!(t.kind, TargetKind::Npm(NpmSpec::Exact(ref v)) if v == "1.2.3"),
+            "got {:?}",
+            t.kind
+        );
+        // …but a real tag that merely starts with `v` stays a tag.
+        let t = parse_npm_spec("node@v8-canary");
+        assert!(
+            matches!(t.kind, TargetKind::Npm(NpmSpec::Tag(ref s)) if s == "v8-canary"),
+            "got {:?}",
+            t.kind
+        );
+    }
+
+    #[test]
+    fn parse_pypi_spec_classifies() {
+        let cases = vec![
+            ("requests", PypiSpec::Latest),
+            ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())),
+            ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())),
+            ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())),
+            ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())),
+            ("requests[security]", PypiSpec::Latest),
+            (
+                "requests[security]==2.31.0",
+                PypiSpec::Exact("2.31.0".to_string()),
+            ),
+        ];
+        for (input, expected) in cases {
+            let t = parse_pypi_spec(input);
+            match (&t.kind, &expected) {
+                (TargetKind::Pypi(actual), expected) => {
+                    assert_eq!(actual, expected, "for '{}'", input);
+                }
+                _ => panic!("unexpected kind for '{}'", input),
+            }
+        }
+    }
+
+    #[test]
+    fn parse_pypi_spec_strips_extras_and_markers() {
+        assert_eq!(
+            parse_pypi_spec("requests[security]==2.31.0").name,
+            "requests"
+        );
+        let t = parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"");
+        assert_eq!(t.name, "requests");
+        assert!(
+            matches!(t.kind, TargetKind::Pypi(PypiSpec::Exact(ref v)) if v == "2.31.0"),
+            "env marker must not leak into the spec: {:?}",
+            t.kind
+        );
+
+        // A marker-only spec must not split inside the marker: the name is
+        // `pkg` and the (versionless) spec resolves latest.
+        let marker_only = parse_pypi_spec("pkg; python_version >= \"3.7\"");
+        assert_eq!(marker_only.name, "pkg");
+        assert!(
+            matches!(marker_only.kind, TargetKind::Pypi(PypiSpec::Latest)),
+            "got {:?}",
+            marker_only.kind
+        );
+    }
+
+    #[test]
+    fn parse_pypi_spec_wildcard_pin_is_a_specifier() {
+        // `==1.4.*` is a range; matching it as a literal release key would
+        // always miss and block.
+        let t = parse_pypi_spec("django==4.2.*");
+        assert_eq!(t.name, "django");
+        assert!(
+            matches!(t.kind, TargetKind::Pypi(PypiSpec::Specifier(ref s)) if s == "==4.2.*"),
+            "got {:?}",
+            t.kind
+        );
+    }
+
+    #[test]
+    fn parse_pypi_spec_direct_reference_and_paths_are_unverifiable() {
+        // PEP 508 direct reference, bare dot, and separator-bearing paths
+        // must never be looked up (and thus never blocked) as registry names.
+        for spec in [
+            "requests @ https://files.pythonhosted.org/requests-2.31.0.whl",
+            "pkg @ https://example.com/x.whl ; python_version >= \"3.7\"",
+            ".",
+            "..",
+            "sub/dir",
+        ] {
+            let t = parse_pypi_spec(spec);
+            assert!(
+                matches!(t.kind, TargetKind::Unverifiable { .. }),
+                "for '{}': {:?}",
+                spec,
+                t.kind
+            );
+        }
+    }
+
+    #[test]
+    fn pypi_name_part_strips_extras_markers_and_operators() {
+        assert_eq!(pypi_name_part("requests"), "requests");
+        assert_eq!(pypi_name_part("requests[security]==2.31.0"), "requests");
+        assert_eq!(pypi_name_part("Flask_Cors>=4.0"), "Flask_Cors");
+        assert_eq!(pypi_name_part("pkg; python_version >= \"3.7\""), "pkg");
+        assert_eq!(pypi_name_part("pkg ==1.0"), "pkg");
+        assert_eq!(pypi_name_part(""), "");
+    }
+
+    #[test]
+    fn parse_pypi_spec_skips_unverifiable() {
+        let unverifiable = vec![
+            "git+https://github.com/x/y.git",
+            "https://example.com/pkg.tar.gz",
+            "./local-pkg",
+            "/abs/path",
+            "-e ./local",
+        ];
+        for u in unverifiable {
+            let t = parse_pypi_spec(u);
+            assert!(
+                matches!(t.kind, TargetKind::Unverifiable { .. }),
+                "for '{}'",
+                u
+            );
+        }
+    }
+
+    #[test]
+    fn classify_uv_command_recognizes_install_shapes() {
+        assert!(matches!(
+            classify_uv_command(&[
+                "pip".to_string(),
+                "install".to_string(),
+                "requests".to_string(),
+            ]),
+            UvCommand::PipInstall { .. }
+        ));
+        assert!(matches!(
+            classify_uv_command(&["pip".to_string(), "i".to_string()]),
+            UvCommand::PipInstall { .. }
+        ));
+        assert!(matches!(
+            classify_uv_command(&["add".to_string(), "django".to_string()]),
+            UvCommand::Add { .. }
+        ));
+        assert_eq!(
+            classify_uv_command(&["sync".to_string(), "--extra".to_string(), "dev".to_string()]),
+            UvCommand::Sync
+        );
+        assert_eq!(
+            classify_uv_command(&["run".to_string(), "pytest".to_string()]),
+            UvCommand::Passthrough
+        );
+        assert_eq!(
+            classify_uv_command(&["lock".to_string()]),
+            UvCommand::Passthrough
+        );
+    }
+
+    #[test]
+    fn uv_add_positionals_parse_as_pypi_specs() {
+        let parsed = parse_pypi_positionals_args(&["requests==2.31.0".into()]);
+        assert_eq!(parsed.targets.len(), 1);
+        assert!(
+            matches!(
+                &parsed.targets[0].kind,
+                TargetKind::Pypi(PypiSpec::Exact(v)) if v == "2.31.0"
+            ),
+            "uv add targets must parse as PyPI specs, got {:?}",
+            parsed.targets[0].kind
+        );
+    }
+
+    #[test]
+    fn pip_args_extract_requirements_files() {
+        let args = vec![
+            "-r".to_string(),
+            "reqs.txt".to_string(),
+            "requests==2.31.0".to_string(),
+            "--requirement=other.txt".to_string(),
+            "--constraint".to_string(),
+            "constraints.txt".to_string(),
+            "--constraint=other-constraints.txt".to_string(),
+            "-e".to_string(),
+            "./local".to_string(),
+        ];
+        let p = extract_pip_positionals(&args).unwrap();
+        assert_eq!(
+            p.requirements_files,
+            vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")]
+        );
+        assert!(p.specs.contains(&"requests==2.31.0".to_string()));
+        assert!(p.specs.iter().any(|s| s.starts_with("-e ")));
+        assert!(!p.specs.contains(&"constraints.txt".to_string()));
+        assert!(!p.specs.contains(&"other-constraints.txt".to_string()));
+        assert!(!p
+            .requirements_files
+            .contains(&PathBuf::from("constraints.txt")));
+        assert!(!p
+            .requirements_files
+            .contains(&PathBuf::from("other-constraints.txt")));
+    }
+
+    #[test]
+    fn pip_attached_short_options_are_recognized() {
+        // pip accepts `-rreqs.txt` (value attached); reading it as a boolean
+        // flag would make the install look bare and skip the gate entirely.
+        let args = vec![
+            "-rreqs.txt".to_string(),
+            "-cconstraints.txt".to_string(),
+            "-e./local".to_string(),
+        ];
+        let p = extract_pip_positionals(&args).unwrap();
+        assert_eq!(p.requirements_files, vec![PathBuf::from("reqs.txt")]);
+        assert!(p.specs.contains(&"-e ./local".to_string()));
+        assert!(!p.specs.contains(&"-cconstraints.txt".to_string()));
+    }
+
+    #[test]
+    fn pip_value_flag_values_are_not_specs() {
+        // A bare-word value of a known value-taking flag must not be
+        // verified (or blocked) as a package.
+        let args = vec![
+            "--platform".to_string(),
+            "win_amd64".to_string(),
+            "--no-binary".to_string(),
+            ":all:".to_string(),
+            "--target".to_string(),
+            "build".to_string(),
+            "requests".to_string(),
+        ];
+        let p = extract_pip_positionals(&args).unwrap();
+        assert_eq!(p.specs, vec!["requests".to_string()]);
+    }
+
+    #[test]
+    fn uv_add_requirements_flag_tracks_the_file() {
+        for args in [
+            vec!["-r".to_string(), "reqs.txt".to_string()],
+            vec!["--requirements".to_string(), "reqs.txt".to_string()],
+            vec!["--requirements=reqs.txt".to_string()],
+        ] {
+            let p = extract_node_positionals(PackageManager::Uv, &args);
+            assert_eq!(
+                p.requirements_files,
+                vec![PathBuf::from("reqs.txt")],
+                "args {args:?}"
+            );
+            assert!(p.specs.is_empty(), "args {args:?}");
+        }
+        // `-c constraints.txt` doesn't add packages — value skipped.
+        let p = extract_node_positionals(
+            PackageManager::Uv,
+            &[
+                "-c".to_string(),
+                "cons.txt".to_string(),
+                "flask".to_string(),
+            ],
+        );
+        assert_eq!(p.specs, vec!["flask".to_string()]);
+        assert!(p.requirements_files.is_empty());
+    }
+}
diff --git a/src/precheck/render.rs b/src/precheck/render.rs
new file mode 100644
index 0000000..47fb377
--- /dev/null
+++ b/src/precheck/render.rs
@@ -0,0 +1,616 @@
+//! Report rendering: text/JSON output, refusal line, fix/steer helpers.
+
+use crate::verify_deps;
+
+use super::{
+    parse, PackageManager, PrecheckOptions, PrecheckReport, TargetOutcome, TreeOrigin, TreeReport,
+    VerdictMode, VerdictStatus,
+};
+
+/// Reason recorded on resolved targets when no verdict pass ran.
+const NO_VERDICT_REASON: &str = "vulnerability verdict not checked";
+
+/// One honest stderr line when a zero-spec install can't be gated:
+/// yarn/pnpm/uv have no safe dry-run, so a bare install pulls its whole
+/// dependency set unchecked. No-op for other managers (bare npm is gated
+/// via the tree pass; bare pip installs nothing).
+pub(super) fn bare_install_note(manager: PackageManager, subcommand_label: &str) {
+    if matches!(
+        manager,
+        PackageManager::Yarn | PackageManager::Pnpm | PackageManager::Uv
+    ) {
+        eprintln!(
+            "note: bare '{} {}' is not gated (no safe dry-run) — dependencies install unchecked",
+            manager.binary_name(),
+            subcommand_label
+        );
+    }
+}
+
+/// The refusal line on stderr. Messaging only; the block decision and the
+/// choice of escape hatch live in `verdict::block_reason`.
+pub(super) fn print_refusal(reason: super::verdict::BlockReason) {
+    use super::verdict::BlockReason;
+    match reason {
+        BlockReason::ExistingTree => eprintln!(
+            "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force."
+        ),
+        BlockReason::Findings => {
+            eprintln!("Refusing to run install. Pass --force to proceed despite findings.")
+        }
+        BlockReason::RecencyOnly => {
+            eprintln!("Refusing to run install. Pass --no-fail to proceed anyway.")
+        }
+    }
+}
+
+/// Print the "requirements files are not recency-checked" note when the
+/// install carried any `-r` files. No-op otherwise.
+pub(super) fn requirements_note(parsed: &parse::ParsedInstall) {
+    if parsed.requirements_files.is_empty() {
+        return;
+    }
+    // Render each path for display; the note lists them comma-separated.
+    let files: Vec<String> = parsed
+        .requirements_files
+        .iter()
+        .map(|p| p.display().to_string())
+        .collect();
+    eprintln!(
+        "note: requirements files ({}) are not recency-checked by the baseline gate",
+        files.join(", ")
+    );
+}
+
+/// One stderr warning when `verdict::public_verdict(opts)` is `Some` and the
+/// report counts at least one unverifiable finding. No-op otherwise.
+pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) {
+    if super::verdict::public_verdict(opts).is_some() && report.unverifiable_count() > 0 {
+        eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open.");
+    }
+}
+
+/// Suffix for a vulnerable match line: the advisory's fix, if known.
+fn fix_note(m: &crate::vuln_api::VulnMatch) -> String {
+    match &m.fixed_version {
+        Some(v) => format!(" — fixed in {v}"),
+        None => " — no fixed version known".to_string(),
+    }
+}
+
+/// Highest of `fixes` after sort/dedup: a single distinct value is returned
+/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct
+/// values compare by lenient semver. With `all_must_parse`, one unparsable
+/// candidate among several poisons the answer (`None`); otherwise unparsable
+/// candidates are skipped.
+fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option<String> {
+    // Sort + dedup so repeated advisories naming the same fix collapse to
+    // one candidate before any parsing is attempted.
+    fixes.sort_unstable();
+    fixes.dedup();
+    match fixes.as_slice() {
+        [] => None,
+        // A single distinct value needs no comparison, hence no parse.
+        [only] => Some((*only).to_string()),
+        many => {
+            let mut parsed = Vec::with_capacity(many.len());
+            for raw in many {
+                match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) {
+                    // Keep the raw string alongside the parsed version so the
+                    // caller gets back the advisory's own spelling.
+                    Ok(v) => parsed.push((v, *raw)),
+                    Err(_) if all_must_parse => return None,
+                    Err(_) => {}
+                }
+            }
+            parsed
+                .into_iter()
+                .max_by(|(a, _), (b, _)| a.cmp(b))
+                .map(|(_, raw)| raw.to_string())
+        }
+    }
+}
+
+/// The one version certified to clear every match. Requires every match to
+/// carry a `fixed_version`; any match without one — or an unparsable
+/// candidate among several — means no version can be certified, so `None`.
+fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option<String> {
+    // Collecting into `Option<Vec<_>>` yields `None` as soon as one match
+    // lacks a fix; `?` then returns `None` from this function.
+    let fixes: Vec<&str> = matches
+        .iter()
+        .map(|m| m.fixed_version.as_deref())
+        .collect::<Option<Vec<_>>>()?;
+    highest_fix(fixes, true)
+}
+
+/// Highest `fixed_version` the advisories advertise, by lenient semver.
+/// Unlike `safe_version` this is *not* a certification: matches without a
+/// fix are ignored, so the result may still be vulnerable to them. `None`
+/// only when no match advertises a fix (or no candidate parses).
+fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option<String> {
+    let fixes: Vec<&str> = matches
+        .iter()
+        .filter_map(|m| m.fixed_version.as_deref())
+        .collect();
+    highest_fix(fixes, false)
+}
+
+/// Per-match advisory lines plus the safe-version steer, shared by the
+/// named-target and transitive vulnerable render arms.
+fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) {
+    for m in matches {
+        println!(
+            " {} ({}){}",
+            m.advisory_id,
+            m.severity_level,
+            fix_note(m)
+        );
+    }
+    // The steer is printed only when a single version clears every advisory.
+    if let Some(safe) = safe_version(matches) {
+        println!(" → safe version: {name}@{safe}");
+    }
+}
+
+/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`.
+/// The parenthetical separates findings the resolved tree carried in from
+/// findings on the targets this command names; omitted when the tree
+/// contributed none.
+fn summary_segment(total: usize, from_tree: usize, label: &str) -> String {
+    if from_tree > 0 {
+        format!("{total} {label} ({from_tree} from resolved tree)")
+    } else {
+        format!("{total} {label}")
+    }
+}
+
+/// More than this many unverifiable findings with the same error-prefix
+/// render as one collapsed line instead of one line per package.
+const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3;
+
+/// Group key for collapsing repeated unverifiable errors: the text before
+/// the first `(` — strips per-package detail (URLs, status codes) so one
+/// outage groups under one key.
+fn error_prefix(error: &str) -> &str {
+    match error.find('(') {
+        // Drop the whitespace that preceded the parenthesis as well.
+        Some(i) => error[..i].trim_end(),
+        // No parenthesized detail: the whole message is the key.
+        None => error,
+    }
+}
+
+/// Unverifiable error strings across transitive tree findings and named
+/// outcomes, in render order.
+fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> {
+    let mut errors = Vec::new();
+    // Tree (transitive) findings come first, then the named outcomes.
+    if let Some(TreeReport::Full { transitive, .. }) = &report.tree {
+        for t in transitive {
+            if let VerdictStatus::Unverifiable(e) = &t.verdict {
+                errors.push(e.as_str());
+            }
+        }
+    }
+    for o in &report.outcomes {
+        if let TargetOutcome::Resolved {
+            verdict: VerdictStatus::Unverifiable(e),
+            ..
+        } = o
+        {
+            errors.push(e.as_str());
+        }
+    }
+    errors
+}
+
+/// `(prefix, count, first error)` groups of unverifiable findings large
+/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) —
+/// the vuln-api outage case, where every package fails the same way.
+/// Display-only: counts and exit codes never change.
+fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> {
+    // Linear-scan grouping keeps groups in first-seen order.
+    let mut groups: Vec<(&str, usize, &str)> = Vec::new();
+    for e in unverifiable_errors(report) {
+        let prefix = error_prefix(e);
+        match groups.iter_mut().find(|(p, _, _)| *p == prefix) {
+            Some((_, count, _)) => *count += 1,
+            None => groups.push((prefix, 1, e)),
+        }
+    }
+    // Only groups strictly above the threshold are collapsed.
+    groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD);
+    groups
+}
+
+// Human-readable report on stdout: header, summary counts, tree findings,
+// collapsed outage groups, then one line per named outcome.
+// NOTE(review): the leading whitespace inside the output literals below may
+// have been collapsed by text extraction — confirm against the repository.
+pub(super) fn print_text(report: &PrecheckReport) {
+    // Build the echoed command from non-empty parts: a bare gated install
+    // (e.g. `npm install` with zero specs) has no args to append.
+    let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand);
+    if !report.original_args.is_empty() {
+        command.push(' ');
+        command.push_str(&report.original_args.join(" "));
+    }
+
+    // Errors belonging to a collapsed group are suppressed per package and
+    // reported once per group further down.
+    let collapsed = collapsed_unverifiable_groups(report);
+    let is_collapsed = |error: &str| {
+        collapsed
+            .iter()
+            .any(|(prefix, _, _)| *prefix == error_prefix(error))
+    };
+
+    println!(
+        "Pre-checking `{}` (threshold {})",
+        command,
+        verify_deps::format_duration(report.threshold)
+    );
+    println!(
+        " {} ok, {} recent, {}, {}, {} skipped, {} errors",
+        report.ok_count(),
+        report.recent_count(),
+        summary_segment(
+            report.vulnerable_count(),
+            report.tree_vulnerable_count(),
+            "vulnerable"
+        ),
+        summary_segment(
+            report.unverifiable_count(),
+            report.tree_unverifiable_count(),
+            "unverifiable"
+        ),
+        report.skipped_count(),
+        report.error_count(),
+    );
+
+    match &report.tree {
+        Some(TreeReport::Full {
+            resolved_count,
+            transitive,
+            ..
+        }) => {
+            println!(
+                " tree: {} packages resolved, {} transitive checked",
+                resolved_count,
+                transitive.len()
+            );
+            for t in transitive {
+                match &t.verdict {
+                    VerdictStatus::Vulnerable(matches) => {
+                        println!(
+                            " ✗ {}@{} {} known vulnerable:",
+                            t.name,
+                            t.version,
+                            t.origin.label()
+                        );
+                        print_vulnerable_matches(&t.name, matches);
+                        // A vulnerable dep the project already declares can be
+                        // bumped directly — point at the fix as a command.
+                        // When `safe_version` is `Some` it equals
+                        // `advertised_fix` and clears every advisory; otherwise
+                        // some advisory has no fix, so the "(advertised fix)"
+                        // hedge marks the bump as partial.
+                        if t.origin == TreeOrigin::PreExisting {
+                            if let Some(fix) = advertised_fix(matches) {
+                                let hedge = if safe_version(matches).is_some() {
+                                    ""
+                                } else {
+                                    " (advertised fix)"
+                                };
+                                println!(
+                                    " fix with: corgea {} install {}@{}{}",
+                                    report.manager.binary_name(),
+                                    t.name,
+                                    fix,
+                                    hedge
+                                );
+                            }
+                        }
+                    }
+                    VerdictStatus::Unverifiable(error) => {
+                        if !is_collapsed(error) {
+                            println!(
+                                " ⚠ {}@{} {} could not be verified: {}",
+                                t.name,
+                                t.version,
+                                t.origin.label(),
+                                error
+                            );
+                        }
+                    }
+                    // Clean / not-checked tree entries stay quiet in text mode.
+                    VerdictStatus::Clean | VerdictStatus::NotChecked => {}
+                }
+            }
+        }
+        Some(TreeReport::NamedOnly { reason }) => {
+            println!(" tree: transitive dependencies NOT checked ({reason})");
+        }
+        None => {}
+    }
+
+    // One line per collapsed outage group instead of one per package.
+    for (_, count, first_error) in &collapsed {
+        println!(
+            " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})"
+        );
+    }
+
+    for o in &report.outcomes {
+        match o {
+            TargetOutcome::Resolved {
+                target,
+                resolved,
+                age,
+                verdict,
+            } => match verdict {
+                VerdictStatus::Vulnerable(matches) => {
+                    println!(
+                        " ✗ {} → {}@{} known vulnerable:",
+                        target.display, resolved.name, resolved.version,
+                    );
+                    print_vulnerable_matches(&resolved.name, matches);
+                }
+                VerdictStatus::Unverifiable(error) => {
+                    if !is_collapsed(error) {
+                        println!(
+                            " ⚠ {} → {}@{} could not be verified: {}",
+                            target.display, resolved.name, resolved.version, error,
+                        );
+                    }
+                }
+                // With no vulnerability finding, the line reports recency only.
+                VerdictStatus::Clean | VerdictStatus::NotChecked => {
+                    if report.is_recent(*age) {
+                        println!(
+                            " ⚠ {} → {}@{} published {} ago at {} (within threshold)",
+                            target.display,
+                            resolved.name,
+                            resolved.version,
+                            verify_deps::format_duration(*age),
+                            resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"),
+                        );
+                    } else {
+                        println!(
+                            " ✓ {} → {}@{} published {} ago",
+                            target.display,
+                            resolved.name,
+                            resolved.version,
+                            verify_deps::format_duration(*age),
+                        );
+                    }
+                }
+            },
+            TargetOutcome::Skipped { target, reason } => {
+                println!(" ? {}: {}", target.display, reason);
+            }
+            TargetOutcome::Error { target, error } => {
+                println!(" ✗ {}: {}", target.display, error);
+            }
+        }
+    }
+}
+
+/// JSON shape for a single verdict. Shared by named outcomes and tree
+/// (transitive) outcomes so both render verdicts identically.
+/// `remediation` carries the version that clears every advisory
+/// (`safe_version`); `null` when any advisory has no known fix.
+fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value {
+    use serde_json::json;
+    match verdict {
+        VerdictStatus::Clean => json!({ "status": "clean" }),
+        VerdictStatus::Vulnerable(matches) => {
+            json!({
+                "status": "vulnerable",
+                "matches": matches,
+                "remediation": safe_version(matches),
+            })
+        }
+        VerdictStatus::Unverifiable(error) => {
+            json!({ "status": "unverifiable", "error": error })
+        }
+        VerdictStatus::NotChecked => {
+            json!({ "status": "not_checked", "reason": NO_VERDICT_REASON })
+        }
+    }
+}
+
+/// Pretty-printed JSON report on stdout: manager, subcommand, args,
+/// threshold, summary counts, verdict mode, per-target results and the
+/// tree section (`null` when no tree pass ran).
+pub(super) fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) {
+    use serde_json::json;
+    // No verdict config at all means only the recency gate ran.
+    let verdict_mode = match opts.verdict.as_ref().map(|cfg| &cfg.mode) {
+        Some(VerdictMode::Public) => "public",
+        Some(VerdictMode::Authenticated { .. }) => "authenticated",
+        None => "recency-only",
+    };
+    let outcomes: Vec<_> = report
+        .outcomes
+        .iter()
+        .map(|o| match o {
+            TargetOutcome::Resolved {
+                target,
+                resolved,
+                age,
+                verdict,
+            } => {
+                let verdict_json = verdict_json(verdict);
+                // `status` reflects recency only; the vulnerability result
+                // travels separately under `verdict`.
+                json!({
+                    "status": if report.is_recent(*age) { "recent" } else { "ok" },
+                    "spec": target.display,
+                    "name": resolved.name,
+                    "resolved_version": resolved.version,
+                    "published_at": resolved.published_at.to_rfc3339(),
+                    "age_seconds": age.as_secs(),
+                    "verdict": verdict_json,
+                })
+            }
+            TargetOutcome::Skipped { target, reason } => json!({
+                "status": "skipped",
+                "spec": target.display,
+                "name": target.name,
+                "reason": reason,
+            }),
+            TargetOutcome::Error { target, error } => json!({
+                "status": "error",
+                "spec": target.display,
+                "name": target.name,
+                "error": error,
+            }),
+        })
+        .collect();
+
+    let body = json!({
+        "manager": report.manager.binary_name(),
+        "subcommand": report.subcommand,
+        "args": report.original_args,
+        "threshold_seconds": report.threshold.as_secs(),
+        "summary": {
+            "ok": report.ok_count(),
+            "recent": report.recent_count(),
+            "vulnerable": report.vulnerable_count(),
+            "unverifiable": report.unverifiable_count(),
+            "skipped": report.skipped_count(),
+            "errors": report.error_count(),
+        },
+        "verdict_mode": verdict_mode,
+        "results": outcomes,
+        // Both tree modes emit the same four keys.
+        "tree": report.tree.as_ref().map(|t| match t {
+            TreeReport::Full { resolved_count, transitive } => json!({
+                "mode": "full",
+                "reason": serde_json::Value::Null,
+                "resolved_count": resolved_count,
+                "transitive": transitive.iter().map(|o| json!({
+                    "name": o.name,
+                    "version": o.version,
+                    "origin": o.origin.json_name(),
+                    "verdict": verdict_json(&o.verdict),
+                })).collect::<Vec<_>>(),
+            }),
+            TreeReport::NamedOnly { reason } => json!({
+                "mode": "named-only",
+                "reason": reason,
+                "resolved_count": 0,
+                "transitive": [],
+            }),
+        }),
+    });
+
+    println!("{}", serde_json::to_string_pretty(&body).unwrap());
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::test_support::*;
+    use super::super::TreeOutcome;
+    use super::*;
+
+    #[test]
+    fn safe_version_single_fix() {
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("2.0.0"))]),
+            Some("2.0.0".to_string())
+        );
+    }
+
+    #[test]
+    fn safe_version_duplicate_fixes_collapse_without_parsing() {
+        // "1.0rc1" is unparsable, but a single distinct value needs no parse.
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]),
+            Some("1.0rc1".to_string())
+        );
+    }
+
+    #[test]
+    fn safe_version_picks_highest_of_distinct_fixes() {
+        // Semver order, not lexical ("1.2.0" > "1.10.0" lexically).
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]),
+            Some("1.10.0".to_string())
+        );
+    }
+
+    #[test]
+    fn safe_version_two_component_versions_normalize() {
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]),
+            Some("4.0".to_string())
+        );
+    }
+
+    #[test]
+    fn safe_version_mixed_fix_and_none_is_none() {
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]),
+            None
+        );
+    }
+
+    #[test]
+    fn safe_version_unparsable_among_distinct_is_none() {
+        assert_eq!(
+            safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]),
+            None
+        );
+    }
+
+    #[test]
+    fn safe_version_empty_matches_is_none() {
+        assert_eq!(safe_version(&[]), None);
+    }
+
+    #[test]
+    fn error_prefix_strips_parenthesized_detail() {
+        // The reqwest network-failure shape: per-package URL in parens.
+        assert_eq!(
+            error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"),
+            "Failed to send vuln-api request: error sending request for url"
+        );
+        assert_eq!(
+            error_prefix("vuln-api unavailable (HTTP 503)"),
+            "vuln-api unavailable"
+        );
+        assert_eq!(error_prefix("no parens here"), "no parens here");
+    }
+
+    /// Four unverifiable findings sharing a prefix collapse into one group
+    /// (named + transitive both count); three do not.
+    #[test]
+    fn collapsed_groups_require_more_than_threshold() {
+        let unverifiable = |name: &str| {
+            let mut o = resolved_outcome(name, "1.0.0", false);
+            set_verdict(
+                &mut o,
+                VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")),
+            );
+            o
+        };
+
+        let mut report = report_with(vec![
+            unverifiable("a"),
+            unverifiable("b"),
+            unverifiable("c"),
+        ]);
+        assert!(collapsed_unverifiable_groups(&report).is_empty());
+
+        report.tree = Some(TreeReport::Full {
+            resolved_count: 4,
+            transitive: vec![TreeOutcome {
+                name: "d".to_string(),
+                version: "1.0.0".to_string(),
+                verdict: VerdictStatus::Unverifiable(
+                    "vuln-api unavailable (HTTP 503: d)".to_string(),
+                ),
+                origin: TreeOrigin::Transitive,
+            }],
+        });
+        let groups = collapsed_unverifiable_groups(&report);
+        assert_eq!(groups.len(), 1);
+        let (prefix, count, first) = groups[0];
+        assert_eq!(prefix, "vuln-api unavailable");
+        assert_eq!(count, 4);
+        // Render order is transitive-first, so the tree finding leads.
+        assert_eq!(first, "vuln-api unavailable (HTTP 503: d)");
+    }
+
+    #[test]
+    fn advertised_fix_ignores_matches_without_fix() {
+        // safe_version returns None here; the advertised fix still surfaces.
+        assert_eq!(
+            advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]),
+            Some("2.0.0".to_string())
+        );
+        assert_eq!(advertised_fix(&[vm("A-1", None)]), None);
+        assert_eq!(advertised_fix(&[]), None);
+    }
+
+    #[test]
+    fn advertised_fix_picks_highest_by_semver() {
+        assert_eq!(
+            advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]),
+            Some("1.10.0".to_string())
+        );
+    }
+}
diff --git a/src/precheck/test_support.rs b/src/precheck/test_support.rs
new file mode 100644
index 0000000..ea16aae
--- /dev/null
+++ b/src/precheck/test_support.rs
@@ -0,0 +1,124 @@
+//! Shared builders for precheck unit tests (mod.rs, render.rs, verdict.rs).
+//! Test-only: declared `#[cfg(test)]` from mod.rs.
+
+use std::time::Duration;
+
+use chrono::Utc;
+
+use super::{
+    InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome,
+    VerdictConfig, VerdictMode, VerdictStatus,
+};
+
+/// Baseline options: pypi registry at a dead address (a port that
+/// refuses connections - these tests never dial it), no verdict config.
+/// Override fields per test via struct update.
+pub(crate) fn stub_opts() -> PrecheckOptions {
+    PrecheckOptions {
+        threshold: Duration::from_secs(2 * 86400),
+        no_fail: false,
+        force: false,
+        json: false,
+        verdict: None,
+        npm_registry: None,
+        pypi_registry: Some("http://127.0.0.1:9".to_string()),
+    }
+}
+
+/// `stub_opts()` plus a verdict config pointing at `base_url`.
+pub(crate) fn verdict_opts(base_url: &str) -> PrecheckOptions {
+    PrecheckOptions {
+        verdict: Some(VerdictConfig {
+            base_url: base_url.to_string(),
+            mode: VerdictMode::Authenticated {
+                token: "test-token".to_string(),
+            },
+            public_login_hint: false,
+        }),
+        ..stub_opts()
+    }
+}
+
+/// `stub_opts()` in public verdict mode (login hint on) with the given
+/// `no_fail` / `force` flags; the verdict base URL is the same dead address.
+pub(crate) fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions {
+    PrecheckOptions {
+        no_fail,
+        force,
+        verdict: Some(VerdictConfig {
+            base_url: "http://127.0.0.1:9".to_string(),
+            mode: VerdictMode::Public,
+            public_login_hint: true,
+        }),
+        ..stub_opts()
+    }
+}
+
+/// `stub_opts()` in authenticated verdict mode (fixed test token) with the
+/// given `no_fail` / `force` flags; the verdict base URL is the dead address.
+pub(crate) fn authenticated_opts(no_fail: bool, force: bool) -> PrecheckOptions {
+    PrecheckOptions {
+        no_fail,
+        force,
+        verdict: Some(VerdictConfig {
+            base_url: "http://127.0.0.1:9".to_string(),
+            mode: VerdictMode::Authenticated {
+                token: "test-token".to_string(),
+            },
+            public_login_hint: false,
+        }),
+        ..stub_opts()
+    }
+}
+
+/// A `TargetOutcome::Resolved` for `name`/`version` with a `NotChecked`
+/// verdict; `recent` selects an age below or above the report threshold.
+pub(crate) fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome {
+    // Recency derives from age vs `report_with`'s 2-day threshold:
+    // one hour => recent, a year => not.
+    let age = if recent {
+        Duration::from_secs(3600)
+    } else {
+        Duration::from_secs(365 * 86400)
+    };
+    TargetOutcome::Resolved {
+        target: InstallTarget {
+            name: name.to_string(),
+            display: format!("{name}=={version}"),
+            kind: TargetKind::Unverifiable {
+                reason: "test".to_string(),
+            },
+        },
+        resolved: crate::verify_deps::registry::ResolvedPackage {
+            name: name.to_string(),
+            version: version.to_string(),
+            published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(),
+        },
+        age,
+        verdict: VerdictStatus::NotChecked,
+    }
+}
+
+/// A pip `install` report over `outcomes`: no original args, a 2-day
+/// threshold, no tree section.
+pub(crate) fn report_with(outcomes: Vec<TargetOutcome>) -> PrecheckReport {
+    PrecheckReport {
+        manager: PackageManager::Pip,
+        subcommand: "install".to_string(),
+        original_args: vec![],
+        outcomes,
+        threshold: Duration::from_secs(2 * 86400),
+        tree: None,
+        // Most tests model an install that named something; bare-install
+        // cases set this explicitly.
+        bare_install: false,
+    }
+}
+
+/// Overwrite the verdict of a `Resolved` outcome; other variants are left
+/// untouched.
+pub(crate) fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) {
+    if let TargetOutcome::Resolved { verdict, .. } = outcome {
+        *verdict = v;
+    }
+}
+
+/// A `VulnMatch` for `advisory` with severity `"high"`, tier 1, no
+/// vulnerable range, and the given optional `fixed` version.
+pub(crate) fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch {
+    crate::vuln_api::VulnMatch {
+        advisory_id: advisory.to_string(),
+        severity_level: "high".to_string(),
+        tier: 1,
+        vulnerable_version_range: None,
+        fixed_version: fixed.map(str::to_string),
+    }
+}
diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs
new file mode 100644
index 0000000..1f28967
--- /dev/null
+++ b/src/precheck/tree.rs
@@ -0,0 +1,574 @@
+//! Full would-install-set resolution (the "tree pass").
+//!
+//! Safety invariant: resolution must never execute package code.
+//! pip: `--only-binary :all:` prevents sdist builds (pypa/pip#13091).
+//! npm: `--ignore-scripts` guards npm/cli#2787.
+ +use std::process::Command; + +use super::PackageManager; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePackage { + pub name: String, + pub version: String, + /// pip report `"requested"`: the user named this package (CLI arg or + /// requirements file). Always false for npm — its lockfile has no + /// equivalent flag. + pub requested: bool, +} + +/// Whether this manager's resolver has anything to resolve for the parsed +/// install. pip's dry-run and uv's compile also read `-r` requirements +/// files, so those make an install eligible even with no named targets. +/// npm's lockfile resolution reads `package.json`, so a bare `npm install` +/// is eligible whenever the project (found like npm finds it — nearest +/// ancestor manifest) has one. +pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + || (matches!(manager, PackageManager::Pip | PackageManager::Uv) + && !parsed.requirements_files.is_empty()) + || (manager == PackageManager::Npm && npm_project_root().is_some()) +} + +/// Nearest ancestor file named `name`, starting at the CWD. +pub(super) fn find_up(name: &str) -> Option { + let cwd = std::env::current_dir().ok()?; + cwd.ancestors() + .map(|dir| dir.join(name)) + .find(|p| p.is_file()) +} + +/// The project directory npm itself would operate on: the nearest ancestor +/// holding `package.json`. A bare `npm install` from a subdirectory +/// installs THAT project's tree, so the gate must look there too. +pub(super) fn npm_project_root() -> Option { + Some(find_up("package.json")?.parent()?.to_path_buf()) +} + +/// `Err(reason)`: no safe dry-run for this manager, or the dry-run failed — +/// the caller falls back to named-only and its warning carries `reason`. 
+pub fn resolve_tree( + manager: PackageManager, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result, String> { + match manager { + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args), + PackageManager::Uv => resolve_uv_tree(parsed), + PackageManager::Yarn | PackageManager::Pnpm => { + Err(format!("{} has no safe dry-run", manager.binary_name())) + } + } +} + +/// Last stderr line of a failed subprocess, for one-line error messages. +fn stderr_tail(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr) + .trim() + .lines() + .last() + .unwrap_or("unknown error") + .to_string() +} + +fn resolve_pip_tree(binary: &str, install_args: &[String]) -> Result, String> { + // Same binary resolution as the exec path (pip → pip3 fallback) — the + // tree pass must not silently degrade on pip3-only systems. + let resolved = super::exec::resolve_binary(binary)?; + let output = Command::new(resolved) + .arg("install") + .args([ + "--dry-run", + "--quiet", + "--report", + "-", + "--only-binary", + ":all:", + ]) + .args(install_args) + .output() + .map_err(|e| format!("run pip dry-run: {e}"))?; + if !output.status.success() { + return Err(format!("pip dry-run failed: {}", stderr_tail(&output))); + } + parse_pip_report(&String::from_utf8_lossy(&output.stdout)) +} + +fn parse_pip_report(json: &str) -> Result, String> { + let report: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse pip report: {e}"))?; + let install = report + .get("install") + .and_then(|v| v.as_array()) + .ok_or("pip report has no install[] array")?; + install + .iter() + .map(|item| { + let metadata = item.get("metadata").ok_or("report item missing metadata")?; + let field = |k: &str| { + metadata + .get(k) + .and_then(|v| v.as_str()) + .map(str::to_string) + .ok_or_else(|| format!("report item missing metadata.{k}")) + }; + 
Ok(TreePackage { + name: field("name")?, + version: field("version")?, + requested: item + .get("requested") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + }) + }) + .collect() +} + +/// Resolve uv's would-install set with `uv pip compile` — uv's own +/// resolver, run without executing package code (`--only-binary :all:` +/// blocks sdist builds, mirroring the pip dry-run guard). Compile takes +/// requirements files rather than bare specs, so named registry specs and +/// absolutized `-r` includes are written to a temp `.in` file. +/// Unverifiable targets (URL / git / editable / path) are excluded — they +/// are already surfaced as skipped warnings. Index selection comes from +/// uv's env/config; index flags on the wrapped command don't carry over. +fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result, String> { + let uv = super::exec::resolve_binary("uv")?; + let mut input = String::new(); + for t in &parsed.targets { + if !matches!(t.kind, super::TargetKind::Unverifiable { .. 
}) { + input.push_str(&t.display); + input.push('\n'); + } + } + for f in &parsed.requirements_files { + let abs = std::fs::canonicalize(f).map_err(|e| format!("read {}: {e}", f.display()))?; + input.push_str(&format!("-r {}\n", abs.display())); + } + if input.is_empty() { + return Err("nothing uv pip compile can resolve (all targets are URL/path refs)".into()); + } + + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + let in_file = work.path().join("corgea-gate.in"); + std::fs::write(&in_file, &input).map_err(|e| format!("write compile input: {e}"))?; + let output = Command::new(&uv) + .args([ + "pip", + "compile", + "--only-binary", + ":all:", + "--no-header", + "--no-annotate", + "--quiet", + ]) + .arg(&in_file) + .output() + .map_err(|e| format!("run uv pip compile: {e}"))?; + if !output.status.success() { + return Err(format!("uv pip compile failed: {}", stderr_tail(&output))); + } + parse_compiled_requirements( + &String::from_utf8_lossy(&output.stdout), + &requested_names(parsed), + ) +} + +/// PEP 503-normalized names the user asked for — named CLI targets plus +/// entries of `-r` files — so tree findings label "(from requirements)" like +/// pip's `requested` report flag. Best-effort line parse; anything unparsed +/// just labels "(transitive)". +fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::HashSet { + let norm = |n: &str| crate::vuln_api::Ecosystem::Pypi.normalize_name(n); + let mut out: std::collections::HashSet = parsed + .targets + .iter() + .filter(|t| !matches!(t.kind, super::TargetKind::Unverifiable { .. 
})) + .map(|t| norm(&t.name)) + .collect(); + for f in &parsed.requirements_files { + let Ok(content) = std::fs::read_to_string(f) else { + continue; + }; + for line in content.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) || line.contains("://") { + continue; + } + let name = super::parse::pypi_name_part(line); + if !name.is_empty() { + out.insert(norm(name)); + } + } + } + out +} + +/// Parse `uv pip compile` stdout (requirements.txt-format `name==version` +/// pins) into the would-install set. Any line that isn't a pin is an error — +/// silently skipping could hide part of the tree. +fn parse_compiled_requirements( + out: &str, + requested: &std::collections::HashSet, +) -> Result, String> { + let mut pkgs = Vec::new(); + for line in out.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) { + continue; + } + // Strip env markers and trailing comments: `pkg==1.0 ; marker # via`. + let line = line.split(';').next().unwrap_or(line).trim(); + let line = line.split(" #").next().unwrap_or(line).trim(); + let Some((name, version)) = line.split_once("==") else { + return Err(format!( + "unexpected line in uv pip compile output: '{line}'" + )); + }; + // Strip extras: `celery[redis]==5.3.4`. + let name = super::parse::pypi_name_part(name).to_string(); + pkgs.push(TreePackage { + requested: requested.contains(&crate::vuln_api::Ecosystem::Pypi.normalize_name(&name)), + name, + version: version.trim().to_string(), + }); + } + if pkgs.is_empty() { + return Err("uv pip compile produced no packages".to_string()); + } + Ok(pkgs) +} + +/// Direct dependency names declared by the project's `package.json` (the +/// manifest `resolve_npm_tree` copies — nearest ancestor, like npm). +/// Empty when the manifest is absent or unparsable — origin labeling then +/// degrades to `(transitive)`. 
+pub fn project_direct_deps() -> std::collections::HashSet { + npm_project_root() + .and_then(|root| std::fs::read_to_string(root.join("package.json")).ok()) + .map(|s| direct_deps_from_manifest(&s)) + .unwrap_or_default() +} + +fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { + let Ok(manifest) = serde_json::from_str::(json) else { + return Default::default(); + }; + let groups = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ]; + groups + .iter() + .filter_map(|g| manifest.get(g)?.as_object()) + .flat_map(|deps| deps.keys().cloned()) + .collect() +} + +/// Resolve npm's full would-install set by generating a lockfile in a +/// throwaway dir so the user's own lockfile is never touched. npm's +/// `--dry-run --json` only emits counts (npm/cli#6558), so we read the +/// generated `package-lock.json` instead. +/// +/// `--ignore-scripts` because npm has run lifecycle scripts under +/// `--package-lock-only` before (npm/cli#2787). +fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { + // Flags that redirect npm's project root would defeat the throwaway-dir + // isolation below (`--prefix` overrides `current_dir`, so the dry run + // would write the USER'S package-lock.json) — degrade to named-only. + const ROOT_REDIRECT_FLAGS: [&str; 5] = ["--prefix", "-C", "--global", "-g", "--location"]; + if let Some(flag) = install_args.iter().find(|a| { + ROOT_REDIRECT_FLAGS + .iter() + .any(|f| a.as_str() == *f || a.starts_with(&format!("{f}="))) + }) { + return Err(format!( + "'{flag}' redirects npm's project root; lockfile resolution skipped" + )); + } + + let resolved = super::exec::resolve_binary(binary)?; + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + // Copy the manifests from the project npm would operate on (nearest + // ancestor package.json), not just the CWD. 
+ let root = npm_project_root(); + for manifest in [ + "package.json", + "package-lock.json", + "npm-shrinkwrap.json", + ".npmrc", + ] { + let src = match &root { + Some(root) => root.join(manifest), + None => std::path::PathBuf::from(manifest), + }; + if src.exists() { + std::fs::copy(&src, work.path().join(manifest)) + .map_err(|e| format!("copy {manifest}: {e}"))?; + } + } + let output = Command::new(&resolved) + .arg("install") + .args(install_args) + .args([ + "--package-lock-only", + "--ignore-scripts", + "--no-audit", + "--no-fund", + ]) + .current_dir(work.path()) + .output() + .map_err(|e| format!("run npm lockfile resolution: {e}"))?; + if !output.status.success() { + return Err(format!( + "npm lockfile resolution failed: {}", + stderr_tail(&output) + )); + } + let lock = std::fs::read_to_string(work.path().join("package-lock.json")) + .map_err(|e| format!("read generated package-lock.json: {e}"))?; + parse_npm_lockfile(&lock) +} + +pub(super) fn parse_npm_lockfile(json: &str) -> Result, String> { + let lock: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse package-lock.json: {e}"))?; + let packages = lock + .get("packages") + .and_then(|v| v.as_object()) + .ok_or("package-lock.json has no packages map (npm < 7?)")?; + Ok(packages + .iter() + // Skip the root project entry ("") and symlinked (workspace) entries. + .filter(|(path, entry)| { + !path.is_empty() && entry.get("link").and_then(|v| v.as_bool()) != Some(true) + }) + .filter_map(|(path, entry)| { + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path))?; + let version = entry.get("version").and_then(|v| v.as_str())?; + Some(TreePackage { + name, + version: version.to_string(), + requested: false, + }) + }) + .collect()) +} + +/// Derive a package name from a lockfile path key like +/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. 
`None` for keys +/// outside `node_modules/` (workspace stanzas carry an explicit `name`). +fn name_from_lock_path(path: &str) -> Option { + if !path.contains("node_modules/") { + return None; + } + let name = crate::deps::ecosystems::npm::package_name_from_lock_key(path); + (!name.is_empty()).then(|| name.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const OK_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + + #[test] + fn parse_pip_report_ok() { + let pkgs = parse_pip_report(OK_REPORT).expect("parse ok report"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_pip_report_missing_requested_defaults_false() { + let json = r#"{"install":[{"metadata":{"name":"x","version":"1.0.0"}}]}"#; + let pkgs = parse_pip_report(json).expect("parse report without requested"); + assert!(!pkgs[0].requested); + } + + #[test] + fn parse_pip_report_missing_install() { + let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); + assert!(err.contains("no install[]"), "got: {err}"); + } + + #[test] + fn parse_pip_report_missing_version() { + let json = r#"{"install":[{"metadata":{"name":"x"}}]}"#; + let err = parse_pip_report(json).expect_err("missing version"); + assert!(err.contains("metadata.version"), "got: {err}"); + } + + #[test] + fn parse_pip_report_non_json() { + let err = parse_pip_report("not json").expect_err("non-json"); + assert!(err.contains("parse pip report"), "got: {err}"); + } + + #[test] + fn parse_compiled_requirements_pins_extras_and_markers() { + let requested = std::collections::HashSet::from(["flask-cors".to_string()]); + let out = 
"Flask_Cors==4.0.0\ncelery[redis]==5.3.4\nwerkzeug==3.1.8 ; python_version >= \"3.9\"\n\n# comment\n--index-url https://example.com\n"; + let pkgs = parse_compiled_requirements(out, &requested).expect("parse pins"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "Flask_Cors".to_string(), + version: "4.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "celery".to_string(), + version: "5.3.4".to_string(), + requested: false, + }, + TreePackage { + name: "werkzeug".to_string(), + version: "3.1.8".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_compiled_requirements_rejects_non_pins() { + let none = std::collections::HashSet::new(); + let err = parse_compiled_requirements("flask>=2.0\n", &none).expect_err("not a pin"); + assert!(err.contains("unexpected line"), "got: {err}"); + let err = parse_compiled_requirements("", &none).expect_err("empty"); + assert!(err.contains("no packages"), "got: {err}"); + } + + #[test] + fn requested_names_unions_targets_and_requirements_files() { + let dir = tempfile::tempdir().expect("temp dir"); + let req = dir.path().join("requirements.txt"); + std::fs::write( + &req, + "# comment\nFlask_Cors==4.0.0\nrequests[security]>=2.0 ; python_version >= \"3.9\"\n-r other.txt\nhttps://example.com/pkg.whl\n", + ) + .expect("write requirements"); + let parsed = super::super::parse::ParsedInstall { + targets: vec![super::super::InstallTarget { + name: "celery".to_string(), + display: "celery==5.3.4".to_string(), + kind: super::super::TargetKind::Pypi( + crate::verify_deps::registry::PypiSpec::Exact("5.3.4".to_string()), + ), + }], + requirements_files: vec![req], + }; + let names = requested_names(&parsed); + for name in ["celery", "flask-cors", "requests"] { + assert!(names.contains(name), "missing {name}: {names:?}"); + } + assert_eq!(names.len(), 3); + } + + // lockfile-v3 with: root entry (skipped), a plain dep, a nested dep, + // a scoped dep, and a workspace `link: true` entry (skipped). 
+ const NPM_LOCK: &str = r#"{ + "name": "proj", "lockfileVersion": 3, + "packages": { + "": {"name": "proj", "version": "1.0.0"}, + "node_modules/oldpkg": {"version": "1.0.0"}, + "node_modules/evildep": {"version": "0.4.2"}, + "node_modules/a/node_modules/b": {"version": "2.3.4"}, + "node_modules/@scope/pkg": {"version": "9.0.1"}, + "node_modules/localdep": {"resolved": "../local", "link": true}, + "packages/localdep": {"name": "localdep", "version": "0.0.1"} + } + }"#; + + #[test] + fn parse_npm_lockfile_ok() { + let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + let pkg = |name: &str, version: &str| TreePackage { + name: name.to_string(), + version: version.to_string(), + requested: false, + }; + assert_eq!( + pkgs, + vec![ + pkg("@scope/pkg", "9.0.1"), + pkg("b", "2.3.4"), + pkg("evildep", "0.4.2"), + pkg("localdep", "0.0.1"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_missing_packages() { + let err = parse_npm_lockfile(r#"{"lockfileVersion":1}"#).expect_err("no packages map"); + assert!(err.contains("no packages map"), "got: {err}"); + } + + #[test] + fn name_from_lock_path_handles_nested_and_scoped() { + assert_eq!( + name_from_lock_path("node_modules/oldpkg").as_deref(), + Some("oldpkg") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/b").as_deref(), + Some("b") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/@scope/pkg").as_deref(), + Some("@scope/pkg") + ); + assert_eq!(name_from_lock_path("packages/foo"), None); + } + + #[test] + fn direct_deps_from_manifest_unions_all_groups() { + let manifest = r#"{ + "name": "proj", + "dependencies": {"a": "^1.0.0", "@scope/b": "2.x"}, + "devDependencies": {"c": "*"}, + "optionalDependencies": {"d": "1.2.3"}, + "peerDependencies": {"e": ">=1"} + }"#; + let deps = direct_deps_from_manifest(manifest); + for name in ["a", "@scope/b", "c", "d", "e"] { + assert!(deps.contains(name), 
"missing {name}"); + } + assert_eq!(deps.len(), 5); + } + + #[test] + fn direct_deps_from_manifest_degrades_to_empty() { + assert!(direct_deps_from_manifest("not json").is_empty()); + assert!(direct_deps_from_manifest(r#"{"name":"proj"}"#).is_empty()); + assert!(direct_deps_from_manifest(r#"{"dependencies":[]}"#).is_empty()); + } +} diff --git a/src/precheck/uv.rs b/src/precheck/uv.rs new file mode 100644 index 0000000..8b1ceb5 --- /dev/null +++ b/src/precheck/uv.rs @@ -0,0 +1,177 @@ +//! `corgea uv` routing: `uv pip install` / `uv add` / `uv pip sync` reuse +//! the parsed-install gate; `uv sync` is gated from `uv.lock`. + +use super::{corgea_cmd, detect, exec, parse, tree, PackageManager, PrecheckOptions}; + +pub(super) fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { + let json = opts.json; + let exec = move || exec::exec_command_with_stdio("uv", cmd, json); + + if matches!(cmd.first().map(String::as_str), Some("install" | "i")) { + return super::refuse_guard(&opts, unsupported_uv_install_message(&cmd[1..]), 1); + } + + match parse::classify_uv_command(cmd) { + // Passthrough is a transparent shim: no report, untouched stdio. + parse::UvCommand::Passthrough => exec::exec_command("uv", cmd), + parse::UvCommand::PipInstall { install_args } => { + let parsed = match parse::parse_pip_install_args(install_args) { + Ok(p) => p, + Err(e) => { + return super::refuse_guard( + &opts, + format!("failed to parse install args: {}", e), + 2, + ); + } + }; + super::run_parsed_install( + PackageManager::Uv, + "pip install", + install_args, + parsed, + exec, + opts, + ) + } + parse::UvCommand::PipSync { sync_args } => { + // `uv pip sync reqs.txt` installs exactly the given requirements + // set — gate it like `uv pip install -r reqs.txt`. + let parsed = parse::parse_pip_sync_args(sync_args); + if parsed.requirements_files.is_empty() { + // No files named: uv errors on its own. 
+ return exec::exec_command("uv", cmd); + } + super::run_parsed_install( + PackageManager::Uv, + "pip sync", + sync_args, + parsed, + exec, + opts, + ) + } + parse::UvCommand::Add { add_args } => { + let parsed = parse::parse_pypi_positionals_args(add_args); + if !opts.force { + if let Some(message) = + detect::wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) + { + return super::refuse_guard(&opts, message, 1); + } + } + super::run_parsed_install(PackageManager::Uv, "add", add_args, parsed, exec, opts) + } + parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), + } +} + +fn unsupported_uv_install_message(rest: &[String]) -> String { + format!( + "error: uv does not support top-level `install`.\nDid you mean `{}`?", + corgea_cmd(&["uv", "pip", "install"], rest) + ) +} + +/// Gate `uv sync` from the project's `uv.lock`. The lockfile is the full +/// locked universe (all groups/extras) — a superset of what sync installs, +/// conservative in the blocking direction; a stale lock that sync would +/// re-resolve is gated as written. Recency isn't checked (locked versions +/// aren't newly chosen by this command); the verdict pass is the gate. We +/// never run `uv lock` ourselves — locking can build sdists, which would +/// execute package code before any verdict. +fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { + if opts.verdict.is_none() { + // Direct callers may still disable verdicts completely. + return exec(); + } + // uv discovers the project by walking up from the CWD — find `uv.lock` + // the same way, so a sync run from a project subdirectory stays gated. 
+ let Some(lock_path) = tree::find_up("uv.lock") else { + eprintln!( + "note: no uv.lock here — 'uv sync' is not gated; dependencies install unchecked (run 'uv lock' first to enable the gate)" + ); + return exec(); + }; + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read {}: {e}", lock_path.display())) + .and_then(|content| parse_uv_lock(&content)); + super::run_locked_install( + PackageManager::Uv, + "sync", + cmd[1..].to_vec(), + lock, + &opts, + exec, + ) +} + +/// Packages from `uv.lock` that `uv sync` installs from an index. Local +/// stanzas (the project itself and path deps: editable / virtual / +/// directory / path sources) carry no registry identity and are skipped. +fn parse_uv_lock(content: &str) -> Result, String> { + #[derive(serde::Deserialize)] + struct Lock { + #[serde(default)] + package: Vec, + } + #[derive(serde::Deserialize)] + struct Pkg { + name: String, + version: Option, + #[serde(default)] + source: std::collections::BTreeMap, + } + const LOCAL_SOURCES: [&str; 4] = ["editable", "virtual", "directory", "path"]; + + let lock: Lock = toml::from_str(content).map_err(|e| format!("parse uv.lock: {e}"))?; + Ok(lock + .package + .into_iter() + .filter(|p| !LOCAL_SOURCES.iter().any(|k| p.source.contains_key(*k))) + .filter_map(|p| { + Some(tree::TreePackage { + name: p.name, + version: p.version?, + requested: false, + }) + }) + .collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_uv_lock_keeps_index_packages_and_skips_local_sources() { + let lock = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." 
} + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "gitdep" +version = "1.2.3" +source = { git = "https://example.com/repo?rev=abc#abc" } +"#; + let pkgs = parse_uv_lock(lock).expect("parse uv.lock"); + let names: Vec<&str> = pkgs.iter().map(|p| p.name.as_str()).collect(); + assert_eq!(names, vec!["evildep", "gitdep"]); + assert_eq!(pkgs[0].version, "0.4.2"); + } + + #[test] + fn parse_uv_lock_rejects_invalid_toml() { + let err = parse_uv_lock("not = [valid").expect_err("invalid toml"); + assert!(err.contains("parse uv.lock"), "got: {err}"); + } +} diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs new file mode 100644 index 0000000..52eba9d --- /dev/null +++ b/src/precheck/verdict.rs @@ -0,0 +1,650 @@ +//! Verdict pass: bounded vuln-api worker pool, result matching, and the +//! single block predicate (`block_reason`). + +use std::time::Duration; + +use super::{ + tree, InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictMode, VerdictStatus, +}; + +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + +/// Max parallel vuln-api / registry requests. +const VERDICT_CONCURRENCY: usize = 8; + +/// Bounded worker pool over the verdict jobs. On client/request failure every +/// job comes back `Unverifiable`; `block_reason` decides whether that +/// fails closed for the selected mode. Order is preserved: result `i` +/// belongs to job `i`. 
+pub(super) fn verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) + .collect(); + } + }; + + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); + } + + let ecosystem = manager.ecosystem(); + let verdicts = + pooled_map( + &jobs, + VERDICT_CONCURRENCY, + |job| match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + cfg.mode.auth_token(), + ecosystem, + &job.name, + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }, + ); + jobs.into_iter().zip(verdicts).collect() +} + +/// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`. +/// Each call is an independent blocking HTTP request on the gate's critical +/// path, so they must not run serially. Plain work-stealing over an index, +/// no new crates; single-item lists skip the thread machinery. 
+fn pooled_map( + items: &[T], + concurrency: usize, + f: impl Fn(&T) -> R + Sync, +) -> Vec { + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Mutex; + + if items.len() <= 1 { + return items.iter().map(&f).collect(); + } + let next = AtomicUsize::new(0); + let results: Mutex>> = Mutex::new(items.iter().map(|_| None).collect()); + let workers = concurrency.clamp(1, items.len()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let i = next.fetch_add(1, Ordering::Relaxed); + let Some(item) = items.get(i) else { break }; + let result = f(item); + results.lock().unwrap()[i] = Some(result); + }); + } + }); + results + .into_inner() + .unwrap() + .into_iter() + .map(|r| r.expect("pooled_map worker filled every slot")) + .collect() +} + +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. +pub(super) fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) so matching the + // pooled results stays linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + } + } + + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. 
} = &mut outcomes[i] { + *v = verdict.clone(); + } + } + } else { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + origin, + verdict, + }); + } + } + transitive +} + +/// Authenticated mode fails closed: lookup errors block instead of warning. +pub(super) fn authenticated_verdict(opts: &PrecheckOptions) -> bool { + opts.verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.auth_token().is_some()) +} + +/// The verdict config when running in fail-open public mode; `None` when +/// verdicts are off or authenticated. The one definition of "public mode", +/// dual of `authenticated_verdict`. +pub(super) fn public_verdict(opts: &PrecheckOptions) -> Option<&VerdictConfig> { + opts.verdict + .as_ref() + .filter(|cfg| matches!(cfg.mode, VerdictMode::Public)) +} + +/// Why the gate refuses to run the install. The single owner of both the +/// block decision and the escape hatch the refusal advertises — +/// `render::print_refusal` only maps variants to text. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum BlockReason { + /// Every blocking finding predates this command (existing tree only). + /// `--force` is the escape. + ExistingTree, + /// Vulnerable findings, or unverifiable/error findings in fail-closed + /// (authenticated) mode. `--force` is the escape. + Findings, + /// Only the recency threshold fired. `--no-fail` is the escape. + RecencyOnly, +} + +pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> Option { + if opts.force { + return None; + } + // A resolution error means no verdict was obtained for that target, so + // in authenticated mode it fails closed like `Unverifiable` — otherwise a + // registry outage silently bypasses the gate. 
+ let fail_closed = authenticated_verdict(opts); + if report.verdicts().any(|v| v.blocks(fail_closed)) || (fail_closed && report.error_count() > 0) + { + return Some(if blames_existing_tree(report, opts) { + BlockReason::ExistingTree + } else { + BlockReason::Findings + }); + } + if !opts.no_fail && report.recent_count() > 0 { + return Some(BlockReason::RecencyOnly); + } + None +} + +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, no named target blocks, and every *blocking* tree +/// finding (`VerdictStatus::blocks`, same predicate `block_reason` refuses +/// on) genuinely predates this command. A `Requested` finding (pip `-r`) +/// is added by this command and renders as `(from requirements)`; a +/// `Transitive` finding on any install that names targets or requirements +/// files is being pulled in by them right now. Only a truly bare install +/// (`report.bare_install`) or manifest-declared `PreExisting` findings may +/// blame the existing tree. +fn blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + let fail_closed = authenticated_verdict(opts); + let named_blocks = report.named_verdicts().any(|v| v.blocks(fail_closed)); + if report.vulnerable_count() == 0 || named_blocks { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| t.verdict.blocks(fail_closed)) + .all(|t| match t.origin { + // A locked pin predates the sync command that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + +/// Resolve every named target against its registry through the bounded +/// worker pool. Order is preserved: outcome `i` belongs to `targets[i]`. 
+pub(super) fn verify_all( + targets: &[InstallTarget], + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> Vec { + pooled_map(targets, VERDICT_CONCURRENCY, |t| verify_one(t, opts, now)) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + verdict: VerdictStatus::NotChecked, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::super::{ + run_verdict_pass, tree, InstallTarget, PackageManager, PrecheckOptions, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictMode, + VerdictStatus, + }; + use super::*; + + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + block_reason(report, opts).is_some() + } + + /// Predicate matrix: force ⇒ never block; vulnerable blocks in every + /// verdict mode; unverifiable/error findings block only in authenticated + /// mode; recency keeps its task-2 --no-fail demotion. 
+ #[test] + fn block_predicate_matrix() { + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + let resolution_error = report_with(vec![TargetOutcome::Error { + target: InstallTarget { + name: "pkg".to_string(), + display: "pkg==1.0.0".to_string(), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + error: "registry unavailable".to_string(), + }]); + + assert!(!should_block_install(&clean, &public_opts(false, false))); + assert!(should_block_install(&recent, &public_opts(false, false))); + assert!(!should_block_install(&recent, &public_opts(true, false))); + assert!(should_block_install( + &vulnerable, + &public_opts(false, false) + )); + assert!( + should_block_install(&vulnerable, &public_opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + !should_block_install(&unverifiable, &public_opts(false, false)), + "public mode must fail open on lookup errors" + ); + assert!( + should_block_install(&unverifiable, &authenticated_opts(true, false)), + "authenticated mode must fail closed on lookup errors" + ); + assert!( + !should_block_install(&resolution_error, &public_opts(false, false)), + "public mode must fail open when no verdict can be obtained" + ); + assert!( + should_block_install(&resolution_error, &authenticated_opts(false, false)), + "authenticated mode must fail closed when no verdict can be obtained" + ); + for report in [ + &clean, + &recent, + &vulnerable, + &unverifiable, + &resolution_error, + ] { + 
assert!( + !should_block_install(report, &public_opts(false, true)), + "--force must never block" + ); + assert!(!should_block_install( + report, + &authenticated_opts(true, true) + )); + } + } + + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with it. + #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts() + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. + #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. 
+ let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + blames_existing_tree(&report, &authenticated_opts(false, false)), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// Unverifiable tree findings block too (`block_reason`), so they must + /// pass the same origin test before the refusal may blame the existing + /// tree: a command-added unverifiable transitive alongside a + /// pre-existing vulnerable dep keeps the generic refusal on a named + /// install, while on a bare install everything still predates the + /// command. 
+ #[test] + fn refusal_blame_considers_unverifiable_tree_findings() { + let tree_finding = |name: &str, verdict, origin| TreeOutcome { + name: name.to_string(), + version: "1.0.0".to_string(), + verdict, + origin, + }; + let mixed_tree = || { + Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![ + tree_finding( + "stickydep", + VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + TreeOrigin::PreExisting, + ), + tree_finding( + "newdep", + VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), + TreeOrigin::Transitive, + ), + ], + }) + }; + + // Named install: the unverifiable transitive is being added by this + // command, so "none were added by this command" would lie. + let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); + report.tree = mixed_tree(); + assert!(!blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + assert!(blames_existing_tree(&report, &public_opts(false, false))); + + // Bare install: nothing named, everything resolved predates the + // command — the mixed findings still blame the existing tree. + let mut report = report_with(vec![]); + report.bare_install = true; + report.tree = mixed_tree(); + assert!(blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + } + + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. 
+ #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| crate::vuln_api_stub::key("pypi", name, "1.0.0"); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let opts = verdict_opts(&stub.base_url); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. + let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts(); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked, + .. + } + )); + } + + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`. 
+ #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let mut checks = HashMap::new(); + checks.insert( + crate::vuln_api_stub::key("pypi", "evil", "1.0.0"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let cfg = VerdictConfig { + base_url: stub.base_url.clone(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + requested: false, + }) + .collect(); + + let results = verdict_pool(jobs, &cfg, PackageManager::Pip); + assert_eq!(results.len(), 6, "all jobs verdicted"); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "only evil flagged"); + assert_eq!(clean, 5, "rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + .expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + + /// `pooled_map` maps every item and preserves order at any concurrency + /// (1 = serial, 8 > item count = all workers spawn but some drain empty). + #[test] + fn pooled_map_preserves_order_at_any_concurrency() { + let items: Vec = (0..6).collect(); + for concurrency in [1usize, 8] { + assert_eq!( + pooled_map(&items, concurrency, |i| i * 2), + vec![0, 2, 4, 6, 8, 10], + "concurrency {concurrency}" + ); + } + } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. 
+ #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } +} diff --git a/src/utils/api.rs b/src/utils/api.rs index 9b9a445..a218378 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,5 +1,6 @@ use crate::log::debug; use crate::utils; +use corgea::vuln_api::{auth_header, source}; use reqwest::header::HeaderMap; use reqwest::StatusCode; use reqwest::{ @@ -18,26 +19,11 @@ use std::path::Path; const CHUNK_SIZE: usize = 50 * 1024 * 1024; // 50 MB const API_BASE: &str = "/api/v1"; -fn get_source() -> String { - std::env::var("CORGEA_SOURCE").unwrap_or_else(|_| "cli".to_string()) -} - -fn is_jwt(token: &str) -> bool { - let parts: Vec<&str> = token.splitn(4, '.').collect(); - parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) -} - fn auth_headers(token: &str) -> HeaderMap { let mut headers = HeaderMap::new(); - if is_jwt(token) { - headers.insert( - "Authorization", - format!("Bearer {}", token).parse().unwrap(), - ); - } else { - headers.insert("CORGEA-TOKEN", token.parse().unwrap()); - } - headers.insert("CORGEA-SOURCE", get_source().parse().unwrap()); + let (name, value) = auth_header(token); + headers.insert(name, value.parse().unwrap()); + 
headers.insert("CORGEA-SOURCE", source().parse().unwrap()); headers } @@ -666,7 +652,7 @@ pub fn exchange_code_for_token( let response = client .get(&exchange_url) - .header("CORGEA-SOURCE", get_source()) + .header("CORGEA-SOURCE", source()) .query(&[("code", code)]) .send()?; @@ -1039,27 +1025,6 @@ mod tests { use super::*; use reqwest::header::{HeaderMap, HeaderValue}; - #[test] - fn is_jwt_accepts_three_dot_separated_non_empty_parts() { - assert!(is_jwt("aaa.bbb.ccc")); - assert!(is_jwt("header.payload.signature")); - } - - #[test] - fn is_jwt_rejects_wrong_part_count() { - assert!(!is_jwt("aaa.bbb")); - assert!(!is_jwt("aaa.bbb.ccc.ddd")); - assert!(!is_jwt("plainstring")); - assert!(!is_jwt("")); - } - - #[test] - fn is_jwt_rejects_when_any_part_is_empty() { - assert!(!is_jwt("aaa..ccc")); - assert!(!is_jwt(".bbb.ccc")); - assert!(!is_jwt("aaa.bbb.")); - } - #[test] fn auth_headers_uses_bearer_for_jwt_tokens() { let headers = auth_headers("aaa.bbb.ccc"); diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..b813529 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,137 @@ +//! Slim slice of #89's verify_deps: registry resolution + threshold helpers. + +pub mod registry; + +use std::time::Duration; + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. 
+pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + let d = Duration::try_from_secs_f64(secs).map_err(|_| "threshold too large".to_string())?; + // Establish the invariant every consumer relies on: the threshold + // must also fit in a `chrono::Duration` (see precheck's from_std). + chrono::Duration::from_std(d).map_err(|_| "threshold too large".to_string())?; + Ok(d) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). 
+pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_threshold_units() { + assert_eq!( + parse_threshold("2d").unwrap(), + Duration::from_secs(2 * 86400) + ); + assert_eq!( + parse_threshold("48h").unwrap(), + Duration::from_secs(48 * 3600) + ); + assert_eq!( + parse_threshold("30m").unwrap(), + Duration::from_secs(30 * 60) + ); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!( + parse_threshold("1w").unwrap(), + Duration::from_secs(7 * 86400) + ); + assert_eq!( + parse_threshold("3").unwrap(), + Duration::from_secs(3 * 86400) + ); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn parse_threshold_rejects_absurdly_large_values() { + // Too large for chrono::Duration (precheck converts via from_std). + assert!(parse_threshold("999999999999d").is_err()); + // Too large even for std::time::Duration. 
+ assert!(parse_threshold("1e308d").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..6d67e2b --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,862 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both resolvers turn a version spec into the concrete version that +//! would be installed, plus its publish time as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +// Matches `vuln_api::REQUEST_TIMEOUT` so a gate run degrades uniformly: +// both legs of a verdict pass give up at the same horizon. +const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +/// `corgea-cli/ (