diff --git a/Cargo.lock b/Cargo.lock index 2b9c8e7..c2b933e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,7 @@ version = "1.8.8" dependencies = [ "chrono", "clap", + "corgea", "dirs", "env_logger", "git2", @@ -359,6 +360,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1760,6 +1762,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index d60edad..afaf048 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,18 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "corgea" +path = "src/main.rs" + +[features] +# Compiles the in-crate vuln-api test stub (`vuln_api_stub`). Enabled for all +# test builds via the self dev-dependency below; never part of release builds. +test-stub = [] + +[dev-dependencies] +corgea = { path = ".", features = ["test-stub"] } + [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" @@ -19,6 +31,7 @@ reqwest = { version = "0.12.23", default-features = false, features = [ toml = "0.8.8" log = "0.4" env_logger = "0.11" +semver = "1" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" serde_derive = "1.0.195" diff --git a/harness b/harness index 84b5076..8b430d9 100755 --- a/harness +++ b/harness @@ -260,6 +260,12 @@ $output" } cmd_pre_commit() { + # git exports GIT_DIR/GIT_INDEX_FILE/… to hooks. From a linked + # worktree GIT_DIR is absolute, so any `git init`/`git add` a test + # spawns in a tempdir would resolve to the shared gitdir and + # corrupt the real repo. Scrub the hook env before running tests. 
+ unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE GIT_OBJECT_DIRECTORY \ + GIT_COMMON_DIR GIT_PREFIX local staged; staged="$(staged_rs_files)" if [ -z "$staged" ]; then printf "No staged Rust files — skipping checks\n" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index f23293f..656e5f7 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -109,6 +109,103 @@ corgea setup-hooks --default-config # Default: secrets + PII, fail on Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypass with `git commit --no-verify`. +### Install Wrappers — `corgea pip|npm|yarn|pnpm|uv <args>` + +Run a package manager through Corgea's install gate. Install commands with named +targets are resolved against the public registry first, then gated twice: a version +published within `--threshold` (default `2d`) blocks (exit 1), and each resolved +version is checked for known-vulnerable or malicious package records. Public mode +needs no token and fails open on vulnerability-service outages. Authenticated mode +uses the configured Corgea token against the default vuln-api and fails closed when a +Corgea verdict cannot be obtained. OSV is queried as a secondary public source; an +OSV finding blocks, but an OSV clean result never weakens a Corgea fail-closed result. +Everything else passes through with the package manager's own exit code. Git/URL/path +specs are noted, never blocked. Bare `npm install` (zero specs, `package.json` present) +is gated too: the full lockfile-resolved tree is verdicted, so a vulnerable lockfile blocks. Bare +`yarn`/`pnpm`/`uv` installs have no safe dry-run; they run unchecked after a stderr note +(`note: bare '<manager> <subcommand>' is not gated …`). `-r requirements.txt` files get a printed +note when the tree pass doesn't cover them. + +Blocked findings steer to the fix: each advisory line shows `fixed in <version>` (or +`no fixed version known`). 
When every advisory on a package has a fix, the gate +prints `→ safe version: <name>@<version>` — the highest fix covering every advisory. + +When vulnerability checks are enabled, the gate covers the **full would-install set** +where safe, not just the named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a +safe dry-run (`pip install --dry-run …`; an isolated `npm install --package-lock-only` +in a temp dir, never touching your lockfile) and verdict every package, so a flagged +**transitive** dependency blocks the install too. `yarn`, `pnpm`, and `uv` have no safe +dry-run, so they verify the named targets only and print +`warning: transitive dependencies not checked (…); only named packages were verified.` +The same warning is emitted (and the gate falls back to named-only) whenever a pip/npm +dry-run fails. Verdict requests run in a bounded pool (8 parallel). + +```bash +corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip +corgea npm install axios@^1.0.0 # same gate for npm ranges +corgea pip --no-fail install newpkg # demote a recency block to a warning (vuln blocks still apply) +corgea pip --force install badpkg # print findings but install anyway (overrides every block) +corgea pip --json install newpkg # machine-readable per-target report incl. verdicts +corgea pip list # non-install subcommands pass straight through +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable/unverifiable blocks. | +| `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | +| `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. 
| + +`--json` adds a `tree` object: `null` when no tree pass ran; otherwise `mode` is `"full"` +(transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a +`transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. +Vulnerable `verdict` objects carry a `remediation` field: the safe version covering +every advisory, or `null` when any advisory has no known fix. + +Recency and public vulnerability gating need no token. A custom `CORGEA_VULN_API_URL` is treated as public +(no token sent, fail-open) even when `CORGEA_TOKEN` is set; set +`CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` to forward the token to a custom URL and +make that path fail closed. Overrides for testing: `CORGEA_PYPI_REGISTRY`, +`CORGEA_NPM_REGISTRY`, `CORGEA_VULN_API_URL`, `CORGEA_OSV_API_URL`. + +#### Testing the gate + +Staging vuln-api (`CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev`) +serves deterministic verdicts for dogfooding. It ignores auth — any non-empty +`CORGEA_TOKEN` value is accepted, but the token is only sent (and the gate only fails closed) when `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` is set. Known-vulnerable targets: + +| Ecosystem | Target | Verdict | +|-----------|--------|---------| +| npm | `axios@0.21.0` | vulnerable — fixed in 0.21.2 | +| npm | `minimist@0.0.8` | vulnerable — fixed in 1.2.2 | +| npm | `node-fetch@2.6.0` | vulnerable — fixed in 2.6.7 | +| PyPI | `mezzanine==6.0.0` | vulnerable — no fixed version known | + +Verify the gate end-to-end: + +```bash +CORGEA_TOKEN=dogfood-dummy \ +CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev \ +corgea npm install axios@0.21.0 +``` + +Expected output (exit code 1; nothing is installed): + +``` +Pre-checking `npm install axios@0.21.0` (threshold 2d) + 1 ok, 0 recent, 1 vulnerable, 0 unverifiable, 0 skipped, 0 errors + tree: 2 packages resolved, 1 transitive checked + ✗ axios@0.21.0 → axios@0.21.0 known vulnerable: + CVE-2021-3749 (high) — fixed in 0.21.2 + CVE-2020-28168 (medium) — fixed in 0.21.1 + → safe version: axios@0.21.2 +Refusing to run install. 
Pass --force to proceed despite findings. +``` + +Caveat: the staging PyPI seed covers recent CVEs only. Decade-old classics +(`pyyaml==5.1`, `django==2.2`) return clean **by design** — a clean verdict on +those does not mean the gate is broken. + ### Deps — `corgea deps ` diff --git a/src/authorize.rs b/src/authorize.rs index 7271cf9..80fbbcf 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -94,7 +94,8 @@ pub fn run(scope: Option, url: Option) -> Result<(), Box Result> { // Try a more reliable approach - start from a higher range that's less likely to be used let search_ranges = vec![ - (start_port, start_port + 50), + // Saturate: a start port near u16::MAX must clamp, not overflow. + (start_port, start_port.saturating_add(50)), (9000, 9100), (8000, 8100), (7000, 7100), @@ -632,7 +633,16 @@ mod tests { assert!(!port_is_available(port)); drop(listener); - assert!(port_is_available(port)); + // The freed port returns to the OS ephemeral pool, where a parallel + // test's `bind(":0")` can snatch it before the re-check — so accept + // any of several freshly freed ports reading available. The chain is + // lazy: fresh ports are only reserved after a collision. + assert!( + std::iter::once(port) + .chain((0..4).map(|_| reserve_ephemeral_port())) + .any(port_is_available), + "five consecutive freed ports all read unavailable" + ); } #[test] diff --git a/src/config.rs b/src/config.rs index 257a483..f508bb8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -100,4 +100,53 @@ impl Config { self.debug } + + /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, + /// then the public default. 
+ pub fn get_vuln_api_url(&self) -> String { + crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()) + .trim() + .trim_end_matches('/') + .to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_config() -> Config { + Config { + url: "https://www.corgea.app".to_string(), + debug: 0, + token: "".to_string(), + } + } + + /// All `get_vuln_api_url` cases in one test fn: the env-var cases + /// mutate process-global state, so they must not run concurrently + /// with each other under the parallel test harness. + #[test] + fn get_vuln_api_url_resolution_order() { + env::remove_var("CORGEA_VULN_API_URL"); + + // Default when the env var is unset. + assert_eq!( + test_config().get_vuln_api_url(), + "https://vuln-api.corgea.app" + ); + + // Env var wins; whitespace and trailing slash trimmed. + env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); + assert_eq!(test_config().get_vuln_api_url(), "https://env.example.com"); + + // Empty / whitespace-only env var is treated as unset. + env::set_var("CORGEA_VULN_API_URL", " "); + assert_eq!( + test_config().get_vuln_api_url(), + "https://vuln-api.corgea.app" + ); + env::remove_var("CORGEA_VULN_API_URL"); + } } diff --git a/src/deps/ecosystems/pypi.rs b/src/deps/ecosystems/pypi.rs index 062f13c..0f5fa77 100644 --- a/src/deps/ecosystems/pypi.rs +++ b/src/deps/ecosystems/pypi.rs @@ -367,7 +367,10 @@ fn exact_version_from_declared(name: &str, declared: &str) -> Option { Some(declared.trim_start_matches('=').trim().to_string()) } -fn normalize_pypi_name(name: &str) -> String { +/// PEP 503 name normalization: lowercase, runs of `-`/`_`/`.` collapse to `-`. +/// Also used by the install gate (`precheck`) so both features share one +/// canonical pypi name form. 
+pub(crate) fn normalize_pypi_name(name: &str) -> String { let mut out = String::new(); let mut last_was_separator = false; for c in name.trim().chars() { diff --git a/src/lib.rs b/src/lib.rs index 49bc6d0..537ab35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,14 @@ pub mod deps; +pub mod osv; +pub mod precheck; +pub mod verify_deps; +// Also declared in the binary crate (src/main.rs); re-declared here so library modules +// (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that +// compiles cleanly in both crates. +mod log; +pub mod vuln_api; +// Test-only HTTP stub for the vuln-api. Gated out of release builds; the +// `test-stub` feature is enabled for every test build by the self +// dev-dependency in Cargo.toml, so integration tests can use it too. +#[cfg(any(test, feature = "test-stub"))] +pub mod vuln_api_stub; diff --git a/src/main.rs b/src/main.rs index 442c5a1..3b901e1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,6 +199,16 @@ enum Commands { #[command(subcommand)] command: corgea::deps::run::DepsSubcommand, }, + /// Wrap `npm` commands: verify install targets' publish recency, then run npm. + Npm(InstallWrapArgs), + /// Wrap `yarn` commands: verify install targets' publish recency, then run yarn. + Yarn(InstallWrapArgs), + /// Wrap `pnpm` commands: verify install targets' publish recency, then run pnpm. + Pnpm(InstallWrapArgs), + /// Wrap `pip` commands: verify install targets' publish recency, then run pip. + Pip(InstallWrapArgs), + /// Wrap `uv` commands: verify install targets' publish recency, then run uv. + Uv(InstallWrapArgs), } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -221,6 +231,191 @@ impl FromStr for Scanner { } } +/// Shared flags for the install-wrapper subcommands (`corgea npm|yarn|pnpm|pip|uv`). 
+#[derive(clap::Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + value_parser = corgea::verify_deps::parse_threshold, + help = "Recency threshold. Resolved versions younger than this are blocked. e.g. '2d', '12h'." + )] + threshold: std::time::Duration, + + #[arg( + long, + help = "Demote a recency block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Proceed with the install despite vulnerable, unverifiable, or recent findings. Findings are still printed." + )] + force: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, +} + +fn install_wrap_options( + args: &InstallWrapArgs, + config: &Config, +) -> corgea::precheck::PrecheckOptions { + let token = config.get_token(); + let token = token.trim(); + let custom_vuln_api = utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL").is_some(); + let send_token_to_custom = + utils::generic::get_env_var_if_exists("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") + .is_some_and(|v| v == "1"); + let token_for_vuln_api = if !token.is_empty() && (!custom_vuln_api || send_token_to_custom) { + Some(token.to_string()) + } else { + None + }; + let fail_closed = token_for_vuln_api.is_some(); + let verdict = Some(corgea::precheck::VerdictConfig { + base_url: config.get_vuln_api_url(), + token: token_for_vuln_api, + fail_closed, + }); + corgea::precheck::PrecheckOptions { + threshold: args.threshold, + no_fail: args.no_fail, + force: args.force, + json: args.json, + verdict, + osv: Some(corgea::osv::OsvConfig { + base_url: utils::generic::get_env_var_if_exists("CORGEA_OSV_API_URL") + .unwrap_or_else(|| "https://api.osv.dev".to_string()), + }), + npm_registry: 
utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn run_install_wrap_command( + manager: corgea::precheck::PackageManager, + args: &InstallWrapArgs, + config: &Config, +) { + let code = + corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args, config)); + std::process::exit(code); +} + +fn maybe_run_install_wrap_from_raw_args() { + let raw: Vec = std::env::args().skip(1).collect(); + let Some(first) = raw.first().map(String::as_str) else { + return; + }; + if first == "pip3" { + eprintln!("`corgea pip3 ...` is not supported. Use `corgea pip ...`; Corgea will fall back to the pip3 binary when needed."); + std::process::exit(2); + } + let manager = match first { + "npm" => corgea::precheck::PackageManager::Npm, + "yarn" => corgea::precheck::PackageManager::Yarn, + "pnpm" => corgea::precheck::PackageManager::Pnpm, + "pip" => corgea::precheck::PackageManager::Pip, + "uv" => corgea::precheck::PackageManager::Uv, + _ => return, + }; + if install_wrapper_requests_help(&raw[1..]) { + return; + } + let args = match parse_install_wrap_raw_args(&raw[1..]) { + Ok(args) => args, + Err(e) => { + eprintln!("{e}"); + std::process::exit(2); + } + }; + let config = Config::load().expect("Failed to load config"); + run_install_wrap_command(manager, &args, &config); +} + +fn install_wrapper_requests_help(raw: &[String]) -> bool { + let mut i = 0; + while i < raw.len() { + match raw[i].as_str() { + "--help" | "-h" => return true, + "--threshold" | "-t" => { + i += 2; + } + "--no-fail" | "--force" | "--json" => { + i += 1; + } + arg if arg.starts_with("--threshold=") => { + i += 1; + } + _ => return false, + } + } + false +} + +fn parse_install_wrap_raw_args(raw: &[String]) -> Result { + let mut args = InstallWrapArgs { + threshold: corgea::verify_deps::parse_threshold("2d").expect("default threshold"), + no_fail: false, + force: false, + json: false, + cmd: 
Vec::new(), + }; + let mut wrapper_allowed = true; + let mut i = 0; + while i < raw.len() { + let arg = &raw[i]; + if wrapper_allowed { + match arg.as_str() { + "--threshold" | "-t" => { + let value = raw + .get(i + 1) + .ok_or_else(|| format!("{arg} requires a duration"))?; + args.threshold = corgea::verify_deps::parse_threshold(value)?; + i += 2; + continue; + } + "--no-fail" => { + args.no_fail = true; + i += 1; + continue; + } + "--force" => { + args.force = true; + i += 1; + continue; + } + "--json" => { + args.json = true; + i += 1; + continue; + } + _ => { + if let Some(value) = arg.strip_prefix("--threshold=") { + args.threshold = corgea::verify_deps::parse_threshold(value)?; + i += 1; + continue; + } + } + } + } + wrapper_allowed = false; + args.cmd.push(arg.clone()); + i += 1; + } + Ok(args) +} + /// Initialize the global logger. /// /// `CORGEA_DEBUG=1` (env var or config file) raises the default verbosity to @@ -246,6 +441,7 @@ fn default_log_level(debug_flag: i8) -> &'static str { } fn main() { + maybe_run_install_wrap_from_raw_args(); let cli = Cli::parse(); let mut corgea_config = Config::load().expect("Failed to load config"); init_logging(&corgea_config); @@ -504,6 +700,23 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } + // Install wrappers: no hard auth gate. Public vulnerability checks run + // without a token; authenticated default-url checks fail closed. 
+ Some(Commands::Npm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args, &corgea_config) + } + Some(Commands::Yarn(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Yarn, args, &corgea_config) + } + Some(Commands::Pnpm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pnpm, args, &corgea_config) + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args, &corgea_config) + } + Some(Commands::Uv(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Uv, args, &corgea_config) + } None => { utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); diff --git a/src/osv/mod.rs b/src/osv/mod.rs new file mode 100644 index 0000000..6aa01d7 --- /dev/null +++ b/src/osv/mod.rs @@ -0,0 +1,303 @@ +//! OSV public vulnerability client. +//! +//! OSV is a secondary signal for install gating. It can add a block when it +//! finds a package-version advisory, but an OSV clean result never weakens an +//! authenticated Corgea fail-closed verdict. 
+
+use std::sync::OnceLock;
+use std::time::Duration;
+
+use serde::{Deserialize, Serialize};
+
+use crate::vuln_api::VulnMatch;
+
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(30);
+
+#[derive(Debug, Clone)]
+pub struct OsvConfig {
+    pub base_url: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct OsvPackage {
+    pub ecosystem: String,
+    pub name: String,
+    pub version: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OsvVerdict {
+    Clean,
+    Vulnerable(Vec<VulnMatch>),
+}
+
+#[derive(Debug, Serialize)]
+struct QueryBatchRequest<'a> {
+    queries: Vec<Query<'a>>,
+}
+
+#[derive(Debug, Serialize)]
+struct Query<'a> {
+    package: Package<'a>,
+    version: &'a str,
+}
+
+#[derive(Debug, Serialize)]
+struct Package<'a> {
+    ecosystem: &'a str,
+    name: &'a str,
+}
+
+#[derive(Debug, Deserialize)]
+struct QueryBatchResponse {
+    results: Vec<QueryResult>,
+}
+
+#[derive(Debug, Deserialize)]
+struct QueryResult {
+    #[serde(default)]
+    vulns: Vec<OsvVuln>,
+}
+
+#[derive(Debug, Deserialize)]
+struct OsvVuln {
+    id: String,
+    #[serde(default)]
+    aliases: Vec<String>,
+    #[serde(default)]
+    affected: Vec<OsvAffected>,
+    #[serde(default)]
+    severity: Vec<OsvSeverity>,
+    #[serde(default)]
+    database_specific: serde_json::Value,
+}
+
+#[derive(Debug, Deserialize)]
+struct OsvSeverity {
+    #[serde(default)]
+    score: String,
+}
+
+#[derive(Debug, Deserialize)]
+struct OsvAffected {
+    #[serde(default)]
+    ranges: Vec<OsvRange>,
+}
+
+#[derive(Debug, Deserialize)]
+struct OsvRange {
+    #[serde(default)]
+    events: Vec<OsvEvent>,
+}
+
+#[derive(Debug, Deserialize)]
+struct OsvEvent {
+    fixed: Option<String>,
+}
+
+fn user_agent() -> String {
+    format!("corgea-cli/{} (osv)", env!("CARGO_PKG_VERSION"))
+}
+
+pub fn http_client() -> Result<reqwest::blocking::Client, String> {
+    static CLIENT: OnceLock<Result<reqwest::blocking::Client, String>> = OnceLock::new();
+    CLIENT
+        .get_or_init(|| {
+            reqwest::blocking::Client::builder()
+                .timeout(REQUEST_TIMEOUT)
+                .user_agent(user_agent())
+                .build()
+                .map_err(|e| format!("failed to build OSV http client: {e}"))
+        })
+        .clone()
+}
+
+pub fn query_batch(
+    client: &reqwest::blocking::Client,
+    base_url: &str,
+    packages: &[OsvPackage],
+) -> Result<Vec<OsvVerdict>, String> {
+    if packages.is_empty() {
+        return Ok(Vec::new());
+    }
+    let base = base_url.trim().trim_end_matches('/');
+    if base.is_empty() {
+        return Err("OSV base URL is empty".to_string());
+    }
+    let url = format!("{base}/v1/querybatch");
+    let body = QueryBatchRequest {
+        queries: packages
+            .iter()
+            .map(|pkg| Query {
+                package: Package {
+                    ecosystem: &pkg.ecosystem,
+                    name: &pkg.name,
+                },
+                version: &pkg.version,
+            })
+            .collect(),
+    };
+
+    let response = client
+        .post(&url)
+        .header("Accept", "application/json")
+        .json(&body)
+        .send()
+        .map_err(|e| format!("OSV request failed: {e}"))?;
+    let status = response.status();
+    if !status.is_success() {
+        return Err(format!("OSV returned HTTP {}", status.as_u16()));
+    }
+    let response_text = response
+        .text()
+        .map_err(|e| format!("failed to read OSV response: {e}"))?;
+    let parsed: QueryBatchResponse = serde_json::from_str(&response_text)
+        .map_err(|e| format!("failed to parse OSV response: {e}"))?;
+    if parsed.results.len() != packages.len() {
+        return Err(format!(
+            "OSV response returned {} results for {} queries",
+            parsed.results.len(),
+            packages.len()
+        ));
+    }
+
+    Ok(parsed
+        .results
+        .into_iter()
+        .map(|result| {
+            if result.vulns.is_empty() {
+                OsvVerdict::Clean
+            } else {
+                OsvVerdict::Vulnerable(result.vulns.into_iter().map(osv_match).collect())
+            }
+        })
+        .collect())
+}
+
+fn osv_match(vuln: OsvVuln) -> VulnMatch {
+    VulnMatch {
+        advisory_id: advisory_id(&vuln),
+        severity_level: severity_level(&vuln),
+        tier: severity_tier(&severity_level(&vuln)),
+        vulnerable_version_range: None,
+        fixed_version: fixed_version(&vuln),
+        source: Some("OSV".to_string()),
+    }
+}
+
+fn advisory_id(vuln: &OsvVuln) -> String {
+    if !vuln.id.trim().is_empty() {
+        return vuln.id.clone();
+    }
+    vuln.aliases
+        .iter()
+        .find(|alias| !alias.trim().is_empty())
+        .cloned()
+        .unwrap_or_else(|| "OSV".to_string())
+}
+
+fn severity_level(vuln: &OsvVuln) -> String {
+    if let Some(sev) = vuln
+        .database_specific
+        .get("severity")
+        .and_then(|v| v.as_str())
+        .filter(|s| !s.trim().is_empty())
+    {
+        return sev.to_ascii_lowercase();
+    }
+    let max_score = vuln
+        .severity
+        .iter()
+        .filter_map(|s| cvss_score(&s.score))
+        .fold(None, |max: Option<f64>, score| {
+            Some(max.map_or(score, |m| m.max(score)))
+        });
+    match max_score {
+        Some(score) if score >= 9.0 => "critical".to_string(),
+        Some(score) if score >= 7.0 => "high".to_string(),
+        Some(score) if score >= 4.0 => "medium".to_string(),
+        Some(_) => "low".to_string(),
+        None => "unknown".to_string(),
+    }
+}
+
+fn cvss_score(raw: &str) -> Option<f64> {
+    raw.parse::<f64>().ok() // NOTE(review): only bare numeric scores parse; a CVSS vector string ("CVSS:3.1/…") yields None — confirm against the OSV schema
+}
+
+fn severity_tier(severity: &str) -> u8 {
+    match severity.to_ascii_lowercase().as_str() {
+        "critical" => 1,
+        "high" => 2,
+        "medium" => 3,
+        "low" => 4,
+        _ => 5,
+    }
+}
+
+fn fixed_version(vuln: &OsvVuln) -> Option<String> {
+    vuln.affected
+        .iter()
+        .flat_map(|affected| &affected.ranges)
+        .flat_map(|range| &range.events)
+        .find_map(|event| event.fixed.clone()) // NOTE(review): first `fixed` event over every `affected` entry, not filtered by package — confirm this is intended
+}
+
+pub fn ecosystem_for_osv(ecosystem: &str) -> String {
+    match ecosystem {
+        "pypi" => "PyPI".to_string(),
+        "npm" => "npm".to_string(),
+        _ => ecosystem.to_string(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::{Read, Write};
+    use std::net::TcpListener;
+
+    fn spawn_osv_stub(body: &'static str) -> String {
+        let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub");
+        let base = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port());
+        std::thread::spawn(move || {
+            for stream in listener.incoming() {
+                let Ok(mut stream) = stream else { continue };
+                let mut buf = [0; 4096];
+                let _ = stream.read(&mut buf);
+                let resp = format!(
+                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
+                    body.len(),
+                    body
+                );
+                let _ = stream.write_all(resp.as_bytes());
+            }
+        });
+        base
+    }
+
+    #[test]
+    fn query_batch_maps_osv_vulnerabilities() {
+        let base = spawn_osv_stub(
+            r#"{"results":[{"vulns":[{"id":"GHSA-test","database_specific":{"severity":"HIGH"},"affected":[{"ranges":[{"events":[{"fixed":"2.0.0"}]}]}]}]}]}"#,
+        );
+        let client = http_client().expect("client");
+        let out = query_batch(
+            &client,
+            &base,
+            &[OsvPackage {
+                ecosystem: "PyPI".to_string(),
+                name: "oldpkg".to_string(),
+                version: "1.0.0".to_string(),
+            }],
+        )
+        .expect("query");
+        let OsvVerdict::Vulnerable(matches) = &out[0] else {
+            panic!("expected vulnerable: {out:?}");
+        };
+        assert_eq!(matches[0].advisory_id, "GHSA-test");
+        assert_eq!(matches[0].severity_level, "high");
+        assert_eq!(matches[0].fixed_version.as_deref(), Some("2.0.0"));
+        assert_eq!(matches[0].source.as_deref(), Some("OSV"));
+    }
+}
diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs
new file mode 100644
index 0000000..d46aceb
--- /dev/null
+++ b/src/precheck/mod.rs
@@ -0,0 +1,2508 @@
+//! Install wrappers: `corgea npm`, `corgea yarn`, `corgea pnpm`, `corgea pip`, `corgea uv`.
+//!
+//! Wraps an install command from a supported package manager, resolves what
+//! the package manager *would* install against the public registry, and either
+//! blocks the install or runs it transparently.
+//!
+//! Verification rule: a package is rejected if the resolved version
+//! was published within `--threshold` (default `2d`). This mirrors
+//! the `deps` flow but applies to the install-time set of
+//! packages instead of the already-locked set.
+//!
+//! By default a "recent" finding makes the wrapper exit with status 1
+//! *without* running the install. Use `--no-fail` to demote this to a
+//! warning (the install runs anyway).
+
+pub mod parse;
+pub mod tree;
+
+use std::ffi::OsString;
+use std::process::Command;
+use std::time::Duration;
+
+use chrono::Utc;
+
+use crate::verify_deps;
+
+/// Supported package managers. Each one shares enough behaviour with
+/// the others that we only need a small per-manager dispatch.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Yarn, + Pnpm, + Pip, + Uv, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Yarn => "yarn", + PackageManager::Pnpm => "pnpm", + PackageManager::Pip => "pip", + PackageManager::Uv => "uv", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add" | "ci" | "clean-install"), + PackageManager::Yarn => matches!(sub, "add" | "install"), + PackageManager::Pnpm => matches!(sub, "add" | "install" | "i"), + PackageManager::Pip => matches!(sub, "install"), + PackageManager::Uv => false, + } + } + + /// vuln-api ecosystem path segment for this manager's registry. + pub fn ecosystem(self) -> &'static str { + match self { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => "npm", + PackageManager::Pip | PackageManager::Uv => "pypi", + } + } + + /// Canonical package name for dedup/matching across spec spellings: + /// PEP 503 for pypi (shared with `deps`), verbatim for npm. + pub fn normalize_name(self, name: &str) -> String { + match self { + PackageManager::Pip | PackageManager::Uv => { + crate::deps::ecosystems::pypi::normalize_pypi_name(name) + } + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => name.to_string(), + } + } +} + +/// Connection details for the Corgea vuln-api verdict pass. Public mode uses +/// no token and fails open on service outages; authenticated mode carries a +/// token and fails closed when the Corgea verdict cannot be obtained. +#[derive(Debug, Clone)] +pub struct VerdictConfig { + pub base_url: String, + pub token: Option, + pub fail_closed: bool, +} + +/// Threat verdict for one resolved target. 
+#[derive(Debug, Clone)] +pub enum VerdictStatus { + /// vuln-api answered: no known advisories for this exact version. + Clean, + /// vuln-api answered: known vulnerable or malicious — blocks. + Vulnerable(Vec), + /// The verdict could not be obtained (network/5xx/auth/integrity). + /// Blocks fail-closed. + Unverifiable(String), + /// A public-mode vulnerability source could not be reached. This warns but + /// does not block; public checks fail open by design. + PublicUnavailable(String), + /// Verdict never attempted because all vulnerability sources are disabled. + /// This is used by unit tests and recency-only internal paths. + NotChecked, +} + +/// Reason recorded on resolved targets when vulnerability checks are disabled. +const NO_TOKEN_REASON: &str = "vulnerability checks disabled"; + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". + pub no_fail: bool, + /// If true, never block: print findings (recent, vulnerable, + /// unverifiable) and run the install anyway. + pub force: bool, + pub json: bool, + /// Corgea vuln-api verdict source. `None` disables this source. + pub verdict: Option, + pub osv: Option, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +impl PrecheckOptions { + fn has_vulnerability_checks(&self) -> bool { + self.verdict.is_some() || self.osv.is_some() + } + + fn fail_closed(&self) -> bool { + self.verdict.as_ref().is_some_and(|cfg| cfg.fail_closed) + } + + fn verdict_mode(&self) -> &'static str { + if self.fail_closed() { + "authenticated" + } else if self.has_vulnerability_checks() { + "public" + } else { + "recency-only" + } + } +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. 
+ pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly. The blocking recency condition is derived from + /// `age` against the report's threshold (`PrecheckReport::is_recent`). + Resolved { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + verdict: VerdictStatus, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +/// Why a tree-pass finding is in the would-install set. Drives the +/// provenance label so a package the user asked for (or already depends on) +/// is never mislabeled "(transitive)". +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TreeOrigin { + /// Pulled in as a dependency of something else. + Transitive, + /// Explicitly requested (pip report `"requested"` — CLI arg or + /// requirements file; leftovers here come from `-r` files since named + /// CLI targets match a named outcome instead). + Requested, + /// Already a direct dependency in the project's `package.json`. + PreExisting, + /// Pinned by the project's lockfile (`uv sync` from `uv.lock`). 
+ Locked, +} + +impl TreeOrigin { + fn label(self) -> &'static str { + match self { + TreeOrigin::Transitive => "(transitive)", + TreeOrigin::Requested => "(from requirements)", + TreeOrigin::PreExisting => "(already in package.json)", + TreeOrigin::Locked => "(locked)", + } + } + + fn json_name(self) -> &'static str { + match self { + TreeOrigin::Transitive => "transitive", + TreeOrigin::Requested => "requested", + TreeOrigin::PreExisting => "pre-existing", + TreeOrigin::Locked => "locked", + } + } +} + +/// Verdict for one package the tree pass resolved beyond the named targets. +#[derive(Debug)] +pub struct TreeOutcome { + pub name: String, + pub version: String, + pub origin: TreeOrigin, + pub verdict: VerdictStatus, +} + +/// Result of the tree pass. `PrecheckReport.tree` is `None` when the pass +/// never ran, usually because the command was recency-only or passthrough. +#[derive(Debug)] +pub enum TreeReport { + /// The full would-install set was resolved and verdicted. + Full { + /// Distinct packages the dry-run resolved (named + transitive). + resolved_count: usize, + /// Verdicts for resolved packages beyond the named targets. + transitive: Vec, + }, + /// Resolution unavailable or failed — only named targets were verified. + NamedOnly { reason: String }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, + /// `None` means no tree pass ran. + pub tree: Option, + /// True when the command named nothing — no CLI targets and no + /// requirements files — so everything the tree pass resolved predates + /// this command (bare `npm install`). Distinct from + /// `outcomes.is_empty()`: a requirements-only install also has no named + /// outcomes, but its resolved set IS added by the command. 
+ pub bare_install: bool, +} + +impl PrecheckReport { + fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { + self.outcomes.iter().filter(|o| pred(o)).count() + } + /// True when this age is within the recency threshold (the blocking + /// condition). The single definition of "recent". + fn is_recent(&self, age: Duration) -> bool { + age < self.threshold + } + pub fn ok_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if !self.is_recent(*age))) + } + pub fn recent_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) + } + pub fn vulnerable_count(&self) -> usize { + self.named_vulnerable_count() + self.tree_vulnerable_count() + } + pub fn unverifiable_count(&self) -> usize { + self.named_unverifiable_count() + self.tree_unverifiable_count() + } + /// Vulnerable findings among the named targets this command adds. + pub fn named_vulnerable_count(&self) -> usize { + self.named_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + } + /// Unverifiable findings among the named targets this command adds. + pub fn named_unverifiable_count(&self) -> usize { + self.named_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + } + /// Count named (resolved) outcomes whose verdict matches `pred`. + fn named_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { verdict, .. } if pred(verdict))) + } + /// Vulnerable findings beyond the named targets (the resolved tree). + pub fn tree_vulnerable_count(&self) -> usize { + self.tree_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + } + /// Unverifiable findings beyond the named targets (the resolved tree). + pub fn tree_unverifiable_count(&self) -> usize { + self.tree_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + } + /// Count transitive tree findings whose verdict matches `pred`. 
+ fn tree_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => { + transitive.iter().filter(|o| pred(&o.verdict)).count() + } + Some(TreeReport::NamedOnly { .. }) | None => 0, + } + } + pub fn skipped_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) + } + pub fn error_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Error { .. })) + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. +pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if manager == PackageManager::Uv { + return run_uv(cmd, opts); + } + + if cmd.is_empty() { + return exec_command_for_json(manager.binary_name(), &[], opts.json); + } + + let Some(subcommand_index) = find_package_manager_subcommand(manager, cmd) else { + return exec_command_for_json(manager.binary_name(), cmd, opts.json); + }; + let subcommand = &cmd[subcommand_index]; + let rest = &cmd[subcommand_index + 1..]; + + if manager == PackageManager::Pip && subcommand == "add" { + eprintln!("`pip add` is not a pip install command. 
Use `corgea pip install ...`."); + return 2; + } + + if !manager.is_install_subcommand(subcommand) { + return exec_command_for_json(manager.binary_name(), cmd, opts.json); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + let json = opts.json; + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec_command_for_json(manager.binary_name(), cmd, json), + opts, + ) +} + +fn find_package_manager_subcommand(manager: PackageManager, cmd: &[String]) -> Option { + let mut i = 0; + while i < cmd.len() { + let arg = cmd[i].as_str(); + if !arg.starts_with('-') { + return Some(i); + } + if !arg.contains('=') + && (flag_takes_leading_value(manager, arg) + || possible_unknown_flag_value_before_install(manager, cmd, i)) + { + i += 2; + } else { + i += 1; + } + } + None +} + +fn possible_unknown_flag_value_before_install( + manager: PackageManager, + cmd: &[String], + flag_index: usize, +) -> bool { + let flag = &cmd[flag_index]; + if flag.contains('=') { + return false; + } + let Some(next) = cmd.get(flag_index + 1).map(String::as_str) else { + return false; + }; + !next.starts_with('-') + && !manager.is_install_subcommand(next) + && cmd[flag_index + 2..] 
+ .iter() + .any(|arg| manager.is_install_subcommand(arg)) +} + +fn flag_takes_leading_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + | "--userconfig" + | "--cache" + | "--globalconfig" + ), + PackageManager::Pnpm => matches!( + flag, + "-C" | "--dir" | "--filter" | "--registry" | "--reporter" | "--loglevel" + ), + PackageManager::Yarn => matches!( + flag, + "--registry" | "--modules-folder" | "--cache-folder" | "--mutex" | "--network-timeout" + ), + PackageManager::Pip => matches!( + flag, + "--python" + | "--platform" + | "--implementation" + | "--abi" + | "--target" + | "-t" + | "--prefix" + | "--root" + | "--index-url" + | "-i" + | "--extra-index-url" + ), + PackageManager::Uv => false, + } +} + +fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { + let json = opts.json; + let exec = || exec_command_for_json("uv", cmd, json); + + match parse::classify_uv_command(cmd) { + parse::UvCommand::InvalidInstall => { + eprintln!("`uv install` is not a uv dependency install command. 
Use `corgea uv pip install ...`."); + 2 + } + parse::UvCommand::Passthrough => exec(), + parse::UvCommand::PipInstall { install_args } => { + let parsed = match parse::parse_pip_install_args(install_args) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + run_parsed_install( + PackageManager::Uv, + "pip install", + install_args, + parsed, + exec, + opts, + ) + } + parse::UvCommand::PipSync { sync_args } => run_parsed_install( + PackageManager::Uv, + "pip sync", + sync_args, + parse::parse_uv_pip_sync_args(sync_args), + exec, + opts, + ), + parse::UvCommand::Add { add_args } => run_parsed_install( + PackageManager::Uv, + "add", + add_args, + parse::parse_pypi_positionals_args(add_args), + exec, + opts, + ), + parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), + } +} + +/// Gate `uv sync` from the project's `uv.lock`. The lockfile is the full +/// locked universe (all groups/extras) — a superset of what sync installs, +/// conservative in the blocking direction; a stale lock that sync would +/// re-resolve is gated as written. Recency isn't checked (locked versions +/// aren't newly chosen by this command); the verdict pass is the gate. We +/// never run `uv lock` ourselves — locking can build sdists, which would +/// execute package code before any verdict. +fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { + if !opts.has_vulnerability_checks() { + // Recency-only mode has no verdict to gate with. 
+ return exec(); + }; + let lock = match std::fs::read_to_string("uv.lock") { + Ok(content) => content, + Err(_) => { + eprintln!( + "note: no uv.lock here — 'uv sync' is not gated; dependencies install unchecked (run 'uv lock' first to enable the gate)" + ); + return exec(); + } + }; + let jobs = match parse_uv_lock(&lock) { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!("warning: cannot verify 'uv sync' ({e}); proceeding under --force"); + return exec(); + } + Err(e) => { + eprintln!("error: cannot verify 'uv sync': {e} (pass --force to proceed unchecked)"); + return 1; + } + }; + + let resolved_count = jobs.len(); + let results = verdict_pool(jobs, &opts, PackageManager::Uv, VERDICT_CONCURRENCY); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager: PackageManager::Uv, + subcommand: "sync".to_string(), + original_args: cmd[1..].to_vec(), + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + if opts.json { + print_json(&report, &opts); + } else { + print_text(&report); + } + if should_block_install(&report, &opts) { + if !opts.json { + print_refusal(&report); + } + return 1; + } + exec() +} + +/// Packages from `uv.lock` that `uv sync` installs from an index. Local +/// stanzas (the project itself and path deps: editable / virtual / +/// directory / path sources) carry no registry identity and are skipped. 
+fn parse_uv_lock(content: &str) -> Result, String> { + #[derive(serde::Deserialize)] + struct Lock { + #[serde(default)] + package: Vec, + } + #[derive(serde::Deserialize)] + struct Pkg { + name: String, + version: Option, + #[serde(default)] + source: std::collections::BTreeMap, + } + const LOCAL_SOURCES: [&str; 4] = ["editable", "virtual", "directory", "path"]; + + let lock: Lock = toml::from_str(content).map_err(|e| format!("parse uv.lock: {e}"))?; + Ok(lock + .package + .into_iter() + .filter(|p| !LOCAL_SOURCES.iter().any(|k| p.source.contains_key(*k))) + .filter_map(|p| { + Some(tree::TreePackage { + name: p.name, + version: p.version?, + requested: false, + }) + }) + .collect()) +} + +/// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. +fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, +) -> i32 { + // With vulnerability checks, the tree pass resolves the full would-install + // set; `tree::covers_input` owns what each manager's resolver can chew on. + let tree_eligible = + opts.has_vulnerability_checks() && tree_covers_command(manager, subcommand_label, &parsed); + let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); + + if !opts.force { + if let Some(message) = project_guard_message(manager, subcommand_label, &parsed) { + eprintln!("{message} Pass --force to run it anyway."); + return 1; + } + if manager == PackageManager::Pip + && subcommand_label == "install" + && opts.has_vulnerability_checks() + && pip_externally_managed() + && !pip_has_environment_override(rest) + { + eprintln!("Refusing to run pip in an externally managed Python environment. 
Use pip's explicit override (`--break-system-packages`) or an explicit install target such as `--target`, or pass --force."); + return 1; + } + } + + if parsed.targets.is_empty() && !tree_eligible { + // Only a truly bare install gets the bare note — a tokenless + // `-r requirements.txt` install is covered by `requirements_note`. + if bare_install { + bare_install_note(manager, subcommand_label); + } + requirements_note(&parsed); + return exec(); + } + + let now = Utc::now(); + let mut outcomes: Vec<_> = parsed + .targets + .iter() + .map(|target| verify_one(target, &opts, &now)) + .collect(); + + let tree = if tree_eligible { + Some(run_tree_pass( + manager, + subcommand_label, + rest, + &parsed, + &mut outcomes, + &opts, + )) + } else { + run_verdict_pass(manager, &mut outcomes, &opts); // no-op tokenless + None + }; + + // The mandatory loud warning when the tree pass fell back to named-only. + if let Some(TreeReport::NamedOnly { reason }) = &tree { + eprintln!( + "warning: transitive dependencies not checked ({reason}); only named packages were verified." + ); + } + // The requirements note only matters when the tree pass did *not* cover + // those files (fallback to named-only, or recency-only mode). + if !matches!(&tree, Some(TreeReport::Full { .. 
})) { + requirements_note(&parsed); + } + coverage_note(&opts); + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + tree, + bare_install, + }; + + if opts.json { + print_json(&report, &opts); + } else { + print_text(&report); + } + + if should_block_install(&report, &opts) { + if !opts.json { + print_refusal(&report); + } + return 1; + } + + exec() +} + +fn tree_covers_command( + manager: PackageManager, + subcommand_label: &str, + parsed: &parse::ParsedInstall, +) -> bool { + if manager == PackageManager::Npm && matches!(subcommand_label, "ci" | "clean-install") { + return std::path::Path::new("package-lock.json").exists() + || std::path::Path::new("npm-shrinkwrap.json").exists(); + } + tree::covers_input(manager, parsed) +} + +fn project_guard_message( + manager: PackageManager, + subcommand_label: &str, + parsed: &parse::ParsedInstall, +) -> Option { + let cwd = std::path::Path::new("."); + let has_targets = !parsed.targets.is_empty() || !parsed.requirements_files.is_empty(); + match manager { + PackageManager::Npm if has_targets && cwd.join("pnpm-lock.yaml").exists() => Some( + "This looks like a pnpm project. Use `corgea pnpm add ...` instead of `corgea npm ...`." + .to_string(), + ), + PackageManager::Npm if has_targets && cwd.join("yarn.lock").exists() => Some( + "This looks like a yarn project. Use `corgea yarn add ...` instead of `corgea npm ...`." + .to_string(), + ), + PackageManager::Pnpm + if (cwd.join("package-lock.json").exists() || cwd.join("npm-shrinkwrap.json").exists()) + && !cwd.join("pnpm-lock.yaml").exists() => + { + Some( + "This looks like an npm project. Use `corgea npm install ...` instead of `corgea pnpm ...`." + .to_string(), + ) + } + PackageManager::Pip if cwd.join("uv.lock").exists() => { + if parsed.requirements_files.is_empty() { + Some( + "This looks like a uv project. 
Use `corgea uv add ...` instead of `corgea pip install ...`." + .to_string(), + ) + } else { + Some( + "This looks like a uv project. Use `corgea uv pip install -r ...` instead of `corgea pip install -r ...`." + .to_string(), + ) + } + } + PackageManager::Uv + if subcommand_label == "add" + && cwd.join("requirements.txt").exists() + && !cwd.join("uv.lock").exists() + && parsed.requirements_files.is_empty() => + { + Some( + "This looks like a requirements.txt pip project. Use `corgea pip install ...` instead of `corgea uv add ...`." + .to_string(), + ) + } + _ => None, + } +} + +fn pip_has_environment_override(args: &[String]) -> bool { + args.iter().enumerate().any(|(i, arg)| { + matches!( + arg.as_str(), + "--break-system-packages" | "--target" | "--prefix" | "--root" + ) || arg.starts_with("--target=") + || arg.starts_with("--prefix=") + || arg.starts_with("--root=") + || (arg == "-t" && args.get(i + 1).is_some()) + }) +} + +fn pip_externally_managed() -> bool { + if std::env::var("CORGEA_PIP_EXTERNALLY_MANAGED") + .ok() + .as_deref() + == Some("1") + { + return true; + } + let output = std::process::Command::new("python3") + .args([ + "-c", + "import pathlib, sysconfig; print((pathlib.Path(sysconfig.get_path('stdlib')) / 'EXTERNALLY-MANAGED').exists())", + ]) + .output(); + matches!(output, Ok(out) if out.status.success() && String::from_utf8_lossy(&out.stdout).trim() == "True") +} + +/// One honest stderr line when a zero-spec install can't be gated: +/// yarn/pnpm/uv have no safe dry-run, so a bare install pulls its whole +/// dependency set unchecked. No-op for other managers (bare npm is gated +/// via the tree pass; bare pip installs nothing). 
+fn bare_install_note(manager: PackageManager, subcommand_label: &str) { + if matches!( + manager, + PackageManager::Yarn | PackageManager::Pnpm | PackageManager::Uv + ) { + eprintln!( + "note: bare '{} {}' is not gated (no safe dry-run) — dependencies install unchecked", + manager.binary_name(), + subcommand_label + ); + } +} + +/// The refusal line on stderr. When vulnerable findings exist but none sit on +/// a named target — and no named target is unverifiable either — the block is +/// entirely the existing tree's doing, so say that instead of implying the +/// package the user typed is at fault. Messaging only; the block decision +/// stays with `should_block_install`. +fn print_refusal(report: &PrecheckReport) { + if refusal_blames_existing_tree(report) { + eprintln!( + "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." + ); + } else if report.vulnerable_count() > 0 + || report.unverifiable_count() > 0 + || report.error_count() > 0 + { + eprintln!("Refusing to run install. Pass --force to proceed despite findings."); + } else { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); + } +} + +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, none sit on a named target (or block as unverifiable +/// there), and every *blocking* tree finding — vulnerable or unverifiable, +/// since `should_block_install` refuses on both — genuinely predates this +/// command. A `Requested` finding (pip `-r`) is added by this command and +/// renders as `(from requirements)`; a `Transitive` finding on any install +/// that names targets or requirements files is being pulled in by them +/// right now. Only a truly bare install (`report.bare_install`) or +/// manifest-declared `PreExisting` findings may blame the existing tree. 
+fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { + let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); + if report.vulnerable_count() == 0 || named_findings > 0 { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| { + matches!( + t.verdict, + VerdictStatus::Vulnerable(_) | VerdictStatus::Unverifiable(_) + ) + }) + .all(|t| match t.origin { + // A locked pin predates the sync command that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + +/// Print the "requirements files are not recency-checked" note when the +/// install carried any `-r` files. No-op otherwise. +fn requirements_note(parsed: &parse::ParsedInstall) { + if parsed.requirements_files.is_empty() { + return; + } + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); +} + +fn coverage_note(opts: &PrecheckOptions) { + match opts.verdict_mode() { + "authenticated" => {} + "public" => { + eprintln!( + "warning: public vulnerability mode — known findings still block, but service outages fail open." + ); + } + _ => { + eprintln!( + "warning: vulnerability checks disabled — known-vulnerable packages will NOT be blocked (recency-only)." + ); + } + } +} + +/// Resolve the full would-install set and verdict it. On any resolution +/// failure, fall back to the named-only verdict pass; the caller renders the +/// loud warning from the returned `NamedOnly` reason. Only called when +/// `opts.verdict.is_some()`. 
+fn run_tree_pass( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: &parse::ParsedInstall, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) -> TreeReport { + let set = match tree::resolve_tree(manager, subcommand_label, rest, parsed) { + Ok(Some(set)) => set, + Ok(None) => { + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { + reason: format!("{} has no safe dry-run", manager.binary_name()), + }; + } + Err(reason) => { + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { reason }; + } + }; + + // Dedup the dry-run set (npm lockfiles repeat the same name@version at + // multiple nested paths), then union in the named-resolved targets — a + // named target already installed is absent from the dry-run delta but + // must still be verdicted. + let norm = |n: &str| manager.normalize_name(n); + let mut seen = std::collections::HashSet::new(); + let mut jobs: Vec = Vec::with_capacity(set.len()); + for p in set { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + let resolved_count = jobs.len(); + for o in outcomes.iter() { + if let TargetOutcome::Resolved { resolved, .. } = o { + if seen.insert((norm(&resolved.name), resolved.version.clone())) { + jobs.push(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + requested: true, + }); + } + } + } + + // npm leftovers that are direct deps of the project manifest are + // pre-existing, not transitive. pip carries `requested` instead. 
+ let direct_deps = if manager == PackageManager::Npm { + tree::project_direct_deps() + } else { + Default::default() + }; + + let results = verdict_pool(jobs, opts, manager, VERDICT_CONCURRENCY); + let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); + TreeReport::Full { + resolved_count, + transitive, + } +} + +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + +/// Max parallel Corgea vuln-api verdict requests. +const VERDICT_CONCURRENCY: usize = 8; + +#[derive(Debug, Clone)] +enum SourceVerdict { + Clean, + Vulnerable(Vec), + Unavailable(String), + NotChecked, +} + +/// Query configured vulnerability sources and compose them into the gate's +/// final verdict. Corgea requests are bounded-concurrent; OSV uses one +/// querybatch call for the same package set. The two source paths run in +/// parallel so OSV cannot slow down a healthy Corgea path unnecessarily. +fn verdict_pool( + jobs: Vec, + opts: &PrecheckOptions, + manager: PackageManager, + concurrency: usize, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + if jobs.is_empty() { + return Vec::new(); + } + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages for vulnerabilities…", jobs.len()); + } + + let corgea_jobs = jobs.clone(); + let osv_jobs = jobs.clone(); + let corgea_cfg = opts.verdict.clone(); + let osv_cfg = opts.osv.clone(); + let fail_closed = opts.fail_closed(); + + let (corgea_results, osv_results) = std::thread::scope(|s| { + let corgea = s.spawn(|| { + corgea_cfg + .as_ref() + .map(|cfg| corgea_verdict_pool(corgea_jobs, cfg, manager, concurrency)) + .unwrap_or_else(|| { + jobs.iter() + .cloned() + .map(|job| (job, SourceVerdict::NotChecked)) + .collect() + }) + }); + let osv = s.spawn(|| { + osv_cfg + .as_ref() + .map(|cfg| osv_verdict_batch(osv_jobs, cfg, manager)) + .unwrap_or_else(|| { + jobs.iter() + .cloned() + .map(|job| (job, 
SourceVerdict::NotChecked)) + .collect() + }) + }); + (corgea.join().unwrap(), osv.join().unwrap()) + }); + + let key = |pkg: &tree::TreePackage| { + ( + manager.normalize_name(&pkg.name), + pkg.version.clone(), + pkg.requested, + ) + }; + let mut osv_by_key: std::collections::HashMap<(String, String, bool), SourceVerdict> = + std::collections::HashMap::new(); + for (job, verdict) in osv_results { + osv_by_key.insert(key(&job), verdict); + } + + corgea_results + .into_iter() + .map(|(job, corgea)| { + let osv = osv_by_key + .remove(&key(&job)) + .unwrap_or(SourceVerdict::NotChecked); + let verdict = compose_source_verdicts(corgea, osv, fail_closed); + (job, verdict) + }) + .collect() +} + +/// Bounded worker pool for Corgea vuln-api requests. Result order is not +/// preserved; callers match by package identity. +fn corgea_verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, + concurrency: usize, +) -> Vec<(tree::TreePackage, SourceVerdict)> { + use std::collections::VecDeque; + use std::sync::Mutex; + + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, SourceVerdict::Unavailable(e.clone()))) + .collect(); + } + }; + + let ecosystem = manager.ecosystem(); + let workers = concurrency.min(jobs.len()).max(1); + let queue = Mutex::new(VecDeque::from(jobs)); + let results = Mutex::new(Vec::new()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let Some(job) = queue.lock().unwrap().pop_front() else { + break; + }; + // vuln-api advisories are keyed by canonical names; an + // alternate spelling (PEP 503: `Flask_Cors` ≡ `flask-cors`) + // would miss and read as clean. 
+ let verdict = match crate::vuln_api::check_package_version_with_auth( + &client, + &cfg.base_url, + cfg.token.as_deref(), + ecosystem, + &manager.normalize_name(&job.name), + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => SourceVerdict::Vulnerable(resp.matches), + Ok(_) => SourceVerdict::Clean, + Err(e) => SourceVerdict::Unavailable(e.to_string()), + }; + results.lock().unwrap().push((job, verdict)); + }); + } + }); + results.into_inner().unwrap() +} + +fn osv_verdict_batch( + jobs: Vec, + cfg: &crate::osv::OsvConfig, + manager: PackageManager, +) -> Vec<(tree::TreePackage, SourceVerdict)> { + let client = match crate::osv::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, SourceVerdict::Unavailable(e.clone()))) + .collect(); + } + }; + let ecosystem = crate::osv::ecosystem_for_osv(manager.ecosystem()); + let packages: Vec<_> = jobs + .iter() + .map(|job| crate::osv::OsvPackage { + ecosystem: ecosystem.clone(), + name: manager.normalize_name(&job.name), + version: job.version.clone(), + }) + .collect(); + match crate::osv::query_batch(&client, &cfg.base_url, &packages) { + Ok(verdicts) => jobs + .into_iter() + .zip(verdicts) + .map(|(job, verdict)| { + let verdict = match verdict { + crate::osv::OsvVerdict::Clean => SourceVerdict::Clean, + crate::osv::OsvVerdict::Vulnerable(matches) => { + SourceVerdict::Vulnerable(matches) + } + }; + (job, verdict) + }) + .collect(), + Err(e) => jobs + .into_iter() + .map(|j| (j, SourceVerdict::Unavailable(e.clone()))) + .collect(), + } +} + +fn compose_source_verdicts( + corgea: SourceVerdict, + osv: SourceVerdict, + fail_closed: bool, +) -> VerdictStatus { + let mut matches = Vec::new(); + if let SourceVerdict::Vulnerable(mut m) = corgea.clone() { + matches.append(&mut m); + } + if let SourceVerdict::Vulnerable(mut m) = osv.clone() { + matches.append(&mut m); + } + if !matches.is_empty() { + return VerdictStatus::Vulnerable(matches); + } + + if fail_closed { + if let 
SourceVerdict::Unavailable(error) = corgea { + return VerdictStatus::Unverifiable(error); + } + return VerdictStatus::Clean; + } + + match (corgea, osv) { + (SourceVerdict::Unavailable(a), SourceVerdict::Unavailable(b)) => { + VerdictStatus::PublicUnavailable(format!( + "vulnerability coverage unavailable: Corgea: {a}; OSV: {b}" + )) + } + (SourceVerdict::Unavailable(error), _) => VerdictStatus::PublicUnavailable(format!( + "partial vulnerability coverage: Corgea unavailable: {error}" + )), + (_, SourceVerdict::Unavailable(error)) => VerdictStatus::PublicUnavailable(format!( + "partial vulnerability coverage: OSV unavailable: {error}" + )), + (SourceVerdict::NotChecked, SourceVerdict::NotChecked) => VerdictStatus::NotChecked, + _ => VerdictStatus::Clean, + } +} + +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. +fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) so matching the + // pooled results stays linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + } + } + + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. 
} = &mut outcomes[i] { + *v = verdict.clone(); + } + } + } else { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + origin, + verdict, + }); + } + } + transitive +} + +/// Vulnerability verdict pass over resolved targets. No-op when all +/// vulnerability sources are disabled (test-only recency mode). +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + if !opts.has_vulnerability_checks() { + return; + } + + // One job per resolved target; jobs are 1:1 with outcomes, so + // `apply_verdicts` matches everything and returns no leftovers. + let jobs: Vec = outcomes + .iter() + .filter_map(|o| match o { + TargetOutcome::Resolved { resolved, .. } => Some(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + requested: true, + }), + _ => None, + }) + .collect(); + + let results = verdict_pool(jobs, opts, manager, VERDICT_CONCURRENCY); + apply_verdicts(manager, results, outcomes, &Default::default()); +} + +fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + if opts.force { + return false; + } + // A resolution error means no verdict was obtained for that target, so + // in tokened mode it fails closed like `Unverifiable` — otherwise a + // registry outage silently bypasses the gate. 
+ report.vulnerable_count() > 0 + || report.unverifiable_count() > 0 + || (opts.fail_closed() && report.error_count() > 0) + || (!opts.no_fail && report.recent_count() > 0) +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + verdict: VerdictStatus::NotChecked, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. 
+fn resolve_binary(binary: &str) -> Result { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + +fn exec_command_for_json(binary: &str, args: &[String], json_mode: bool) -> i32 { + let resolved = match resolve_binary(binary) { + Ok(p) => p, + Err(msg) => { + eprintln!("{msg}"); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + if json_mode { + return match Command::new(&resolved).args(&os_args).output() { + Ok(output) => { + use std::io::Write; + let mut stderr = std::io::stderr(); + let _ = stderr.write_all(&output.stdout); + let _ = stderr.write_all(&output.stderr); + output.status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = output.status.signal() { + return 128 + sig; + } + } + 1 + }) + } + Err(e) => { + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + }; + } + + match Command::new(&resolved).args(&os_args).status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + } +} + +/// Suffix for a vulnerable match line: the advisory's fix, if known. 
+fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { + match &m.fixed_version { + Some(v) => format!(" — fixed in {v}"), + None => " — no fixed version known".to_string(), + } +} + +/// Highest of `fixes` after sort/dedup: a single distinct value is returned +/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct +/// values compare by lenient semver. With `all_must_parse`, one unparsable +/// candidate among several poisons the answer (`None`); otherwise unparsable +/// candidates are skipped. +fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut best: Option<(semver::Version, &str)> = None; + for raw in many { + let v = + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) + { + Ok(v) => v, + Err(_) if all_must_parse => return None, + Err(_) => continue, + }; + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw)), + } + } + best.map(|(_, raw)| (*raw).to_string()) + } + } +} + +/// The one version certified to clear every match. Requires every match to +/// carry a `fixed_version`; any match without one — or an unparsable +/// candidate among several — means no version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + highest_fix(fixes, true) +} + +/// Highest `fixed_version` the advisories advertise, by lenient semver. +/// Unlike `safe_version` this is *not* a certification: matches without a +/// fix are ignored, so the result may still be vulnerable to them. `None` +/// only when no match advertises a fix (or no candidate parses). 
+fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .filter_map(|m| m.fixed_version.as_deref()) + .collect(); + highest_fix(fixes, false) +} + +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. +fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { + for m in matches { + let source = m + .source + .as_deref() + .map(|s| format!(", source: {s}")) + .unwrap_or_default(); + println!( + " {} ({}{}){}", + m.advisory_id, + m.severity_level, + source, + fix_note(m) + ); + } + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); + } +} + +/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. +/// The parenthetical separates findings the resolved tree carried in from +/// findings on the targets this command names; omitted when the tree +/// contributed none. +fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { + if from_tree > 0 { + format!("{total} {label} ({from_tree} from resolved tree)") + } else { + format!("{total} {label}") + } +} + +/// More than this many unverifiable findings with the same error-prefix +/// render as one collapsed line instead of one line per package. +const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; + +/// Group key for collapsing repeated unverifiable errors: the text before +/// the first `(` — strips per-package detail (URLs, status codes) so one +/// outage groups under one key. +fn error_prefix(error: &str) -> &str { + match error.find('(') { + Some(i) => error[..i].trim_end(), + None => error, + } +} + +/// Unverifiable error strings across transitive tree findings and named +/// outcomes, in render order. +fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { + let mut errors = Vec::new(); + if let Some(TreeReport::Full { transitive, .. 
}) = &report.tree { + for t in transitive { + if let VerdictStatus::Unverifiable(e) = &t.verdict { + errors.push(e.as_str()); + } + } + } + for o in &report.outcomes { + if let TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(e), + .. + } = o + { + errors.push(e.as_str()); + } + } + errors +} + +/// `(prefix, count, first error)` groups of unverifiable findings large +/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — +/// the vuln-api outage case, where every package fails the same way. +/// Display-only: counts and exit codes never change. +fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { + let mut groups: Vec<(&str, usize, &str)> = Vec::new(); + for e in unverifiable_errors(report) { + let prefix = error_prefix(e); + match groups.iter_mut().find(|(p, _, _)| *p == prefix) { + Some((_, count, _)) => *count += 1, + None => groups.push((prefix, 1, e)), + } + } + groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); + groups +} + +fn print_text(report: &PrecheckReport) { + // Build the echoed command from non-empty parts: a bare gated install + // (e.g. `npm install` with zero specs) has no args to append. 
+ let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); + if !report.original_args.is_empty() { + command.push(' '); + command.push_str(&report.original_args.join(" ")); + } + + let collapsed = collapsed_unverifiable_groups(report); + let is_collapsed = |error: &str| { + collapsed + .iter() + .any(|(prefix, _, _)| *prefix == error_prefix(error)) + }; + + println!( + "Pre-checking `{}` (threshold {})", + command, + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {}, {}, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + summary_segment( + report.vulnerable_count(), + report.tree_vulnerable_count(), + "vulnerable" + ), + summary_segment( + report.unverifiable_count(), + report.tree_unverifiable_count(), + "unverifiable" + ), + report.skipped_count(), + report.error_count(), + ); + + match &report.tree { + Some(TreeReport::Full { + resolved_count, + transitive, + .. + }) => { + println!( + " tree: {} packages resolved, {} transitive checked", + resolved_count, + transitive.len() + ); + for t in transitive { + match &t.verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {}@{} {} known vulnerable:", + t.name, + t.version, + t.origin.label() + ); + print_vulnerable_matches(&t.name, matches); + // A vulnerable dep the project already declares can be + // bumped directly — point at the fix as a command. + // When `safe_version` is `Some` it equals + // `advertised_fix` and clears every advisory; otherwise + // some advisory has no fix, so the "(advertised fix)" + // hedge marks the bump as partial. 
+ if t.origin == TreeOrigin::PreExisting { + if let Some(fix) = advertised_fix(matches) { + let hedge = if safe_version(matches).is_some() { + "" + } else { + " (advertised fix)" + }; + println!( + " fix with: corgea {} install {}@{}{}", + report.manager.binary_name(), + t.name, + fix, + hedge + ); + } + } + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {}@{} {} could not be verified: {}", + t.name, + t.version, + t.origin.label(), + error + ); + } + } + VerdictStatus::PublicUnavailable(error) => { + println!( + " ⚠ {}@{} {} vulnerability check warning: {}", + t.name, + t.version, + t.origin.label(), + error + ); + } + // Clean / not-checked tree entries stay quiet in text mode. + VerdictStatus::Clean | VerdictStatus::NotChecked => {} + } + } + } + Some(TreeReport::NamedOnly { reason }) => { + println!(" tree: transitive dependencies NOT checked ({reason})"); + } + None => {} + } + + // One line per collapsed outage group instead of one per package. 
+ for (_, count, first_error) in &collapsed { + println!( + " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" + ); + } + + for o in &report.outcomes { + match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => match verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {} → {}@{} known vulnerable:", + target.display, resolved.name, resolved.version, + ); + print_vulnerable_matches(&resolved.name, matches); + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } + } + VerdictStatus::PublicUnavailable(error) => { + println!( + " ⚠ {} → {}@{} vulnerability check warning: {}", + target.display, resolved.name, resolved.version, error, + ); + if report.is_recent(*age) { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } + } + VerdictStatus::Clean | VerdictStatus::NotChecked => { + if report.is_recent(*age) { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + } + }, + TargetOutcome::Skipped { target, reason } => { + println!(" ? {}: {}", target.display, reason); + } + TargetOutcome::Error { target, error } => { + println!(" ✗ {}: {}", target.display, error); + } + } + } +} + +/// JSON shape for a single verdict. Shared by named outcomes and tree +/// (transitive) outcomes so both render verdicts identically. 
+/// `remediation` carries the version that clears every advisory +/// (`safe_version`); `null` when any advisory has no known fix. +fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { + use serde_json::json; + match verdict { + VerdictStatus::Clean => json!({ "status": "clean" }), + VerdictStatus::Vulnerable(matches) => { + json!({ + "status": "vulnerable", + "matches": matches, + "remediation": safe_version(matches), + }) + } + VerdictStatus::Unverifiable(error) => { + json!({ "status": "unverifiable", "error": error }) + } + VerdictStatus::PublicUnavailable(error) => { + json!({ "status": "unavailable", "fail_closed": false, "error": error }) + } + VerdictStatus::NotChecked => { + json!({ "status": "not_checked", "reason": NO_TOKEN_REASON }) + } + } +} + +fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { + use serde_json::json; + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => { + let verdict_json = verdict_json(verdict); + json!({ + "status": if report.is_recent(*age) { "recent" } else { "ok" }, + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + "verdict": verdict_json, + }) + } + TargetOutcome::Skipped { target, reason } => json!({ + "status": "skipped", + "spec": target.display, + "name": target.name, + "reason": reason, + }), + TargetOutcome::Error { target, error } => json!({ + "status": "error", + "spec": target.display, + "name": target.name, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "manager": report.manager.binary_name(), + "subcommand": report.subcommand, + "args": report.original_args, + "threshold_seconds": report.threshold.as_secs(), + "summary": { + "ok": report.ok_count(), + "recent": report.recent_count(), + "vulnerable": report.vulnerable_count(), + "unverifiable": 
report.unverifiable_count(), + "skipped": report.skipped_count(), + "errors": report.error_count(), + }, + "verdict_mode": opts.verdict_mode(), + "results": outcomes, + "tree": report.tree.as_ref().map(|t| match t { + TreeReport::Full { resolved_count, transitive } => json!({ + "mode": "full", + "reason": serde_json::Value::Null, + "resolved_count": resolved_count, + "transitive": transitive.iter().map(|o| json!({ + "name": o.name, + "version": o.version, + "origin": o.origin.json_name(), + "verdict": verdict_json(&o.verdict), + })).collect::>(), + }), + TreeReport::NamedOnly { reason } => json!({ + "mode": "named-only", + "reason": reason, + "resolved_count": 0, + "transitive": [], + }), + }), + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn install_subcommand_recognition() { + assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Yarn.is_install_subcommand("add")); + assert!(PackageManager::Yarn.is_install_subcommand("install")); + + assert!(PackageManager::Pnpm.is_install_subcommand("add")); + assert!(PackageManager::Pnpm.is_install_subcommand("install")); + assert!(PackageManager::Pnpm.is_install_subcommand("i")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + #[test] + fn parse_uv_lock_keeps_index_packages_and_skips_local_sources() { + let lock = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." 
} + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "gitdep" +version = "1.2.3" +source = { git = "https://example.com/repo?rev=abc#abc" } +"#; + let pkgs = parse_uv_lock(lock).expect("parse uv.lock"); + let names: Vec<&str> = pkgs.iter().map(|p| p.name.as_str()).collect(); + assert_eq!(names, vec!["evildep", "gitdep"]); + assert_eq!(pkgs[0].version, "0.4.2"); + } + + #[test] + fn parse_uv_lock_rejects_invalid_toml() { + let err = parse_uv_lock("not = [valid").expect_err("invalid toml"); + assert!(err.contains("parse uv.lock"), "got: {err}"); + } + + /// Baseline options: pypi registry at a dead address (a port that + /// refuses connections — these tests never dial it), no verdict config. + /// Override fields per test via struct update. + fn stub_opts() -> PrecheckOptions { + PrecheckOptions { + threshold: Duration::from_secs(2 * 86400), + no_fail: false, + force: false, + json: false, + verdict: None, + osv: None, + npm_registry: None, + pypi_registry: Some("http://127.0.0.1:9".to_string()), + } + } + + /// `stub_opts()` plus a verdict config pointing at `base_url`. + fn verdict_opts(base_url: &str) -> PrecheckOptions { + PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: base_url.to_string(), + token: Some("test-token".to_string()), + fail_closed: true, + }), + ..stub_opts() + } + } + + /// Run `run_parsed_install` for `pip install ` with an exec + /// closure that records whether it ran (returning 42 instead of + /// spawning anything). 
+ fn gate_pip_install(args: &[&str], opts: PrecheckOptions) -> (i32, bool) { + let rest: Vec = args.iter().map(|s| s.to_string()).collect(); + let parsed = parse::parse_install_args(PackageManager::Pip, &rest).expect("parse"); + let mut exec_ran = false; + let code = run_parsed_install( + PackageManager::Pip, + "install", + &rest, + parsed, + || { + exec_ran = true; + 42 + }, + opts, + ); + (code, exec_ran) + } + + #[test] + fn unverifiable_target_skips_and_proceeds() { + // git+ spec → Skipped outcome, no registry hit, install proceeds. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn bare_install_passes_through_without_verification() { + // Bare `pip install` (no targets) → straight exec, no registry hit. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&[], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn requirements_files_note_then_exec() { + // `-r reqs.txt` alone → printed note, no verification, exec runs. + let opts = stub_opts(); + let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + // Recency derives from age vs `report_with`'s 2-day threshold: + // one hour ⇒ recent, a year ⇒ not. 
+ let age = if recent { + Duration::from_secs(3600) + } else { + Duration::from_secs(365 * 86400) + }; + TargetOutcome::Resolved { + target: InstallTarget { + name: name.to_string(), + display: format!("{name}=={version}"), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + resolved: crate::verify_deps::registry::ResolvedPackage { + name: name.to_string(), + version: version.to_string(), + published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), + }, + age, + verdict: VerdictStatus::NotChecked, + } + } + + fn report_with(outcomes: Vec) -> PrecheckReport { + PrecheckReport { + manager: PackageManager::Pip, + subcommand: "install".to_string(), + original_args: vec![], + outcomes, + threshold: Duration::from_secs(2 * 86400), + tree: None, + // Most tests model an install that named something; bare-install + // cases set this explicitly. + bare_install: false, + } + } + + fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { + if let TargetOutcome::Resolved { verdict, .. } = outcome { + *verdict = v; + } + } + + #[test] + fn ecosystem_mapping() { + assert_eq!(PackageManager::Pip.ecosystem(), "pypi"); + assert_eq!(PackageManager::Uv.ecosystem(), "pypi"); + assert_eq!(PackageManager::Npm.ecosystem(), "npm"); + assert_eq!(PackageManager::Yarn.ecosystem(), "npm"); + assert_eq!(PackageManager::Pnpm.ecosystem(), "npm"); + } + + #[test] + fn normalize_name_per_manager() { + // pypi: PEP 503 — lowercase, separator runs collapse to one `-`. + assert_eq!( + PackageManager::Pip.normalize_name("Flask_Cors"), + "flask-cors" + ); + assert_eq!( + PackageManager::Uv.normalize_name("zope.interface"), + "zope-interface" + ); + assert_eq!(PackageManager::Pip.normalize_name("a__b"), "a-b"); + // npm names are case-sensitive and pass through verbatim. 
+ assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); + } + + /// Full predicate matrix: force ⇒ never block; vulnerable and + /// unverifiable block regardless of --no-fail; recency keeps its + /// task-2 --no-fail demotion. + #[test] + fn block_predicate_matrix() { + let opts = |no_fail: bool, force: bool| PrecheckOptions { + no_fail, + force, + ..stub_opts() + }; + + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + + assert!(!should_block_install(&clean, &opts(false, false))); + assert!(should_block_install(&recent, &opts(false, false))); + assert!(!should_block_install(&recent, &opts(true, false))); + assert!(should_block_install(&vulnerable, &opts(false, false))); + assert!( + should_block_install(&vulnerable, &opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + should_block_install(&unverifiable, &opts(true, false)), + "--no-fail must not waive an unverifiable block" + ); + for report in [&clean, &recent, &vulnerable, &unverifiable] { + assert!( + !should_block_install(report, &opts(false, true)), + "--force must never block" + ); + assert!(!should_block_install(report, &opts(true, true))); + } + } + + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with it. 
+ #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts() + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. + #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let opts = verdict_opts(&stub.base_url); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. 
} => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. + let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts(); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked, + .. + } + )); + } + + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`, regardless of `concurrency` + /// (1 = serial, 8 > job count = all workers spawn but some drain empty). + #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let opts = PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: stub.base_url.clone(), + token: Some("test-token".to_string()), + fail_closed: true, + }), + ..stub_opts() + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + requested: false, + }) + .collect(); + + for concurrency in [1usize, 8] { + let results = verdict_pool(jobs.clone(), &opts, PackageManager::Pip, concurrency); + assert_eq!( + results.len(), + 6, + "concurrency 
{concurrency}: all jobs verdicted" + ); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "concurrency {concurrency}: only evil flagged"); + assert_eq!(clean, 5, "concurrency {concurrency}: rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + .expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + } + + fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { + crate::vuln_api::VulnMatch { + advisory_id: advisory.to_string(), + severity_level: "high".to_string(), + tier: 1, + vulnerable_version_range: None, + fixed_version: fixed.map(str::to_string), + source: None, + } + } + + #[test] + fn safe_version_single_fix() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0"))]), + Some("2.0.0".to_string()) + ); + } + + #[test] + fn safe_version_duplicate_fixes_collapse_without_parsing() { + // "1.0rc1" is unparsable, but a single distinct value needs no parse. + assert_eq!( + safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), + Some("1.0rc1".to_string()) + ); + } + + #[test] + fn safe_version_picks_highest_of_distinct_fixes() { + // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). 
+ assert_eq!( + safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + #[test] + fn safe_version_two_component_versions_normalize() { + assert_eq!( + safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), + Some("4.0".to_string()) + ); + } + + #[test] + fn safe_version_mixed_fix_and_none_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + None + ); + } + + #[test] + fn safe_version_unparsable_among_distinct_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), + None + ); + } + + #[test] + fn safe_version_empty_matches_is_none() { + assert_eq!(safe_version(&[]), None); + } + + #[test] + fn error_prefix_strips_parenthesized_detail() { + // The reqwest network-failure shape: per-package URL in parens. + assert_eq!( + error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), + "Failed to send vuln-api request: error sending request for url" + ); + assert_eq!( + error_prefix("vuln-api unavailable (HTTP 503)"), + "vuln-api unavailable" + ); + assert_eq!(error_prefix("no parens here"), "no parens here"); + } + + /// Four unverifiable findings sharing a prefix collapse into one group + /// (named + transitive both count); three do not. 
+ #[test] + fn collapsed_groups_require_more_than_threshold() { + let unverifiable = |name: &str| { + let mut o = resolved_outcome(name, "1.0.0", false); + set_verdict( + &mut o, + VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), + ); + o + }; + + let mut report = report_with(vec![ + unverifiable("a"), + unverifiable("b"), + unverifiable("c"), + ]); + assert!(collapsed_unverifiable_groups(&report).is_empty()); + + report.tree = Some(TreeReport::Full { + resolved_count: 4, + transitive: vec![TreeOutcome { + name: "d".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Unverifiable( + "vuln-api unavailable (HTTP 503: d)".to_string(), + ), + origin: TreeOrigin::Transitive, + }], + }); + let groups = collapsed_unverifiable_groups(&report); + assert_eq!(groups.len(), 1); + let (prefix, count, first) = groups[0]; + assert_eq!(prefix, "vuln-api unavailable"); + assert_eq!(count, 4); + // Render order is transitive-first, so the tree finding leads. + assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); + } + + #[test] + fn advertised_fix_ignores_matches_without_fix() { + // safe_version returns None here; the advertised fix still surfaces. + assert_eq!( + advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + Some("2.0.0".to_string()) + ); + assert_eq!(advertised_fix(&[vm("A-1", None)]), None); + assert_eq!(advertised_fix(&[]), None); + } + + #[test] + fn advertised_fix_picks_highest_by_semver() { + assert_eq!( + advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. 
+ #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. + #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. 
+ let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + refusal_blames_existing_tree(&report), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// Unverifiable tree findings block too (`should_block_install`), so + /// they must pass the same origin test before the refusal may blame the + /// existing tree: a command-added unverifiable transitive alongside a + /// pre-existing vulnerable dep keeps the generic refusal on a named + /// install, while on a bare install everything still predates the + /// command. 
+ #[test] + fn refusal_blame_considers_unverifiable_tree_findings() { + let tree_finding = |name: &str, verdict, origin| TreeOutcome { + name: name.to_string(), + version: "1.0.0".to_string(), + verdict, + origin, + }; + let mixed_tree = || { + Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![ + tree_finding( + "stickydep", + VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + TreeOrigin::PreExisting, + ), + tree_finding( + "newdep", + VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), + TreeOrigin::Transitive, + ), + ], + }) + }; + + // Named install: the unverifiable transitive is being added by this + // command, so "none were added by this command" would lie. + let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); + report.tree = mixed_tree(); + assert!(!refusal_blames_existing_tree(&report)); + + // Bare install: nothing named, everything resolved predates the + // command — the mixed findings still blame the existing tree. + let mut report = report_with(vec![]); + report.bare_install = true; + report.tree = mixed_tree(); + assert!(refusal_blames_existing_tree(&report)); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..dcceb1c --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,791 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::PathBuf; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — requirements files are only noted + /// (not verified) by the baseline gate. 
+ pub requirements_files: Vec, +} + +/// `uv pip install` argument list (everything after `pip install`). +pub fn parse_pip_install_args(args: &[String]) -> Result { + Ok(build_parsed_install( + extract_pip_positionals(args)?, + parse_pypi_spec, + )) +} + +/// `uv add` argument list (everything after `add`). +pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { + build_parsed_install(extract_uv_add_positionals(args), parse_pypi_spec) +} + +fn build_parsed_install( + positionals: PositionalSplit, + parse_spec: fn(&str) -> InstallTarget, +) -> ParsedInstall { + ParsedInstall { + targets: positionals + .specs + .iter() + .map(|raw| parse_spec(raw)) + .collect(), + requirements_files: positionals.requirements_files, + } +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => parse_pip_install_args(args), + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => Ok( + build_parsed_install(extract_node_positionals(manager, args), parse_npm_spec), + ), + PackageManager::Uv => unreachable!("uv uses classify_uv_command"), + } +} + +/// Install-shaped `uv` invocations we know how to verify. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UvCommand<'a> { + InvalidInstall, + Passthrough, + PipInstall { + install_args: &'a [String], + }, + PipSync { + sync_args: &'a [String], + }, + Add { + add_args: &'a [String], + }, + /// `uv sync` — installs the locked project environment; gated from + /// `uv.lock`. (`uv lock` stays passthrough: it installs nothing.) 
+ Sync, +} + +pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> { + match cmd.first().map(String::as_str) { + Some("pip") if matches!(cmd.get(1).map(String::as_str), Some("install" | "i")) => { + UvCommand::PipInstall { + install_args: &cmd[2..], + } + } + Some("pip") if matches!(cmd.get(1).map(String::as_str), Some("sync")) => { + UvCommand::PipSync { + sync_args: &cmd[2..], + } + } + Some("add") => UvCommand::Add { + add_args: &cmd[1..], + }, + Some("sync") => UvCommand::Sync, + Some("install") => UvCommand::InvalidInstall, + _ => UvCommand::Passthrough, + } +} + +pub fn parse_uv_pip_sync_args(args: &[String]) -> ParsedInstall { + let positionals = extract_node_positionals(PackageManager::Uv, args); + ParsedInstall { + targets: Vec::new(), + requirements_files: positionals.specs.into_iter().map(PathBuf::from).collect(), + } +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Known install flags that take a separate value argument, per manager. +/// The fallback heuristic in [`skip_unknown_flag`] only skips URL/path-like +/// values, so a bare-word value (`-w my-workspace`) would otherwise parse — +/// and get verified or blocked — as a package spec. Not exhaustive; the +/// heuristic still backstops anything unlisted. The same letter can differ +/// by manager: npm's `-w ` takes a value, while pnpm's `-w` +/// (workspace-root) and yarn's `-W` are boolean. 
+fn takes_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + | "--userconfig" + | "--cache" + | "--globalconfig" + ), + PackageManager::Pnpm => matches!( + flag, + "-C" | "--dir" | "--filter" | "--registry" | "--reporter" | "--loglevel" + ), + PackageManager::Yarn => matches!( + flag, + "--registry" | "--modules-folder" | "--cache-folder" | "--mutex" | "--network-timeout" + ), + PackageManager::Uv => matches!( + flag, + "--group" | "--extra" | "--index" | "--tag" | "--branch" | "--rev" | "--package" + ), + PackageManager::Pip => false, + } +} + +/// Strip flags from a npm/yarn/pnpm (or `uv add`) install argument list, +/// returning only the positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; known value-taking flags ([`takes_value`]) skip the +/// next token outright, anything else skips it only if it looks like a +/// value (a URL / path), never like a package spec. +fn extract_node_positionals(manager: PackageManager, args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. + for rest in &args[i + 1..] 
{ + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(manager, a) { + i += 2; + continue; + } + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +fn extract_uv_add_positionals(args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirements" | "--requirement" => { + if let Some(path) = args.get(i + 1) { + out.requirements_files.push(PathBuf::from(path)); + } + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + if let Some(path) = a + .strip_prefix("--requirements=") + .or_else(|| a.strip_prefix("--requirement=")) + { + out.requirements_files.push(PathBuf::from(path)); + i += 1; + continue; + } + if a.starts_with('-') { + if !a.contains('=') && takes_value(PackageManager::Uv, a) { + i += 2; + } else { + i = skip_unknown_flag(args, i); + } + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// Advance past an unknown flag at `i`. `--flag=value` is self-contained; +/// otherwise peek at the next arg and skip it too if it doesn't look like +/// a package spec (contains `://` or is path-like) — see the heuristic +/// rationale on [`extract_node_positionals`]. +fn skip_unknown_flag(args: &[String], i: usize) -> usize { + if args[i].contains('=') { + return i + 1; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i + if next_is_value { 2 } else { 1 } +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). 
We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. +fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. + let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Long-form `--requirement=foo.txt`. + if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + i = skip_unknown_flag(args, i); + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. 
+fn parse_npm_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", + "git:", + "git@", + "ssh://", + "http://", + "https://", + "file:", + "./", + "../", + "/", + "~/", + "npm:", + "workspace:", + ]; + if let Some(p) = unverifiable_prefixes + .iter() + .find(|p| trimmed.starts_with(*p)) + { + let reason = match *p { + "npm:" => "npm: aliased dependency — registry verification skipped", + "workspace:" => "workspace: dependency — registry verification skipped", + _ => "spec is a URL/git/filesystem reference — registry verification skipped", + }; + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: reason.to_string(), + }, + }; + } + + // Find the version separator. Scoped names start with `@` and the + // version separator is the *next* `@` (if any). Unscoped names + // use the first `@`. + let (name_part, spec_part): (&str, &str) = if let Some(rest) = trimmed.strip_prefix('@') { + match rest.find('@') { + Some(at_in_rest) => { + let split = 1 + at_in_rest; + (&trimmed[..split], &trimmed[split + 1..]) + } + None => (trimmed, ""), + } + } else { + match trimmed.find('@') { + Some(at) => (&trimmed[..at], &trimmed[at + 1..]), + None => (trimmed, ""), + } + }; + + let name = name_part.trim().to_string(); + let spec_str = spec_part.trim(); + + let kind = if spec_str.is_empty() || spec_str.eq_ignore_ascii_case("latest") { + TargetKind::Npm(NpmSpec::Latest) + } else if semver::Version::parse(spec_str).is_ok() { + TargetKind::Npm(NpmSpec::Exact(spec_str.to_string())) + } else if looks_like_npm_range(spec_str) { + TargetKind::Npm(NpmSpec::Range(spec_str.to_string())) + } else if is_npm_dist_tag(spec_str) { + TargetKind::Npm(NpmSpec::Tag(spec_str.to_string())) + } else { + TargetKind::Unverifiable { + reason: format!( + "could not classify version spec '{}' (not a valid semver, range, or dist-tag)", + spec_str + ), + } + }; + + 
InstallTarget { + name, + display, + kind, + } +} + +/// Loose check: does this spec look like an npm version range? +/// We accept anything that *starts* with a range metacharacter +/// (`^`, `~`, `>`, `<`, `=`, `*`) or with a digit (so `1.x`, `1.2.x`, +/// and bare ranges still resolve). Validation against the registry's +/// version list happens later inside the resolver. +fn looks_like_npm_range(s: &str) -> bool { + matches!( + s.chars().next(), + Some('^') | Some('~') | Some('>') | Some('<') | Some('=') | Some('*') + ) || s + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) +} + +/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`, +/// `next`, `beta`, `alpha-1`). We reject anything that contains +/// version-spec metacharacters. +fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars() + .next() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + let requirement = trimmed.split(';').next().unwrap_or(trimmed).trim(); + + // Find the first specifier operator (`==`, `>=`, `<=`, `!=`, `~=`, + // `>`, `<`). PEP 440 also allows `===` (arbitrary equality). + // Find the leftmost specifier operator. 
On ties, prefer the + // longer operator (e.g. `==` over `=`). + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let mut split_at: Option = None; + for sep in &separators { + if let Some(idx) = requirement.find(sep) { + split_at = match split_at { + Some(prev) if prev <= idx => Some(prev), + _ => Some(idx), + }; + } + } + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&requirement[..idx], &requirement[idx..]), + None => (requirement, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); + + let spec = spec_part.trim(); + + let kind = if spec.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else if let Some(rest) = spec.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn npm_workspace_flag_value_is_not_a_spec() { + // npm's `-w ` / `--workspace ` take a bare-word value; + // it must never be verified (or blocked) as a package spec. 
+ for flag in ["-w", "--workspace"] { + let args = vec![ + flag.to_string(), + "my-workspace".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()], "flag {flag}"); + } + // `--workspace=name` is self-contained. + let args = vec!["--workspace=my-workspace".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn pnpm_and_yarn_boolean_workspace_flags_keep_the_spec() { + // pnpm's `-w` (--workspace-root) and yarn's `-W` are boolean — + // the next token is the package being installed. + let args = vec!["-w".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Pnpm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + + let args = vec!["-W".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Yarn, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + + // pnpm's `--filter ` does take a value. 
+ let args = vec![ + "--filter".to_string(), + "my-app".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Pnpm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn uv_add_group_flag_value_is_not_a_spec() { + let args = vec![ + "--group".to_string(), + "dev".to_string(), + "requests".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Uv, &args); + assert_eq!(p.specs, vec!["requests".to_string()]); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ( + "axios@>=1.0.0 <2.0.0", + NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + ), + ("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input); + match (&target.kind, &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node").name, "@types/node"); + 
assert_eq!(parse_npm_spec("@types/node@20.10.5").name, "@types/node"); + assert_eq!(parse_npm_spec("axios@1.2.3").name, "axios"); + assert_eq!(parse_npm_spec("axios").name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + ]; + for u in unverifiable { + let t = parse_npm_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ( + "requests[security]==2.31.0", + PypiSpec::Exact("2.31.0".to_string()), + ), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!( + parse_pypi_spec("requests[security]==2.31.0").name, + "requests" + ); + let t = parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Exact(ref v)) if v == "2.31.0"), + "env marker must not leak into the spec: {:?}", + t.kind + ); + + let t = parse_pypi_spec("requests; python_version >= \"3.8\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Latest)), + "marker-only requirement must 
still query the package: {:?}", + t.kind + ); + + let t = parse_pypi_spec("requests[security]; python_version >= \"3.8\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Latest)), + "extras plus marker must still query the package: {:?}", + t.kind + ); + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + } + + #[test] + fn classify_uv_command_recognizes_install_shapes() { + assert!(matches!( + classify_uv_command(&[ + "pip".to_string(), + "install".to_string(), + "requests".to_string(), + ]), + UvCommand::PipInstall { .. } + )); + assert!(matches!( + classify_uv_command(&["pip".to_string(), "i".to_string()]), + UvCommand::PipInstall { .. } + )); + assert!(matches!( + classify_uv_command(&["add".to_string(), "django".to_string()]), + UvCommand::Add { .. 
} + )); + assert_eq!( + classify_uv_command(&["sync".to_string(), "--extra".to_string(), "dev".to_string()]), + UvCommand::Sync + ); + assert_eq!( + classify_uv_command(&["run".to_string(), "pytest".to_string()]), + UvCommand::Passthrough + ); + assert_eq!( + classify_uv_command(&["lock".to_string()]), + UvCommand::Passthrough + ); + } + + #[test] + fn uv_add_positionals_parse_as_pypi_specs() { + let parsed = parse_pypi_positionals_args(&["requests==2.31.0".into()]); + assert_eq!(parsed.targets.len(), 1); + assert!( + matches!( + &parsed.targets[0].kind, + TargetKind::Pypi(PypiSpec::Exact(v)) if v == "2.31.0" + ), + "uv add targets must parse as PyPI specs, got {:?}", + parsed.targets[0].kind + ); + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + } +} diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs new file mode 100644 index 0000000..2619ee5 --- /dev/null +++ b/src/precheck/tree.rs @@ -0,0 +1,552 @@ +//! Full would-install-set resolution (the "tree pass"). +//! +//! Safety invariant: resolution must never execute package code. +//! pip: `--only-binary :all:` prevents sdist builds (pypa/pip#13091). +//! npm: `--ignore-scripts` guards npm/cli#2787. + +use std::process::Command; + +use super::PackageManager; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePackage { + pub name: String, + pub version: String, + /// pip report `"requested"`: the user named this package (CLI arg or + /// requirements file). Always false for npm — its lockfile has no + /// equivalent flag. 
+ pub requested: bool, +} + +/// Whether this manager's resolver has anything to resolve for the parsed +/// install. pip's dry-run and uv's compile also read `-r` requirements +/// files, so those make an install eligible even with no named targets. +/// npm's lockfile resolution reads `package.json`, so a bare `npm install` +/// is eligible whenever the working directory has one. +pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + || (matches!(manager, PackageManager::Pip | PackageManager::Uv) + && !parsed.requirements_files.is_empty()) + || (manager == PackageManager::Npm && std::path::Path::new("package.json").exists()) +} + +/// `Ok(None)`: manager has no safe dry-run — named-only with warning. +/// `Err(reason)`: dry-run attempted and failed — named-only, warning carries reason. +pub fn resolve_tree( + manager: PackageManager, + subcommand_label: &str, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result>, String> { + match manager { + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Npm if matches!(subcommand_label, "ci" | "clean-install") => { + resolve_npm_locked_tree().map(Some) + } + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Uv => resolve_uv_tree(parsed).map(Some), + // yarn/pnpm have no safe dry-run for installs. + PackageManager::Yarn | PackageManager::Pnpm => Ok(None), + } +} + +/// Last stderr line of a failed subprocess, for one-line error messages. 
+fn stderr_tail(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr) + .trim() + .lines() + .last() + .unwrap_or("unknown error") + .to_string() +} + +fn resolve_pip_tree(binary: &str, install_args: &[String]) -> Result, String> { + // Same binary resolution as the exec path (pip → pip3 fallback) — the + // tree pass must not silently degrade on pip3-only systems. + let resolved = super::resolve_binary(binary)?; + let output = Command::new(resolved) + .arg("install") + .args([ + "--dry-run", + "--quiet", + "--report", + "-", + "--only-binary", + ":all:", + ]) + .args(install_args) + .output() + .map_err(|e| format!("run pip dry-run: {e}"))?; + if !output.status.success() { + return Err(format!("pip dry-run failed: {}", stderr_tail(&output))); + } + parse_pip_report(&String::from_utf8_lossy(&output.stdout)) +} + +fn parse_pip_report(json: &str) -> Result, String> { + let report: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse pip report: {e}"))?; + let install = report + .get("install") + .and_then(|v| v.as_array()) + .ok_or("pip report has no install[] array")?; + install + .iter() + .map(|item| { + let metadata = item.get("metadata").ok_or("report item missing metadata")?; + let field = |k: &str| { + metadata + .get(k) + .and_then(|v| v.as_str()) + .map(str::to_string) + .ok_or_else(|| format!("report item missing metadata.{k}")) + }; + Ok(TreePackage { + name: field("name")?, + version: field("version")?, + requested: item + .get("requested") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + }) + }) + .collect() +} + +/// Resolve uv's would-install set with `uv pip compile` — uv's own +/// resolver, run without executing package code (`--only-binary :all:` +/// blocks sdist builds, mirroring the pip dry-run guard). Compile takes +/// requirements files rather than bare specs, so named registry specs and +/// absolutized `-r` includes are written to a temp `.in` file. 
+/// Unverifiable targets (URL / git / editable / path) are excluded — they +/// are already surfaced as skipped warnings. Index selection comes from +/// uv's env/config; index flags on the wrapped command don't carry over. +fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result, String> { + let uv = super::resolve_binary("uv")?; + let mut input = String::new(); + for t in &parsed.targets { + if !matches!(t.kind, super::TargetKind::Unverifiable { .. }) { + input.push_str(&t.display); + input.push('\n'); + } + } + for f in &parsed.requirements_files { + let abs = std::fs::canonicalize(f).map_err(|e| format!("read {}: {e}", f.display()))?; + input.push_str(&format!("-r {}\n", abs.display())); + } + if input.is_empty() { + return Err("nothing uv pip compile can resolve (all targets are URL/path refs)".into()); + } + + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + let in_file = work.path().join("corgea-gate.in"); + std::fs::write(&in_file, &input).map_err(|e| format!("write compile input: {e}"))?; + let output = Command::new(&uv) + .args([ + "pip", + "compile", + "--only-binary", + ":all:", + "--no-header", + "--no-annotate", + "--quiet", + ]) + .arg(&in_file) + .output() + .map_err(|e| format!("run uv pip compile: {e}"))?; + if !output.status.success() { + return Err(format!("uv pip compile failed: {}", stderr_tail(&output))); + } + parse_compiled_requirements( + &String::from_utf8_lossy(&output.stdout), + &requested_names(parsed), + ) +} + +/// Normalized names the user asked for — named CLI targets plus entries of +/// `-r` files — so tree findings label "(from requirements)" like pip's +/// `requested` report flag. Best-effort line parse; anything unparsed just +/// labels "(transitive)". 
+fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::HashSet { + let norm = |n: &str| PackageManager::Uv.normalize_name(n); + let mut out: std::collections::HashSet = parsed + .targets + .iter() + .filter(|t| !matches!(t.kind, super::TargetKind::Unverifiable { .. })) + .map(|t| norm(&t.name)) + .collect(); + for f in &parsed.requirements_files { + let Ok(content) = std::fs::read_to_string(f) else { + continue; + }; + for line in content.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) || line.contains("://") { + continue; + } + let name: String = line + .chars() + .take_while(|c| !matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ')) + .collect(); + if !name.is_empty() { + out.insert(norm(&name)); + } + } + } + out +} + +/// Parse `uv pip compile` stdout (requirements.txt-format `name==version` +/// pins) into the would-install set. Any line that isn't a pin is an error — +/// silently skipping could hide part of the tree. +fn parse_compiled_requirements( + out: &str, + requested: &std::collections::HashSet, +) -> Result, String> { + let mut pkgs = Vec::new(); + for line in out.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) { + continue; + } + // Strip env markers and trailing comments: `pkg==1.0 ; marker # via`. + let line = line.split(';').next().unwrap_or(line).trim(); + let line = line.split(" #").next().unwrap_or(line).trim(); + let Some((name, version)) = line.split_once("==") else { + return Err(format!( + "unexpected line in uv pip compile output: '{line}'" + )); + }; + // Strip extras: `celery[redis]==5.3.4`. 
+ let name = name.split('[').next().unwrap_or(name).trim().to_string(); + pkgs.push(TreePackage { + requested: requested.contains(&PackageManager::Uv.normalize_name(&name)), + name, + version: version.trim().to_string(), + }); + } + if pkgs.is_empty() { + return Err("uv pip compile produced no packages".to_string()); + } + Ok(pkgs) +} + +/// Direct dependency names declared by the project's `package.json` in the +/// current directory (the manifest `resolve_npm_tree` copies). Empty when +/// the manifest is absent or unparsable — origin labeling then degrades to +/// `(transitive)`. +pub fn project_direct_deps() -> std::collections::HashSet { + std::fs::read_to_string("package.json") + .map(|s| direct_deps_from_manifest(&s)) + .unwrap_or_default() +} + +fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { + let Ok(manifest) = serde_json::from_str::(json) else { + return Default::default(); + }; + let groups = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ]; + groups + .iter() + .filter_map(|g| manifest.get(g)?.as_object()) + .flat_map(|deps| deps.keys().cloned()) + .collect() +} + +/// Resolve npm's full would-install set by generating a lockfile in a +/// throwaway dir so the user's own lockfile is never touched. npm's +/// `--dry-run --json` only emits counts (npm/cli#6558), so we read the +/// generated `package-lock.json` instead. +/// +/// `--ignore-scripts` because npm has run lifecycle scripts under +/// `--package-lock-only` before (npm/cli#2787). 
+fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { + let resolved = super::resolve_binary(binary)?; + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + for manifest in [ + "package.json", + "package-lock.json", + "npm-shrinkwrap.json", + ".npmrc", + ] { + if std::path::Path::new(manifest).exists() { + std::fs::copy(manifest, work.path().join(manifest)) + .map_err(|e| format!("copy {manifest}: {e}"))?; + } + } + let output = Command::new(&resolved) + .arg("install") + .args(install_args) + .args([ + "--package-lock-only", + "--ignore-scripts", + "--no-audit", + "--no-fund", + ]) + .current_dir(work.path()) + .output() + .map_err(|e| format!("run npm lockfile resolution: {e}"))?; + if !output.status.success() { + return Err(format!( + "npm lockfile resolution failed: {}", + stderr_tail(&output) + )); + } + let lock = std::fs::read_to_string(work.path().join("package-lock.json")) + .map_err(|e| format!("read generated package-lock.json: {e}"))?; + parse_npm_lockfile(&lock) +} + +/// `npm ci` / `npm clean-install` install the existing lockfile exactly. +/// Do not re-resolve with `npm install --package-lock-only`: that can choose +/// newer versions allowed by package.json and miss vulnerable pinned versions. +fn resolve_npm_locked_tree() -> Result, String> { + let path = if std::path::Path::new("npm-shrinkwrap.json").exists() { + "npm-shrinkwrap.json" + } else { + "package-lock.json" + }; + let lock = std::fs::read_to_string(path).map_err(|e| format!("read {path}: {e}"))?; + parse_npm_lockfile(&lock) +} + +fn parse_npm_lockfile(json: &str) -> Result, String> { + let lock: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse npm lockfile: {e}"))?; + let packages = lock + .get("packages") + .and_then(|v| v.as_object()) + .ok_or("npm lockfile has no packages map (npm < 7?)")?; + Ok(packages + .iter() + // Skip the root project entry ("") and symlinked (workspace) entries. 
+ .filter(|(path, entry)| { + !path.is_empty() && entry.get("link").and_then(|v| v.as_bool()) != Some(true) + }) + .filter_map(|(path, entry)| { + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path))?; + let version = entry.get("version").and_then(|v| v.as_str())?; + Some(TreePackage { + name, + version: version.to_string(), + requested: false, + }) + }) + .collect()) +} + +/// Derive a package name from a lockfile path key like +/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. +fn name_from_lock_path(path: &str) -> Option { + let idx = path.rfind("node_modules/")?; + let name = &path[idx + "node_modules/".len()..]; + (!name.is_empty()).then(|| name.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const OK_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + + #[test] + fn parse_pip_report_ok() { + let pkgs = parse_pip_report(OK_REPORT).expect("parse ok report"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_pip_report_missing_requested_defaults_false() { + let json = r#"{"install":[{"metadata":{"name":"x","version":"1.0.0"}}]}"#; + let pkgs = parse_pip_report(json).expect("parse report without requested"); + assert!(!pkgs[0].requested); + } + + #[test] + fn parse_pip_report_missing_install() { + let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); + assert!(err.contains("no install[]"), "got: {err}"); + } + + #[test] + fn parse_pip_report_missing_version() { + let json = r#"{"install":[{"metadata":{"name":"x"}}]}"#; + let err = 
parse_pip_report(json).expect_err("missing version"); + assert!(err.contains("metadata.version"), "got: {err}"); + } + + #[test] + fn parse_pip_report_non_json() { + let err = parse_pip_report("not json").expect_err("non-json"); + assert!(err.contains("parse pip report"), "got: {err}"); + } + + #[test] + fn parse_compiled_requirements_pins_extras_and_markers() { + let requested = std::collections::HashSet::from(["flask-cors".to_string()]); + let out = "Flask_Cors==4.0.0\ncelery[redis]==5.3.4\nwerkzeug==3.1.8 ; python_version >= \"3.9\"\n\n# comment\n--index-url https://example.com\n"; + let pkgs = parse_compiled_requirements(out, &requested).expect("parse pins"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "Flask_Cors".to_string(), + version: "4.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "celery".to_string(), + version: "5.3.4".to_string(), + requested: false, + }, + TreePackage { + name: "werkzeug".to_string(), + version: "3.1.8".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_compiled_requirements_rejects_non_pins() { + let none = std::collections::HashSet::new(); + let err = parse_compiled_requirements("flask>=2.0\n", &none).expect_err("not a pin"); + assert!(err.contains("unexpected line"), "got: {err}"); + let err = parse_compiled_requirements("", &none).expect_err("empty"); + assert!(err.contains("no packages"), "got: {err}"); + } + + #[test] + fn requested_names_unions_targets_and_requirements_files() { + let dir = tempfile::tempdir().expect("temp dir"); + let req = dir.path().join("requirements.txt"); + std::fs::write( + &req, + "# comment\nFlask_Cors==4.0.0\nrequests[security]>=2.0 ; python_version >= \"3.9\"\n-r other.txt\nhttps://example.com/pkg.whl\n", + ) + .expect("write requirements"); + let parsed = super::super::parse::ParsedInstall { + targets: vec![super::super::InstallTarget { + name: "celery".to_string(), + display: "celery==5.3.4".to_string(), + kind: super::super::TargetKind::Pypi( + 
crate::verify_deps::registry::PypiSpec::Exact("5.3.4".to_string()), + ), + }], + requirements_files: vec![req], + }; + let names = requested_names(&parsed); + for name in ["celery", "flask-cors", "requests"] { + assert!(names.contains(name), "missing {name}: {names:?}"); + } + assert_eq!(names.len(), 3); + } + + // lockfile-v3 with: root entry (skipped), a plain dep, a nested dep, + // a scoped dep, and a workspace `link: true` entry (skipped). + const NPM_LOCK: &str = r#"{ + "name": "proj", "lockfileVersion": 3, + "packages": { + "": {"name": "proj", "version": "1.0.0"}, + "node_modules/oldpkg": {"version": "1.0.0"}, + "node_modules/evildep": {"version": "0.4.2"}, + "node_modules/a/node_modules/b": {"version": "2.3.4"}, + "node_modules/@scope/pkg": {"version": "9.0.1"}, + "node_modules/localdep": {"resolved": "../local", "link": true}, + "packages/localdep": {"name": "localdep", "version": "0.0.1"} + } + }"#; + + #[test] + fn parse_npm_lockfile_ok() { + let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + let pkg = |name: &str, version: &str| TreePackage { + name: name.to_string(), + version: version.to_string(), + requested: false, + }; + assert_eq!( + pkgs, + vec![ + pkg("@scope/pkg", "9.0.1"), + pkg("b", "2.3.4"), + pkg("evildep", "0.4.2"), + pkg("localdep", "0.0.1"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_missing_packages() { + let err = parse_npm_lockfile(r#"{"lockfileVersion":1}"#).expect_err("no packages map"); + assert!(err.contains("no packages map"), "got: {err}"); + } + + #[test] + fn name_from_lock_path_handles_nested_and_scoped() { + assert_eq!( + name_from_lock_path("node_modules/oldpkg").as_deref(), + Some("oldpkg") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/b").as_deref(), + Some("b") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/@scope/pkg").as_deref(), + Some("@scope/pkg") + ); + 
assert_eq!(name_from_lock_path("packages/foo"), None); + } + + #[test] + fn direct_deps_from_manifest_unions_all_groups() { + let manifest = r#"{ + "name": "proj", + "dependencies": {"a": "^1.0.0", "@scope/b": "2.x"}, + "devDependencies": {"c": "*"}, + "optionalDependencies": {"d": "1.2.3"}, + "peerDependencies": {"e": ">=1"} + }"#; + let deps = direct_deps_from_manifest(manifest); + for name in ["a", "@scope/b", "c", "d", "e"] { + assert!(deps.contains(name), "missing {name}"); + } + assert_eq!(deps.len(), 5); + } + + #[test] + fn direct_deps_from_manifest_degrades_to_empty() { + assert!(direct_deps_from_manifest("not json").is_empty()); + assert!(direct_deps_from_manifest(r#"{"name":"proj"}"#).is_empty()); + assert!(direct_deps_from_manifest(r#"{"dependencies":[]}"#).is_empty()); + } +} diff --git a/src/utils/api.rs b/src/utils/api.rs index 9b9a445..c82e38e 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,5 +1,6 @@ use crate::log::debug; use crate::utils; +use corgea::vuln_api::is_jwt; use reqwest::header::HeaderMap; use reqwest::StatusCode; use reqwest::{ @@ -22,11 +23,6 @@ fn get_source() -> String { std::env::var("CORGEA_SOURCE").unwrap_or_else(|_| "cli".to_string()) } -fn is_jwt(token: &str) -> bool { - let parts: Vec<&str> = token.splitn(4, '.').collect(); - parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) -} - fn auth_headers(token: &str) -> HeaderMap { let mut headers = HeaderMap::new(); if is_jwt(token) { @@ -1039,27 +1035,6 @@ mod tests { use super::*; use reqwest::header::{HeaderMap, HeaderValue}; - #[test] - fn is_jwt_accepts_three_dot_separated_non_empty_parts() { - assert!(is_jwt("aaa.bbb.ccc")); - assert!(is_jwt("header.payload.signature")); - } - - #[test] - fn is_jwt_rejects_wrong_part_count() { - assert!(!is_jwt("aaa.bbb")); - assert!(!is_jwt("aaa.bbb.ccc.ddd")); - assert!(!is_jwt("plainstring")); - assert!(!is_jwt("")); - } - - #[test] - fn is_jwt_rejects_when_any_part_is_empty() { - assert!(!is_jwt("aaa..ccc")); - 
assert!(!is_jwt(".bbb.ccc")); - assert!(!is_jwt("aaa.bbb.")); - } - #[test] fn auth_headers_uses_bearer_for_jwt_tokens() { let headers = auth_headers("aaa.bbb.ccc"); diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..b813529 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,137 @@ +//! Slim slice of #89's verify_deps: registry resolution + threshold helpers. + +pub mod registry; + +use std::time::Duration; + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. +pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + let d = Duration::try_from_secs_f64(secs).map_err(|_| "threshold too large".to_string())?; + // Establish the invariant every consumer relies on: the threshold + // must also fit in a `chrono::Duration` (see precheck's from_std). + chrono::Duration::from_std(d).map_err(|_| "threshold too large".to_string())?; + Ok(d) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). 
+pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_threshold_units() { + assert_eq!( + parse_threshold("2d").unwrap(), + Duration::from_secs(2 * 86400) + ); + assert_eq!( + parse_threshold("48h").unwrap(), + Duration::from_secs(48 * 3600) + ); + assert_eq!( + parse_threshold("30m").unwrap(), + Duration::from_secs(30 * 60) + ); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!( + parse_threshold("1w").unwrap(), + Duration::from_secs(7 * 86400) + ); + assert_eq!( + parse_threshold("3").unwrap(), + Duration::from_secs(3 * 86400) + ); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn parse_threshold_rejects_absurdly_large_values() { + // Too large for chrono::Duration (precheck converts via from_std). + assert!(parse_threshold("999999999999d").is_err()); + // Too large even for std::time::Duration. 
+ assert!(parse_threshold("1e308d").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..a7d32c6 --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,731 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both resolvers turn a version spec into the concrete version that +//! would be installed, plus its publish time as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(20); + +fn user_agent() -> String { + format!("corgea-cli/{} (deps)", env!("CARGO_PKG_VERSION")) +} + +fn http_client() -> Result<&'static reqwest::blocking::Client, String> { + static CLIENT: OnceLock = OnceLock::new(); + Ok(CLIENT.get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .build() + .expect("registry http client") + })) +} + +/// URL-encode an npm package name. Scoped names contain `@` and `/`, +/// the latter must be encoded as `%2f` for the package metadata URL. 
+/// Also used by `vuln_api` for its npm path segments. +pub(crate) fn encode_npm_name(name: &str) -> String { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, + /// PEP 592. PyPI's JSON API emits a bool; some mirrors emit the + /// yank reason string instead. Either form means yanked. + #[serde(default)] + yanked: Option, +} + +impl PypiUrl { + fn is_yanked(&self) -> bool { + match &self.yanked { + Some(serde_json::Value::Bool(b)) => *b, + Some(serde_json::Value::String(_)) => true, + _ => false, + } + } +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +// Resolution helpers (npm + PyPI). Inserted before the tests module +// in registry.rs. + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. 
+ Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + /// Only the keys (published version strings) are used; `IgnoredAny` + /// avoids allocating multi-MB JSON trees for big packuments. + #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by install wrappers when the +/// install command says e.g. `axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. +pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on npm registry ({})", + name, base + )); + } + if !status.is_success() { + return Err(format!( + "npm registry returned status {} for '{}'", + status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let meta: NpmFullMetadata = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse npm registry response for '{}': {}", + name, e + ) + })?; + + let resolved_version = match spec { + NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { + format!( + 
"package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package '{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => { + npm_pick_highest_matching(&meta.versions, range).ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })? + } + }; + + let raw_time = meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Pick the highest semver-compatible version that satisfies `range`. +/// Pre-releases are excluded unless the range itself references a +/// pre-release (matches npm's behaviour). +/// Translate an npm-style version range (`>=1.0.0 <2.0.0`, +/// `1.x`, `>=1.0.0`) to a `semver::VersionReq`. The Rust crate uses +/// `,` as the AND separator, npm uses whitespace, so we normalise +/// before parsing. 
+fn parse_npm_range(range: &str) -> Option { + if let Ok(req) = semver::VersionReq::parse(range) { + return Some(req); + } + let normalised = range.split_whitespace().collect::>().join(","); + semver::VersionReq::parse(&normalised).ok() +} + +fn npm_pick_highest_matching( + versions: &std::collections::BTreeMap, + range: &str, +) -> Option { + // npm separates predicates with spaces (`>=1.0.0 <2.0.0`); the + // Rust `semver` crate uses commas. Try both. We don't support + // npm's `||` OR syntax here — those are best-effort skipped. + let req = parse_npm_range(range)?; + let range_has_prerelease = range.contains('-'); + + let mut best: Option<(semver::Version, String)> = None; + for raw in versions.keys() { + let v = match semver::Version::parse(raw) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() && !range_has_prerelease { + continue; + } + if !req.matches(&v) { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + +/// PyPI version specifier used by install wrappers. We parse a +/// limited subset of PEP 440 specifiers — enough for the common +/// install-command cases (`pkg`, `pkg==X`, `pkg>=X`, `pkg=2.0`, `<3,>=2`, `~=1.4`). + Specifier(String), +} + +#[derive(Debug, Deserialize)] +struct PypiInfoResponse { + releases: std::collections::BTreeMap>, +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version +/// + publish time. The latest non-prerelease, non-yanked release is +/// preferred. 
+pub fn pypi_resolve( + name: &str, + spec: &PypiSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_PYPI_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("PyPI request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!("package '{}' not found on PyPI ({})", name, base)); + } + if !status.is_success() { + return Err(format!("PyPI returned status {} for '{}'", status, name)); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read PyPI response: {}", e))?; + + let meta: PypiInfoResponse = serde_json::from_str(&body) + .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?; + + let candidates = collect_pypi_candidates(&meta); + // A yanked release resolves only via an exact pin (PEP 592), matching + // pip — otherwise we'd gate a version pip would never choose. 
+ let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + let chosen = match spec { + PypiSpec::Latest => pick_latest_stable(&installable).map(|c| c.version.clone()), + PypiSpec::Exact(v) => { + if candidates.iter().any(|c| &c.version == v) { + Some(v.clone()) + } else { + None + } + } + PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&installable, spec_str) + .map_err(|e| format!("{} for '{}'", e, name))?, + }; + + let chosen = chosen.ok_or_else(|| match spec { + PypiSpec::Exact(v) => { + format!( + "version '{}' for package '{}' was not found on PyPI", + v, name + ) + } + _ => format!("no installable version found for '{}' on PyPI", name), + })?; + + let published_at = candidates + .iter() + .find(|c| c.version == chosen) + .map(|c| c.uploaded) + .ok_or_else(|| { + format!( + "no upload timestamp for '{}' version '{}' on PyPI", + name, chosen + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: chosen, + published_at, + }) +} + +/// One published release a `PypiSpec` can resolve to. +#[derive(Debug, Clone)] +struct PypiCandidate { + version: String, + uploaded: DateTime, + /// Every artifact of this release is yanked (PEP 592) — pip skips + /// it for anything but an exact pin, so non-exact resolution must too. + yanked: bool, +} + +/// Returns a candidate for every release that has at least one uploaded, +/// timestamped artifact. Empty or timestampless release entries (which +/// PyPI sometimes keeps around for deleted / private versions) are +/// filtered out so we never pick them. 
+fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + let mut earliest: Option> = None; + for f in files { + let raw = f + .upload_time_iso_8601 + .as_deref() + .or(f.upload_time.as_deref()); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + if let Some(dt) = earliest { + out.push(PypiCandidate { + version: ver.clone(), + uploaded: dt, + yanked: files.iter().all(PypiUrl::is_yanked), + }); + } + } + out +} + +/// Pick the latest non-prerelease version using `semver` parsing as a +/// best-effort PEP 440 ordering. Falls back to the entry with the +/// latest upload time if no candidate parses as semver. +fn pick_latest_stable(candidates: &[PypiCandidate]) -> Option<&PypiCandidate> { + let mut best_semver: Option<(semver::Version, &PypiCandidate)> = None; + for c in candidates { + let normalized = normalize_for_semver(&c.version); + if let Ok(v) = semver::Version::parse(&normalized) { + if !v.pre.is_empty() { + continue; + } + match &best_semver { + Some((cur, _)) if cur >= &v => {} + _ => best_semver = Some((v, c)), + } + } + } + if let Some((_, picked)) = best_semver { + return Some(picked); + } + candidates.iter().max_by_key(|c| c.uploaded) +} + +/// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or +/// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses +/// straight as semver if we pad to 3 components. Anything more exotic +/// (`1.0a1`, `2!1.0`, etc.) is left alone and rejected by semver. +/// +/// Also used outside the registry (`precheck::safe_version`) as a lenient +/// cross-ecosystem pad for ordering fixed versions; keep it ecosystem-agnostic. 
+pub(crate) fn normalize_for_semver(v: &str) -> String { + if v.contains('!') + || v.contains('a') + || v.contains('b') + || v.contains("rc") + || v.contains(".dev") + { + return v.to_string(); + } + let parts: Vec<&str> = v.split('.').collect(); + match parts.len() { + 1 => format!("{}.0.0", parts[0]), + 2 => format!("{}.{}.0", parts[0], parts[1]), + _ => v.to_string(), + } +} + +/// Apply a PEP 440-style specifier expression to the candidate list +/// and return the highest match (`Ok(None)` when nothing satisfies it). +/// Supported operators: `==`, `>=`, `>`, `<=`, `<`, `~=`, `!=`. An +/// expression we can't parse (unknown operator, wildcard like `==1.*`) +/// is `Err` — resolving anything else would gate a different version +/// than the package manager installs. +fn pypi_resolve_specifier( + candidates: &[PypiCandidate], + spec: &str, +) -> Result, String> { + let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); + let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + + // Longest prefixes first so `>=` never matches as `>`. 
+ const OPERATORS: &[(&str, &str)] = &[ + ("===", "=="), + ("==", "=="), + (">=", ">="), + ("<=", "<="), + ("!=", "!="), + ("~=", "~="), + (">", ">"), + ("<", "<"), + ]; + for p in &parts { + let unsupported = || format!("unsupported version specifier '{}'", spec); + let (op, val) = OPERATORS + .iter() + .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim()))) + .ok_or_else(unsupported)?; + let v = semver::Version::parse(&normalize_for_semver(val)).map_err(|_| unsupported())?; + requirements.push((op, v)); + } + + let mut best: Option<(semver::Version, String)> = None; + for c in candidates { + let raw = &c.version; + let v = match semver::Version::parse(&normalize_for_semver(raw)) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() { + continue; + } + let satisfies = requirements.iter().all(|(op, want)| match *op { + "==" => &v == want, + ">=" => &v >= want, + "<=" => &v <= want, + "!=" => &v != want, + ">" => &v > want, + "<" => &v < want, + "~=" => { + if &v < want { + return false; + } + let upper = semver::Version::new(want.major, want.minor + 1, 0); + v < upper + } + _ => false, + }); + if !satisfies { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + Ok(best.map(|(_, raw)| raw)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn npm_name_encoding() { + assert_eq!(encode_npm_name("left-pad"), "left-pad"); + assert_eq!(encode_npm_name("@scope/pkg"), "@scope%2fpkg"); + assert_eq!(encode_npm_name("@types/node"), "@types%2fnode"); + } + + fn candidates(versions: &[&str]) -> Vec { + versions + .iter() + .map(|v| PypiCandidate { + version: v.to_string(), + uploaded: Utc::now(), + yanked: false, + }) + .collect() + } + + #[test] + fn specifier_resolves_highest_match() { + let c = candidates(&["1.0.0", "2.5.0", "3.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0,<3").expect("parse"), + Some("2.5.0".to_string()) + ); + } + + #[test] + fn 
specifier_with_no_match_is_ok_none() { + let c = candidates(&["1.0.0"]); + assert_eq!(pypi_resolve_specifier(&c, ">=9.0").expect("parse"), None); + } + + #[test] + fn unparseable_specifier_errors_instead_of_falling_back() { + // `==1.*` is valid PEP 440 but not representable here; resolving + // "latest stable" instead would gate the wrong version. + let c = candidates(&["1.0.0", "2.0.0"]); + for spec in ["==1.*", "@weird", ">= not-a-version"] { + let err = pypi_resolve_specifier(&c, spec).expect_err(spec); + assert!( + err.contains("unsupported version specifier"), + "{spec}: {err}" + ); + } + } + + #[test] + fn yanked_only_releases_are_flagged() { + // 2.0.0 has every file yanked (one bool, one mirror-style reason + // string); 1.0.0 has a non-yanked file. Timestamps alone must not + // decide yanked status — yanked files keep theirs. + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{ + "1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z","yanked":false}], + "2.0.0":[{"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":"broken build"}] + }}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + let yanked_of = |v: &str| candidates.iter().find(|c| c.version == v).unwrap().yanked; + assert!(!yanked_of("1.0.0")); + assert!(yanked_of("2.0.0")); + + // Latest/specifier resolution must skip the yanked release… + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + assert_eq!( + pick_latest_stable(&installable).map(|c| c.version.as_str()), + Some("1.0.0") + ); + assert_eq!( + pypi_resolve_specifier(&installable, ">=1.0").expect("parse"), + Some("1.0.0".to_string()) + ); + // …while an exact pin still finds it (pip installs it with a warning). 
+ assert!(candidates.iter().any(|c| c.version == "2.0.0")); + } + + #[test] + fn release_with_partially_yanked_files_stays_installable() { + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{"1.5.0":[ + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":false} + ]}}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + assert!(!candidates[0].yanked); + } + + #[test] + fn parses_iso8601_variants() { + assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05").is_ok()); + assert!(parse_iso8601("not a date").is_err()); + } + + /// Network-touching integration tests. Skipped by default (#[ignore]) + /// so unit-test runs stay hermetic. Run with: + /// cargo test -- --ignored verify_deps::registry::tests::live + #[test] + #[ignore] + fn live_npm_resolve_latest() { + let r = npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest"); + assert_eq!(r.name, "left-pad"); + assert_eq!(r.version, "1.3.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_npm_resolve_exact() { + let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None) + .expect("npm resolve exact"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_range() { + let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None) + .expect("npm resolve range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_npm_style_range() { + // npm uses spaces, the Rust crate uses commas — we should + // accept both. 
+ let r = npm_resolve( + "left-pad", + &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + None, + ) + .expect("npm resolve space-range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_unknown_tag() { + let err = npm_resolve( + "left-pad", + &NpmSpec::Tag("does-not-exist".to_string()), + None, + ) + .err() + .unwrap(); + assert!(err.contains("dist-tag"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_resolve_latest() { + let r = pypi_resolve("flask", &PypiSpec::Latest, None).expect("pypi resolve latest"); + assert_eq!(r.name, "flask"); + assert!(!r.version.is_empty()); + } + + #[test] + #[ignore] + fn live_pypi_resolve_exact() { + let r = pypi_resolve("requests", &PypiSpec::Exact("2.31.0".to_string()), None) + .expect("pypi resolve exact"); + assert_eq!(r.version, "2.31.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_resolve_specifier() { + let r = pypi_resolve( + "requests", + &PypiSpec::Specifier(">=2.30,<2.32".to_string()), + None, + ) + .expect("pypi resolve specifier"); + // `requests==2.31.0` is the only release in [2.30, 2.32). + assert_eq!(r.version, "2.31.0"); + } +} diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs new file mode 100644 index 0000000..da06d3e --- /dev/null +++ b/src/vuln_api/mod.rs @@ -0,0 +1,561 @@ +//! Corgea vuln-api client. +//! +//! Deliberately independent of `utils::api::SHARED_CLIENT` because: +//! * the vuln-api host is user-configurable via `CORGEA_VULN_API_URL`, +//! so we must never silently replay Corgea cookies / non-JWT +//! `CORGEA-TOKEN` headers via redirect following or the shared +//! cookie jar. +//! * the shared client's `check_for_warnings` exits the process on +//! HTTP 410, which is wrong for per-dep CVE lookups. +//! +//! The auth header is attached explicitly per call from a caller-owned +//! token (no global state). 
+ +use serde::{Deserialize, Serialize}; +use std::sync::OnceLock; +use std::time::Duration; + +use crate::log::debug; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +/// Cap on how much of an error response body we splice into the +/// user-facing error message. Fits a CLI line, captures +/// `{"error":"…"}`-class messages comfortably, and truncates +/// Cloudflare HTML before it gets ugly. +const ERROR_BODY_SNIPPET_LEN: usize = 300; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnCheckResponse { + pub ecosystem: String, + pub package_name: String, + pub version: String, + pub is_vulnerable: bool, + pub matches: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnMatch { + pub advisory_id: String, + pub severity_level: String, + pub tier: u8, + pub vulnerable_version_range: Option, + pub fixed_version: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source: Option, +} + +fn user_agent() -> String { + format!("corgea-cli/{} (vuln-api)", env!("CARGO_PKG_VERSION")) +} + +/// Build (once) and clone the shared vuln-api client. A blocking reqwest +/// client owns a runtime thread, and a gate makes up to three verdict +/// passes (tree, named-only, steers) — cache it like `registry.rs` does. +/// `Client` clones share the same pool, so the clone is cheap. +pub fn http_client() -> Result { + static CLIENT: OnceLock> = OnceLock::new(); + CLIENT + .get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|e| format!("failed to build vuln-api http client: {}", e)) + }) + .clone() +} + +/// Whether `token` looks like a JWT (three non-empty dot-separated parts). +/// Decides the auth header shape here and in the binary crate's `utils/api.rs`. 
+pub fn is_jwt(token: &str) -> bool { + let parts: Vec<&str> = token.splitn(4, '.').collect(); + parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) +} + +fn normalize_base_url(base_url: &str) -> String { + base_url.trim_end_matches('/').to_string() +} + +/// Encode package name for the vuln-api path segment. +/// npm scoped names: `@scope/pkg` → `@scope%2fpkg`. +fn encode_package_name(ecosystem: &str, name: &str) -> String { + if ecosystem.eq_ignore_ascii_case("npm") { + crate::verify_deps::registry::encode_npm_name(name) + } else { + urlencoding::encode(name).into_owned() + } +} + +/// Build an authed JSON GET: the standard `Accept` / `CORGEA-SOURCE` headers +/// plus the per-call auth header (JWT → `Authorization: Bearer`, otherwise +/// `CORGEA-TOKEN`). The single place auth is attached, shared by every route. +fn build_json_get( + client: &reqwest::blocking::Client, + url: &str, + token: Option<&str>, +) -> reqwest::blocking::RequestBuilder { + let mut req = client + .get(url) + .header("Accept", "application/json") + .header("CORGEA-SOURCE", "cli"); + if let Some(token) = token { + if is_jwt(token) { + req = req.header("Authorization", format!("Bearer {}", token)); + } else { + req = req.header("CORGEA-TOKEN", token); + } + } + req +} + +/// Validate the per-call preconditions shared by every vuln-api request: +/// a non-empty token and a non-empty (trailing-slash-normalized) base URL. +/// Returns the normalized base so callers don't re-derive it. +fn validated_base(base_url: &str) -> Result> { + let base = normalize_base_url(base_url); + if base.is_empty() { + return Err("vuln-api base URL is empty".into()); + } + Ok(base) +} + +/// Format a server error body into a `": "` suffix for a single-line +/// CLI error, or an empty string when the body is empty. Consumes the response. 
+fn error_body_suffix(response: reqwest::blocking::Response) -> String { + let body = response.text().unwrap_or_default(); + let snippet = body_snippet(&body, ERROR_BODY_SNIPPET_LEN); + if snippet.is_empty() { + String::new() + } else { + format!(": {}", snippet) + } +} + +/// Collapse whitespace and truncate at `max_chars` so a server error +/// body can be spliced into a single-line CLI error message without +/// dragging in HTML newlines or runaway length. Returns empty string +/// when the body is empty so the caller can format conditionally. +/// Char-boundary safe — operates on `chars()`, never byte slices. +fn body_snippet(body: &str, max_chars: usize) -> String { + let collapsed: String = body.split_whitespace().collect::>().join(" "); + if collapsed.is_empty() { + return String::new(); + } + let truncated: String = collapsed.chars().take(max_chars).collect(); + if collapsed.chars().count() > max_chars { + format!("{}…", truncated) + } else { + truncated + } +} + +fn retry_after_seconds(response: &reqwest::blocking::Response) -> u64 { + response + .headers() + .get("Retry-After") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.trim().parse::().ok()) + .map(|s| s.clamp(1, 10)) + .unwrap_or(1) +} + +fn send_package_check_with_429_retry( + client: &reqwest::blocking::Client, + url: &str, + token: Option<&str>, +) -> Result> { + let response = build_json_get(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e))?; + + if response.status().as_u16() == 429 { + let wait = retry_after_seconds(&response); + std::thread::sleep(Duration::from_secs(wait)); + return build_json_get(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e).into()); + } + Ok(response) +} + +pub fn check_package_version( + client: &reqwest::blocking::Client, + base_url: &str, + token: &str, + ecosystem: &str, + name: &str, + version: &str, +) -> Result> { + check_package_version_with_auth(client, base_url, 
Some(token), ecosystem, name, version) +} + +pub fn check_package_version_with_auth( + client: &reqwest::blocking::Client, + base_url: &str, + token: Option<&str>, + ecosystem: &str, + name: &str, + version: &str, +) -> Result> { + let base = validated_base(base_url)?; + let encoded_name = encode_package_name(ecosystem, name); + let encoded_version = urlencoding::encode(version); + let url = format!( + "{}/v1/packages/{}/{}/versions/{}/check", + base, ecosystem, encoded_name, encoded_version + ); + + debug(&format!("Sending vuln-api request to URL: {}", url)); + + let response = send_package_check_with_429_retry(client, &url, token)?; + + let status = response.status(); + match status.as_u16() { + 401 => { + let msg = if token.is_some() { + "vuln-api rejected the Corgea token (run `corgea login` to refresh)" + } else { + "vuln-api rejected the public unauthenticated request" + }; + return Err(msg.into()); + } + 403 => { + return Err("vuln-api access denied (check your Corgea plan/permissions)".into()); + } + 404 => { + return Ok(VulnCheckResponse { + ecosystem: ecosystem.to_string(), + package_name: name.to_string(), + version: version.to_string(), + is_vulnerable: false, + matches: vec![], + }); + } + 429 => { + return Err("vuln-api rate-limited this request (retry later)".into()); + } + 500..=599 => { + return Err(format!("vuln-api unavailable (HTTP {})", status.as_u16()).into()); + } + code if !status.is_success() => { + let suffix = error_body_suffix(response); + return Err(format!("vuln-api returned unexpected HTTP {}{}", code, suffix).into()); + } + _ => {} + } + + let response_text = response.text()?; + let parsed: VulnCheckResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse vuln-api response: {}. 
Body: {}", + e, response_text + )); + format!("Failed to parse vuln-api response: {}", e) + })?; + + // Confused-deputy guard: refuse to attribute advisories to a different + // (name, version, ecosystem) than what we asked about. The server is + // allowed to be silent on identity, but if it answers, it must match. + if !parsed.ecosystem.is_empty() && !parsed.ecosystem.eq_ignore_ascii_case(ecosystem) { + return Err(format!( + "vuln-api response ecosystem '{}' does not match request '{}'", + parsed.ecosystem, ecosystem + ) + .into()); + } + if !parsed.package_name.is_empty() && !parsed.package_name.eq_ignore_ascii_case(name) { + return Err(format!( + "vuln-api response package '{}' does not match request '{}'", + parsed.package_name, name + ) + .into()); + } + if !parsed.version.is_empty() && parsed.version != version { + return Err(format!( + "vuln-api response version '{}' does not match request '{}'", + parsed.version, version + ) + .into()); + } + + // is_vulnerable=true with no matches is contradictory — treat as an + // error so the caller can surface it rather than silently demoting + // the dep to "clean". 
+ if parsed.is_vulnerable && parsed.matches.is_empty() { + return Err( + "vuln-api reported is_vulnerable=true with no matches; refusing to interpret".into(), + ); + } + + Ok(parsed) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::vuln_api_stub::{self, PackageKey}; + use std::collections::{HashMap, HashSet}; + + fn lodash_key() -> PackageKey { + ("npm".into(), "lodash".into(), "4.17.20".into()) + } + + fn check_with_stub_status( + status_code: u16, + body: &str, + ) -> Result> { + let client = http_client().expect("test client"); + let stub = vuln_api_stub::spawn_with_statuses( + HashMap::from([(lodash_key(), body.to_string())]), + HashMap::from([(lodash_key(), status_code)]), + ); + check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + } + + #[test] + fn check_package_version_401_returns_actionable_error() { + let err = check_with_stub_status(401, r#"{"error":"unauthorized"}"#) + .expect_err("401 should fail"); + assert!(err.to_string().contains("rejected the Corgea token")); + } + + #[test] + fn check_package_version_403_returns_actionable_error() { + let err = + check_with_stub_status(403, r#"{"error":"forbidden"}"#).expect_err("403 should fail"); + assert!(err.to_string().contains("access denied")); + } + + #[test] + fn check_package_version_404_returns_clean() { + let resp = + check_with_stub_status(404, r#"{"error":"not found"}"#).expect("404 should be clean"); + assert!(!resp.is_vulnerable); + assert!(resp.matches.is_empty()); + assert_eq!(resp.package_name, "lodash"); + assert_eq!(resp.version, "4.17.20"); + } + + #[test] + fn check_package_version_persistent_429_returns_actionable_error() { + let err = check_with_stub_status(429, r#"{"error":"rate limited"}"#) + .expect_err("429 should fail"); + assert!(err.to_string().contains("rate-limited")); + } + + #[test] + fn check_package_version_429_retries_then_succeeds() { + let client = http_client().unwrap(); + let vulnerable_body = r#"{ + 
"ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-retry-test", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let stub = vuln_api_stub::spawn_with_retry_once( + HashMap::from([(lodash_key(), vulnerable_body.to_string())]), + HashMap::new(), + HashSet::from([lodash_key()]), + ); + let resp = check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + .expect("retry should succeed"); + assert!(resp.is_vulnerable); + } + + #[test] + fn check_package_version_500_returns_unavailable() { + let err = + check_with_stub_status(500, r#"{"error":"internal"}"#).expect_err("500 should fail"); + assert!(err.to_string().contains("unavailable (HTTP 500)")); + } + + #[test] + fn check_package_version_unexpected_status_includes_body_snippet() { + let err = + check_with_stub_status(418, r#"{"error":"teapot"}"#).expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + assert!( + msg.contains("teapot"), + "expected body in error; got: {}", + msg + ); + } + + #[test] + fn check_package_version_unexpected_status_omits_body_when_empty() { + let err = check_with_stub_status(418, "").expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + // Body is empty → message must end at the status, no dangling ":" or whitespace. + assert!( + msg.trim_end().ends_with("418"), + "expected message to end at status code; got: {:?}", + msg + ); + } + + #[test] + fn body_snippet_truncates_at_char_boundary() { + // Multi-byte char ("é" is 2 bytes UTF-8). Naïve byte-slicing would + // panic; we must operate on chars(). 
+ let input = "é".repeat(500); + let out = body_snippet(&input, ERROR_BODY_SNIPPET_LEN); + assert!(out.ends_with('…'), "expected ellipsis; got: {:?}", out); + // 300 "é" chars + the ellipsis. + assert_eq!(out.chars().count(), ERROR_BODY_SNIPPET_LEN + 1); + } + + #[test] + fn body_snippet_collapses_whitespace() { + assert_eq!(body_snippet("foo\n bar\t\tbaz", 100), "foo bar baz"); + } + + #[test] + fn body_snippet_empty_returns_empty() { + assert_eq!(body_snippet("", 100), ""); + assert_eq!(body_snippet(" \n\t ", 100), ""); + } + + #[test] + fn encode_package_name_scoped_npm() { + assert_eq!(encode_package_name("npm", "@types/node"), "@types%2fnode"); + assert_eq!(encode_package_name("npm", "lodash"), "lodash"); + } + + #[test] + fn encode_package_name_pypi() { + assert_eq!(encode_package_name("PyPI", "requests"), "requests"); + } + + #[test] + fn encode_package_name_npm_case_insensitive() { + // Defends against vuln_api_ecosystem() casing changes. + assert_eq!(encode_package_name("NPM", "@types/node"), "@types%2fnode"); + } + + #[test] + fn deserialize_vuln_check_response() { + let body = r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-xxxx-yyyy-zzzz", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let parsed: VulnCheckResponse = serde_json::from_str(body).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + assert_eq!(parsed.matches[0].advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.matches[0].tier, 1); + } + + #[test] + fn normalize_base_url_strips_trailing_slash() { + assert_eq!( + normalize_base_url("http://localhost:8080/"), + "http://localhost:8080" + ); + } + + #[test] + fn is_jwt_detection() { + assert!(is_jwt("a.b.c")); + assert!(!is_jwt("plain-token")); + assert!(!is_jwt("")); + assert!(!is_jwt("a.b")); + assert!(!is_jwt("a.b.c.d")); + 
assert!(!is_jwt("a..c")); + assert!(!is_jwt(".b.c")); + assert!(!is_jwt("a.b.")); + } + + // Fixture-based deserialization tests — committed JSON under tests/fixtures/vuln_api/, + // built to the authoritative server serialization (vuln-api/cve_worker/src/worker.js). + macro_rules! fixture { + ($name:literal) => { + include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/fixtures/vuln_api/", + $name + )) + }; + } + + #[test] + fn fixture_check_clean_deserializes() { + let parsed: VulnCheckResponse = serde_json::from_str(fixture!("check_clean.json")).unwrap(); + assert!(!parsed.is_vulnerable); + assert!(parsed.matches.is_empty()); + assert_eq!(parsed.ecosystem, "pypi"); + assert_eq!(parsed.package_name, "requests"); + assert_eq!(parsed.version, "2.31.0"); + } + + #[test] + fn fixture_check_unknown_deserializes_as_clean() { + // /check returns 200 is_vulnerable:false matches:[] for an unknown package; + // the 404 {"error":"Package not found"} body is the profile route, not /check. + let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_unknown.json")).unwrap(); + assert!(!parsed.is_vulnerable); + assert!(parsed.matches.is_empty()); + } + + #[test] + fn fixture_check_vulnerable_deserializes() { + let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_vulnerable.json")).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + let m = &parsed.matches[0]; + assert_eq!(m.advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(m.severity_level, "high"); + assert_eq!(m.tier, 1); + assert_eq!(m.vulnerable_version_range.as_deref(), Some(">=3.2,<3.2.5")); + assert_eq!(m.fixed_version.as_deref(), Some("3.2.5")); + } + + #[test] + fn fixture_check_malware_deserializes() { + // Malware surfaces through /check as an ordinary is_vulnerable:true match + // (MAL-* id); /malware items carry no version, so /check is the per-version signal. 
+ let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_malware.json")).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + let m = &parsed.matches[0]; + assert!(m.advisory_id.starts_with("MAL-")); + assert!(m.vulnerable_version_range.is_none()); + assert!(m.fixed_version.is_none()); + } +} diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs new file mode 100644 index 0000000..df8a092 --- /dev/null +++ b/src/vuln_api_stub/mod.rs @@ -0,0 +1,207 @@ +use std::collections::{HashMap, HashSet}; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::thread; + +pub type PackageKey = (String, String, String); + +const NOT_FOUND_BODY: &str = r#"{"error":"not found"}"#; + +pub struct VulnApiStub { + pub base_url: String, + _handle: thread::JoinHandle<()>, +} + +/// Minimal TCP vuln-api stub for CLI integration tests. Binds an ephemeral +/// 127.0.0.1 port; unknown packages get a synthesized clean 200. +pub fn spawn_with_statuses( + package_checks: HashMap, + status_overrides: HashMap, +) -> VulnApiStub { + spawn_with_retry_once(package_checks, status_overrides, HashSet::new()) +} + +/// Like [`spawn_with_statuses`], but keys in `retry_once` answer their first +/// hit with 429 + `Retry-After: 1` and fall through to the scripted response +/// from the second hit on — for exercising the client's retry path. 
+pub fn spawn_with_retry_once( + package_checks: HashMap, + status_overrides: HashMap, + retry_once: HashSet, +) -> VulnApiStub { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let bound_port = listener.local_addr().expect("stub local_addr").port(); + let base_url = format!("http://127.0.0.1:{bound_port}"); + + let handle = thread::spawn(move || { + let mut pending_retries = retry_once; + for stream in listener.incoming() { + let Ok(mut stream) = stream else { + continue; + }; + handle_connection( + &mut stream, + &package_checks, + &status_overrides, + &mut pending_retries, + ); + } + }); + + VulnApiStub { + base_url, + _handle: handle, + } +} + +/// Read one HTTP request's bytes (through the header terminator) off `stream`. +pub fn read_http_request(stream: &mut std::net::TcpStream) -> Vec { + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + buf +} + +fn handle_connection( + stream: &mut std::net::TcpStream, + package_checks: &HashMap, + status_overrides: &HashMap, + pending_retries: &mut HashSet, +) { + let buf = read_http_request(stream); + let req = String::from_utf8_lossy(&buf); + + let path = req.lines().next().and_then(|l| l.split_whitespace().nth(1)); + + let (status_code, response_body, retry_after) = match path { + Some(path) => { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let key = ( + parts[2].to_string(), + urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(), + urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(), + ); + if pending_retries.remove(&key) { + (429, r#"{"error":"rate limited"}"#.to_string(), true) + } else { + let body = 
package_checks + .get(&key) + .cloned() + .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); + let status = status_overrides.get(&key).copied().unwrap_or(200); + (status, body, false) + } + } else { + (404, NOT_FOUND_BODY.to_string(), false) + } + } + None => (400, r#"{"error":"bad request"}"#.to_string(), false), + }; + + let status_text = status_text(status_code); + // `Connection: close` is load-bearing: the stub serves one response per + // connection, so without it reqwest pools the socket and a second request + // (the gate's tree pass makes several per run) races the close and fails. + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\nConnection: close\r\n\r\n{}", + status_code, + status_text, + if retry_after { "Retry-After: 1\r\n" } else { "" }, + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); +} + +/// Reason phrase for a stub status line. Shared with the in-crate test +/// stubs so the mapping lives once. 
+fn status_text(status_code: u16) -> &'static str { + match status_code { + 404 => "Not Found", + 401 => "Unauthorized", + 403 => "Forbidden", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ if status_code >= 400 => "Error", + _ => "OK", + } +} + +fn default_clean_response(eco: &str, name: &str, ver: &str) -> String { + format!( + r#"{{"ecosystem":"{eco}","package_name":"{name}","version":"{ver}","is_vulnerable":false,"matches":[]}}"# + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Read, Write}; + use std::net::TcpStream; + + fn get(base_url: &str, path: &str) -> String { + let addr = base_url.trim_start_matches("http://"); + let mut stream = TcpStream::connect(addr).expect("connect stub"); + let req = format!("GET {path} HTTP/1.1\r\nHost: localhost\r\n\r\n"); + stream.write_all(req.as_bytes()).unwrap(); + let mut resp = String::new(); + stream.read_to_string(&mut resp).unwrap(); + resp + } + + fn key(eco: &str, name: &str, ver: &str) -> super::PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) + } + + #[test] + fn scripted_package_check_and_status_override() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evil", "1.0.0"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true,"matches":[]}"#.to_string(), + ); + checks.insert(key("pypi", "flaky", "1.0.0"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "flaky", "1.0.0"), 503u16); + let stub = spawn_with_statuses(checks, statuses); + + let resp = get( + &stub.base_url, + "/v1/packages/pypi/evil/versions/1.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); + assert!(resp.contains(r#""is_vulnerable":true"#), "resp: {resp}"); + + let resp = get( + &stub.base_url, + "/v1/packages/pypi/flaky/versions/1.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 503"), "resp: {resp}"); + + // Unknown package → synthesized clean 200. 
+ let resp = get( + &stub.base_url, + "/v1/packages/pypi/unknown/versions/2.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); + assert!(resp.contains(r#""is_vulnerable":false"#), "resp: {resp}"); + } +} diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs new file mode 100644 index 0000000..8813f3e --- /dev/null +++ b/tests/cli_bare_install.rs @@ -0,0 +1,345 @@ +//! Hermetic e2e tests for zero-spec ("bare") installs. +//! +//! With a token and a `package.json`, bare `npm install` is gated like any +//! other install: the tree pass resolves the full lockfile set and verdicts +//! every package, so a vulnerable lockfile blocks (exit 1, `--force` escape). +//! Bare yarn/pnpm/uv installs have no safe dry-run — they exec unchecked +//! behind one honest stderr note. +//! +//! Harness mirrors `cli_tree.rs`: fake package manager on a private PATH +//! (tree-aware for npm, plain argv recorder for yarn/pnpm/uv) + local +//! registry stub + in-crate vuln-api stub. `oldpkg` is published in 2020 so +//! recency never blocks here. 
+ +#![cfg(unix)] + +mod common; + +use common::{ + corgea_isolated, key, spawn_oldpkg_registry_stub, vulnerable_body, write_fake_recorder, + write_fake_tree_pm, NPM_LOCK, RESOLUTION_FAILS, +}; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; +const RANGE_PACKAGE_JSON: &str = + r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"^1.0.0"}}"#; +const LOCK_OLDPKG_100: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"^1.0.0"}}, + "node_modules/oldpkg":{"version":"1.0.0"}}}"#; +const LOCK_OLDPKG_110: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"^1.0.0"}}, + "node_modules/oldpkg":{"version":"1.1.0"}}}"#; + +fn vulnerable_evildep_body() -> String { + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None) +} + +fn vulnerable_oldpkg_body() -> String { + vulnerable_body("npm", "oldpkg", "1.0.0", "MAL-2024-0001", None) +} + +/// `corgea` wired to a fake package manager, the registry + vuln-api stubs, +/// a token, and a throwaway project dir as cwd. +struct BareHarness { + cmd: Command, + marker: PathBuf, + project: TempDir, + _home: TempDir, + _bin: TempDir, +} + +impl BareHarness { + /// `npm_payload`: `Some` wires a tree-aware fake npm with that canned + /// lockfile (or `RESOLUTION_FAILS`); `None` wires a plain recorder for + /// `binary`. `exit_code` is what the fake exits with on the exec'd + /// (non-tree) invocation. 
+ fn new( + binary: &str, + checks: HashMap, + npm_payload: Option<&str>, + exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let project = TempDir::new().expect("project dir"); + let marker = bin.path().join("pm-argv.txt"); + match npm_payload { + Some(payload) => write_fake_tree_pm(bin.path(), "npm", &marker, payload, exit_code), + None => write_fake_recorder(bin.path(), binary, &marker, exit_code), + } + let registry = spawn_oldpkg_registry_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_NPM_REGISTRY", ®istry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1") + .env("CORGEA_TOKEN", "test-token") + .current_dir(project.path()); + Self { + cmd, + marker, + project, + _home: home, + _bin: bin, + } + } + + fn with_package_json(self) -> Self { + std::fs::write(self.project.path().join("package.json"), PACKAGE_JSON) + .expect("write package.json"); + self + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn bare_npm_install_vulnerable_lockfile_blocks() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + 
!stderr.contains("not gated"), + "gated bare npm must not print the ungated note: {stderr}" + ); + // A bare install names no targets, so everything resolved is the + // existing tree's — the refusal must say so. + assert!( + stderr.contains("your existing dependency tree has known-vulnerable packages"), + "bare install blames the existing tree: {stderr}" + ); +} + +#[test] +fn bare_npm_install_clean_lockfile_proceeds() { + let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn bare_npm_install_force_overrides_block() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h + .cmd + .args(["npm", "--force", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the install"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn bare_npm_install_json_carries_tree_object() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h + .cmd + .args(["npm", "--json", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + 
assert_eq!(parsed["tree"]["mode"], "full"); + assert_eq!(parsed["tree"]["resolved_count"], 2); + assert_eq!(parsed["summary"]["vulnerable"], 1); + assert_eq!( + parsed["results"].as_array().map(Vec::len), + Some(0), + "zero named targets" + ); +} + +#[test] +fn bare_npm_resolution_failure_falls_back_with_warning() { + // Fake npm exits 1 on `--package-lock-only`. Nothing named remains to + // verify, so the install proceeds behind the loud fallback warning. + let mut h = + BareHarness::new("npm", HashMap::new(), Some(RESOLUTION_FAILS), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "fallback must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_npm_without_package_json_passes_through() { + // No package.json in cwd → nothing to resolve → straight exec, no gate. 
+ let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 3); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(3), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(!stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + !String::from_utf8_lossy(&out.stderr).contains("not gated"), + "npm never gets the yarn/pnpm/uv note" + ); +} + +#[test] +fn npm_ci_without_lockfile_passes_through() { + let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); + assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); +} + +#[test] +fn npm_ci_with_lockfile_is_gated() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + std::fs::write(h.project.path().join("package-lock.json"), NPM_LOCK) + .expect("write package-lock"); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} + +#[test] +fn npm_ci_checks_locked_versions_not_package_json_resolution() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = BareHarness::new("npm", checks, Some(LOCK_OLDPKG_110), 0); + std::fs::write(h.project.path().join("package.json"), RANGE_PACKAGE_JSON) + .expect("write package.json"); + std::fs::write(h.project.path().join("package-lock.json"), LOCK_OLDPKG_100) + 
.expect("write package-lock"); + + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "locked vulnerable version must block" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("oldpkg"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}"); + assert!( + !stdout.contains("1.1.0"), + "ci must not use package.json resolution: {stdout}" + ); +} + +#[test] +fn bare_npm_tokenless_public_mode_checks_tree() { + // package.json present but no token → public mode still gates the tree. + let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn bare_ungated_managers_print_note_and_exec() { + // yarn's nonzero exit also proves the manager's own exit code propagates. 
+ let cases = [ + ("yarn", &["yarn", "install"][..], "install", 7), + ("pnpm", &["pnpm", "install"][..], "install", 0), + ("uv", &["uv", "add"][..], "add", 0), + ("uv", &["uv", "pip", "install"][..], "pip install", 0), + ]; + for (binary, args, forwarded_argv, exit_code) in cases { + let mut h = BareHarness::new(binary, HashMap::new(), None, exit_code); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(exit_code), "{args:?}"); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + let note = format!( + "note: bare '{}' is not gated (no safe dry-run) — dependencies install unchecked", + args.join(" ") + ); + assert!( + String::from_utf8_lossy(&out.stderr).contains(&note), + "{args:?} stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + } +} + +#[test] +fn bare_yarn_note_prints_without_token_too() { + let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h + .cmd + .args(["yarn", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("bare 'yarn install' is not gated"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn yarn_named_target_does_not_print_bare_note() { + // A named target takes the gated path: named-only warning, no bare note.
+ let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + let out = h + .cmd + .args(["yarn", "add", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named target proceeds"); + assert_eq!(h.recorded_argv().as_deref(), Some("add oldpkg@1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("not gated"), + "named install must not print the bare note: {stderr}" + ); + assert!( + stderr.contains("transitive dependencies not checked"), + "named-only warning still applies to yarn: {stderr}" + ); +} diff --git a/tests/cli_command_surface.rs b/tests/cli_command_surface.rs new file mode 100644 index 0000000..c108621 --- /dev/null +++ b/tests/cli_command_surface.rs @@ -0,0 +1,245 @@ +//! Command-surface tests for install wrappers: leading package-manager flags, +//! invalid install-like commands, project guards, and pip environment guard. + +#![cfg(unix)] + +mod common; + +use common::{ + corgea_isolated, spawn_oldpkg_registry_stub, write_fake_recorder, write_fake_tree_pm, + write_script, RESOLUTION_FAILS, +}; +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +struct SurfaceHarness { + cmd: Command, + marker: PathBuf, + project: TempDir, + _home: TempDir, + _bin: TempDir, +} + +impl SurfaceHarness { + fn new(binary: &str) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin"); + let project = TempDir::new().expect("temp project"); + let marker = bin.path().join("pm-argv.txt"); + match binary { + "pip" | "npm" => write_fake_tree_pm(bin.path(), binary, &marker, RESOLUTION_FAILS, 0), + _ => write_fake_recorder(bin.path(), binary, &marker, 0), + } + let registry = spawn_oldpkg_registry_stub(); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_NPM_REGISTRY", &registry) + .current_dir(project.path()); + Self { + cmd, + marker, + project, + _home: home, + _bin: bin, + } + } + + fn
recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn wrapper_help_is_corgea_help_not_package_manager_help() { + let mut h = SurfaceHarness::new("npm"); + let out = h.cmd.args(["npm", "--help"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("Usage: corgea npm"), "stdout: {stdout}"); +} + +#[test] +fn npm_leading_package_manager_flags_are_forwarded_and_install_is_gated() { + let mut h = SurfaceHarness::new("npm"); + let out = h + .cmd + .args([ + "npm", + "--loglevel", + "silent", + "install", + "oldpkg@1.0.0", + "--save-dev", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!( + h.recorded_argv().as_deref(), + Some("--loglevel silent install oldpkg@1.0.0 --save-dev") + ); + assert!( + String::from_utf8_lossy(&out.stdout).contains("Pre-checking"), + "stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); +} + +#[test] +fn npm_leading_flags_with_bare_values_do_not_hide_install() { + for (flag, value) in [ + ("--userconfig", ".npmrc"), + ("--cache", ".npm-cache"), + ("--globalconfig", "npmrc-global"), + ] { + let mut h = SurfaceHarness::new("npm"); + let out = h + .cmd + .args(["npm", flag, value, "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "flag {flag}"); + let expected = format!("{flag} {value} install oldpkg@1.0.0"); + assert_eq!( + h.recorded_argv().as_deref(), + Some(expected.as_str()), + "flag {flag}" + ); + assert!( + String::from_utf8_lossy(&out.stdout).contains("Pre-checking"), + "flag {flag} stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); + } +} + +#[test] +fn pip_add_is_refused_with_install_suggestion() { + let mut h = SurfaceHarness::new("pip"); + let out = h + .cmd + .args(["pip", "add", "oldpkg"]) + .output() + .expect("run corgea"); +
assert_eq!(out.status.code(), Some(2)); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("corgea pip install"), "stderr: {stderr}"); +} + +#[test] +fn top_level_pip3_is_refused_with_pip_suggestion() { + let mut h = SurfaceHarness::new("pip"); + let out = h + .cmd + .args(["pip3", "install", "oldpkg"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(2)); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("corgea pip"), "stderr: {stderr}"); +} + +#[test] +fn uv_install_is_refused_with_uv_pip_install_suggestion() { + let mut h = SurfaceHarness::new("uv"); + let out = h + .cmd + .args(["uv", "install", "oldpkg"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(2)); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("corgea uv pip install"), "stderr: {stderr}"); +} + +#[test] +fn npm_in_pnpm_project_is_refused_with_suggestion() { + let mut h = SurfaceHarness::new("npm"); + std::fs::write( + h.project.path().join("pnpm-lock.yaml"), + "lockfileVersion: 9\n", + ) + .expect("write pnpm lock"); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("corgea pnpm add"), "stderr: {stderr}"); +} + +#[test] +fn pip_externally_managed_environment_blocks_without_override() { + let mut h = SurfaceHarness::new("pip"); + h.cmd.env("CORGEA_PIP_EXTERNALLY_MANAGED", "1"); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + 
+ stderr.contains("externally managed Python environment"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip_externally_managed_environment_allows_explicit_target() { + let mut h = SurfaceHarness::new("pip"); + h.cmd.env("CORGEA_PIP_EXTERNALLY_MANAGED", "1"); + let out = h + .cmd + .args(["pip", "install", "--target", "./vendor", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install --target ./vendor oldpkg==1.0.0") + ); +} + +#[test] +fn json_install_keeps_package_manager_stdout_off_stdout() { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin"); + let project = TempDir::new().expect("temp project"); + let marker = bin.path().join("pm-argv.txt"); + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf 'pm stdout\\n'\nprintf 'pm stderr\\n' >&2\nprintf '%s' \"$*\" > '{}'\nexit 0\n", + marker.display() + ); + write_script(bin.path(), "pip", &script); + let registry = spawn_oldpkg_registry_stub(); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .current_dir(project.path()); + + let out = cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let stdout = String::from_utf8_lossy(&out.stdout); + let _: serde_json::Value = serde_json::from_slice(&out.stdout).expect("stdout JSON"); + assert!(!stdout.contains("pm stdout"), "stdout: {stdout}"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("pm stdout"), "stderr: {stderr}"); + assert!(stderr.contains("pm stderr"), "stderr: {stderr}"); + assert_eq!( + std::fs::read_to_string(marker).ok().as_deref(), + Some("install oldpkg==1.0.0") + ); + drop(home); +} diff --git a/tests/cli_deps.rs b/tests/cli_deps.rs index 7723e8c..596c8cf 100644 --- a/tests/cli_deps.rs +++ b/tests/cli_deps.rs @@ -1,28 +1,9 @@ +mod common; +
+use common::{corgea_isolated, fixture}; use std::process::Command; use tempfile::TempDir; -fn corgea_isolated() -> (Command, TempDir) { - let home = TempDir::new().expect("temp HOME"); - let mut cmd = Command::new(env!("CARGO_BIN_EXE_corgea")); - cmd.env("HOME", home.path()) - .env("USERPROFILE", home.path()) - .env_remove("CORGEA_TOKEN") - .env_remove("CORGEA_URL") - .env_remove("AI_AGENT") - .env_remove("CODEX_SANDBOX") - .env_remove("CLAUDECODE") - .env_remove("CLAUDE_CODE") - .env_remove("CURSOR_AGENT") - .env_remove("CURSOR_TRACE_ID") - .env_remove("GEMINI_CLI") - .env_remove("PI_AGENT"); - (cmd, home) -} - -fn fixture(name: &str) -> String { - format!("{}/tests/fixtures/{}", env!("CARGO_MANIFEST_DIR"), name) -} - #[test] fn cli_scan_runs_without_token_or_config() { let (mut cmd, _home) = corgea_isolated(); diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs new file mode 100644 index 0000000..4b29a5c --- /dev/null +++ b/tests/cli_exec_fallback.rs @@ -0,0 +1,113 @@ +//! Hermetic e2e tests for package-manager binary resolution: the pip→pip3 +//! fallback and the missing-binary error (exit 127). +//! +//! Same harness shape as `cli_install.rs`: the real `corgea` binary, a local +//! TcpListener stub standing in for PyPI, and a controlled `PATH` dir that +//! either holds a fake `pip3` (recording its argv to a marker file) or +//! nothing at all. Unix-only — the fake manager is a shell script. + +#![cfg(unix)] + +mod common; + +use common::{corgea_isolated, spawn_oldpkg_registry_stub, write_fake_recorder}; +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +/// Isolated `corgea` wired to the PyPI stub, with `PATH` set to a private +/// temp dir containing only the named fake binaries. 
+struct FallbackHarness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl FallbackHarness { + fn new(binaries: &[&str]) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + for binary in binaries { + write_fake_recorder(bin.path(), binary, &marker, 0); + } + let registry = spawn_oldpkg_registry_stub(); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + /// The argv the fake package manager was invoked with, if it ran. + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn pip_install_falls_back_to_pip3_when_pip_missing() { + let mut h = FallbackHarness::new(&["pip3"]); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install oldpkg==1.0.0"), + "the install must run via pip3 with forwarded args" + ); +} + +#[test] +fn pip_passthrough_falls_back_to_pip3() { + let mut h = FallbackHarness::new(&["pip3"]); + let out = h.cmd.args(["pip", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); +} + +#[test] +fn pip_missing_both_pip_and_pip3_exits_127_with_message() { + let mut h = FallbackHarness::new(&[]); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + assert_eq!(h.recorded_argv(), None, "nothing must have run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'pip' not found on PATH (also tried 'pip3')"), + "stderr:
{stderr}" + ); +} + +#[test] +fn npm_missing_binary_error_names_binary_without_fallback() { + let mut h = FallbackHarness::new(&[]); + let out = h.cmd.args(["npm", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'npm' not found on PATH"), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("also tried"), + "npm has no fallback alias; stderr: {stderr}" + ); +} diff --git a/tests/cli_install.rs b/tests/cli_install.rs new file mode 100644 index 0000000..8a29f30 --- /dev/null +++ b/tests/cli_install.rs @@ -0,0 +1,301 @@ +//! Hermetic end-to-end tests for the install wrappers (`corgea pip|npm …`). +//! +//! Each test spawns the real binary (`CARGO_BIN_EXE_corgea`) against: +//! * a local TcpListener stub standing in for PyPI / the npm registry +//! (wired up via `CORGEA_PYPI_REGISTRY` / `CORGEA_NPM_REGISTRY`), and +//! * a fake package manager on `PATH` — a shell script that records its +//! argv to a marker file, proving whether the install actually ran. +//! +//! No live network. The fake package managers are Unix shell scripts, so +//! the whole file is Unix-only (matching the repo's Linux/macOS CI). + +#![cfg(unix)] + +mod common; + +use common::{ + corgea_isolated, spawn_http_stub, write_fake_recorder, write_fake_tree_pm, NOT_FOUND_JSON, + OLDPKG_NPM_PACKUMENT, OLDPKG_PYPI_JSON, RESOLUTION_FAILS, +}; +use std::path::PathBuf; +use std::process::Command; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use tempfile::TempDir; + +/// Spawn a registry stub serving both the PyPI and npm routes the +/// resolver hits. Returns the base URL and a counter of accepted +/// connections (used to prove "no registry hit" for passthroughs). 
+/// +/// Routes: +/// * `/pypi/oldpkg/json` — one release, published 2020-01-01 +/// * `/pypi/freshpkg/json` — one release, published one hour ago +/// * `/oldpkg` — npm metadata, published 2020-01-01 +/// * `/freshpkg` — npm metadata, published one hour ago +/// * anything else — 404 +fn spawn_registry_stub() -> (String, Arc<AtomicUsize>) { + let hits = Arc::new(AtomicUsize::new(0)); + let hits_in_stub = Arc::clone(&hits); + let base_url = spawn_http_stub(move |path| { + hits_in_stub.fetch_add(1, Ordering::SeqCst); + let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) + .format("%Y-%m-%dT%H:%M:%SZ") + .to_string(); + match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/pypi/freshpkg/json" => ( + "200 OK", + format!( + r#"{{"info":{{"name":"freshpkg"}},"releases":{{"9.9.9":[{{"upload_time_iso_8601":"{fresh_ts}"}}]}}}}"#, + ), + ), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + "/freshpkg" => ( + "200 OK", + format!( + r#"{{"dist-tags":{{"latest":"9.9.9"}},"versions":{{"9.9.9":{{}}}},"time":{{"9.9.9":"{fresh_ts}"}}}}"#, + ), + ), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + } + }); + (base_url, hits) +} + +/// A ready-to-run wrapper invocation: isolated `corgea` command with the +/// registry stub wired in and a fake `binary` on a PATH of its own. +struct WrapperHarness { + cmd: Command, + marker: PathBuf, + registry_hits: Arc<AtomicUsize>, + _home: TempDir, + _bin: TempDir, +} + +impl WrapperHarness { + /// `registry_env` is `CORGEA_PYPI_REGISTRY` or `CORGEA_NPM_REGISTRY`, + /// matching `binary`'s ecosystem.
+ fn new(binary: &str, registry_env: &str, pm_exit_code: i32) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + match binary { + "pip" | "npm" => { + write_fake_tree_pm(bin.path(), binary, &marker, RESOLUTION_FAILS, pm_exit_code) + } + _ => write_fake_recorder(bin.path(), binary, &marker, pm_exit_code), + } + let (base_url, registry_hits) = spawn_registry_stub(); + cmd.env("PATH", bin.path()).env(registry_env, &base_url); + Self { + cmd, + marker, + registry_hits, + _home: home, + _bin: bin, + } + } + + /// The argv the fake package manager was invoked with, if it ran. + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn pip_fresh_pin_blocks_without_running_install() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn pip_old_pin_runs_install_with_forwarded_args() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("published"), "stdout: {stdout}"); +} +
+#[test] +fn pip_no_fail_demotes_block_and_installs() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "--no-fail", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install freshpkg==9.9.9"), + "--no-fail must still run the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); +} + +#[test] +fn pip_non_install_subcommand_passes_through_without_registry_hit() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "list"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "passthrough must not touch the registry" + ); +} + +#[test] +fn pip_json_reports_fresh_pin_as_recent() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "--json", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["results"][0]["status"], "recent"); + assert_eq!(parsed["results"][0]["name"], "freshpkg"); + assert_eq!(parsed["summary"]["recent"], 1); +} + +#[test] +fn pip_resolution_error_prints_error_but_install_proceeds() { + // `nosuchpkg` hits the stub's 404 route → an error outcome, which + // warns but does not block in public mode. Authenticated mode fails + // closed on resolution errors — see cli_verdict.rs. 
+ let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + h.registry_hits.load(Ordering::SeqCst) >= 1, + "the 404 route must have been hit" + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install nosuchpkg==1.0.0"), + "a resolution error must not block the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("not found"), "stdout: {stdout}"); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); +} + +#[test] +fn pip_mixed_fresh_and_old_pins_block_without_running_install() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "one recent target must block the whole install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!(stdout.contains("1 ok, 1 recent"), "stdout: {stdout}"); +} + +#[test] +fn npm_fresh_pin_blocks_without_running_install() { + let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_old_pin_runs_install_with_forwarded_args() { + let mut h = WrapperHarness::new("npm", 
"CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); +} + +#[test] +fn wrapper_forwards_package_manager_exit_code() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 7); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "the package manager's exit code must be forwarded" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); +} diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs new file mode 100644 index 0000000..2851fcf --- /dev/null +++ b/tests/cli_provenance.rs @@ -0,0 +1,267 @@ +//! Hermetic e2e tests for provenance labels on tree-pass findings: +//! `(from requirements)` for pip-requested packages, `(already in +//! package.json)` for npm direct deps the project already declares (plus the +//! `fix with:` advertised-fix hint), `(transitive)` otherwise, and the +//! `"origin"` field in `--json` output. +//! +//! Same harness pattern as `cli_tree.rs`: fake package manager on a private +//! PATH (answers the tree-resolution invocation with a canned payload), +//! a local registry stub, and the in-crate vuln-api stub. `oldpkg` is +//! published in 2020 so recency never blocks — every block is the verdict's. + +#![cfg(unix)] + +mod common; + +use common::{key, TreeHarness, NPM_LOCK}; +use std::collections::HashMap; +use tempfile::TempDir; + +/// Vulnerable verdict body; `fixed: None` renders `"fixed_version":null`. 
+fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: Option<&str>) -> String { + common::vulnerable_body(ecosystem, name, version, "MAL-2024-0002", fixed) +} + +/// Pip report: only `reqpkg`, requested (as if it came from a `-r` file). +const PIP_REQ_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}]}"#; + +/// Pip report mixing all three origins: `oldpkg` (named on the CLI, matches +/// the named outcome), `reqpkg` (requested via `-r`), `evildep` (transitive). +const PIP_MIXED_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// Project manifest that already declares `evildep` as a direct dep. +const PROJECT_MANIFEST: &str = + r#"{"name":"proj","version":"1.0.0","dependencies":{"evildep":"^0.4.0"}}"#; + +/// Project dir holding a `package.json` that already declares `evildep`. +fn npm_project() -> TempDir { + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("package.json"), PROJECT_MANIFEST) + .expect("write package.json"); + project +} + +#[test] +fn pip_requirements_finding_labeled_from_requirements() { + // The flagged package comes from a `-r` file (pip marks it `requested`), + // so it must not be mislabeled "(transitive)". 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "reqpkg", "6.0.0"), + vulnerable_body("pypi", "reqpkg", "6.0.0", None), + ); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), PIP_REQ_REPORT); + let out = h + .cmd + .args(["pip", "install", "-r", "reqs.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "requested vuln must block"); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("reqpkg@6.0.0 (from requirements)"), + "stdout: {stdout}" + ); + assert!(!stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn npm_preexisting_direct_dep_labeled_with_fix_hint() { + // `evildep` is already a direct dep in the project's package.json; the + // finding gets the pre-existing label plus the fix-command hint. The + // fix 1.2.2 covers every advisory (`safe_version` is Some), so the hint + // drops the "(advertised fix)" hedge. + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2\n"), + "verified fix hint must print without the advertised-fix hedge: {stdout}" + ); +} + +#[test] +fn npm_preexisting_fix_hint_keeps_hedge_when_fix_is_partial() { + // One advisory advertises fix 1.2.2, the other has no fix: bumping is + // still the 
best move but doesn't clear everything, so the steer line + // stays quiet and the fix-command hint keeps its "(advertised fix)" + // hedge. + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"1.2.2"}, + {"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), + "partial fix hint must keep the hedge: {stdout}" + ); + assert!( + !stdout.contains("→ safe version"), + "a partial fix must not print the steer: {stdout}" + ); +} + +/// PR #108 review regression: unverifiable tree findings block too, so the +/// refusal may not blame the existing tree when a command-added transitive +/// is part of the block — even if the only *vulnerable* finding is a +/// pre-existing direct dep. 
+#[test] +fn preexisting_vulnerable_with_unverifiable_transitive_keeps_generic_refusal() { + const LOCK_WITH_NEWDEP: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}, + "node_modules/newdep":{"version":"2.0.0"}}}"#; + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut statuses = HashMap::new(); + statuses.insert(key("npm", "newdep", "2.0.0"), 503u16); + let mut h = TreeHarness::new("npm", checks, statuses, LOCK_WITH_NEWDEP); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "must block"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Refusing to run install. Pass --force to proceed despite findings."), + "the command-added unverifiable transitive keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains("your existing dependency tree"), + "existing-tree refusal must not fire when a command-added finding blocks: {stderr}" + ); +} + +#[test] +fn npm_preexisting_without_fix_has_no_hint() { + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("fix with:"), + "no advertised fix → no hint; 
stdout: {stdout}" + ); +} + +#[test] +fn pip_json_carries_origin_per_tree_entry() { + // All-clean run mixing origins: the named `oldpkg` matches its outcome, + // `reqpkg` (requested) and `evildep` (transitive) land in `tree.transitive` + // with their origins. + let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), PIP_MIXED_REPORT); + let out = h + .cmd + .args([ + "pip", + "--json", + "install", + "oldpkg==1.0.0", + "-r", + "reqs.txt", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + let entries = parsed["tree"]["transitive"] + .as_array() + .expect("transitive array"); + let origin_of = |name: &str| { + entries + .iter() + .find(|e| e["name"] == name) + .unwrap_or_else(|| panic!("{name} missing from tree entries"))["origin"] + .clone() + }; + assert_eq!(origin_of("reqpkg"), "requested"); + assert_eq!(origin_of("evildep"), "transitive"); + assert_eq!(entries.len(), 2, "named oldpkg must not be a tree entry"); +} + +#[test] +fn npm_json_carries_preexisting_origin() { + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "--json", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["transitive"][0]["name"], "evildep"); + assert_eq!(parsed["tree"]["transitive"][0]["origin"], "pre-existing"); + assert_eq!( + parsed["tree"]["transitive"][0]["verdict"]["status"], + "vulnerable" + ); +} 
diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs new file mode 100644 index 0000000..a14daaa --- /dev/null +++ b/tests/cli_refusal_context.rs @@ -0,0 +1,178 @@ +//! Hermetic e2e tests for refusal-message context: the refusal blames the +//! existing tree only when every vulnerable finding predates the command +//! (bare installs, or manifest-declared pre-existing deps — see +//! `cli_bare_install.rs` for the positive case). A finding on a named +//! target, or a transitive finding the named targets pull in, keeps the +//! generic refusal. +//! +//! Same harness as `cli_tree.rs`, pip-only: a fake pip on a private PATH +//! answers the `--dry-run --report -` tree pass with a canned report, a local +//! pypi registry stub publishes `oldpkg` in 2020 (recency never blocks), and +//! the in-crate vuln-api stub supplies verdicts. Every block here is the +//! verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::{key, TreeHarness, TREE_REPORT}; +use corgea::vuln_api_stub::PackageKey; +use std::collections::HashMap; +use tempfile::TempDir; + +/// Refusal when the existing tree alone caused the block. +const TREE_REFUSAL: &str = "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force."; +/// Refusal when a named target carries a blocking verdict. +const GENERIC_REFUSAL: &str = "Refusing to run install. 
Pass --force to proceed despite findings."; + +fn vulnerable_body(name: &str, version: &str) -> String { + common::vulnerable_body("pypi", name, version, "MAL-2024-0002", None) +} + +fn harness(checks: HashMap, statuses: HashMap) -> TreeHarness { + TreeHarness::new("pip", checks, statuses, TREE_REPORT) +} + +fn run_install(h: &mut TreeHarness) -> std::process::Output { + h.cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea") +} + +#[test] +fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. + // `evildep` is being pulled in *by this command*, so the existing-tree + // refusal ("none were added by this command") would lie. + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut h = harness(checks, HashMap::new()); + let out = run_install(&mut h); + + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "a transitive dep of a named target keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire for command-added transitives: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("1 vulnerable (1 from resolved tree)"), + "summary must attribute the finding to the tree: {stdout}" + ); +} + +/// PR #108 review regression: a requirements-only install has no named +/// outcomes — exactly like a bare install — but its resolved set is added +/// by this command. A vulnerable transitive of a clean requirements entry +/// must keep the generic refusal. 
+#[test] +fn requirements_only_install_with_vulnerable_transitive_keeps_generic_refusal() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut h = harness(checks, HashMap::new()); + // `pip install -r reqs.txt` with no named targets — the canned tree + // report still resolves oldpkg (requested) + evildep (transitive). + let reqs_dir = TempDir::new().expect("reqs dir"); + let reqs = reqs_dir.path().join("reqs.txt"); + std::fs::write(&reqs, "oldpkg==1.0.0\n").expect("write reqs.txt"); + let out = h + .cmd + .args(["pip", "install", "-r"]) + .arg(&reqs) + .output() + .expect("run corgea"); + + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "requirements-driven transitives keep the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire for a requirements-only install: {stderr}" + ); +} + +#[test] +fn named_vulnerable_keeps_generic_refusal() { + // The named `oldpkg` itself is flagged; `evildep` is clean. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("oldpkg", "1.0.0"), + ); + let mut h = harness(checks, HashMap::new()); + let out = run_install(&mut h); + + assert_eq!(out.status.code(), Some(1), "named vuln must block"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "named finding keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire on a named finding: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + !stdout.contains("from resolved tree"), + "summary must not attribute a named finding to the tree: {stdout}" + ); +} + +#[test] +fn named_unverifiable_with_transitive_vulnerable_keeps_generic_refusal() { + // The named `oldpkg` verdict 503s (unverifiable, fail-closed) while the + // transitive `evildep` is vulnerable. The command's own target is part of + // the block, so the existing-tree refusal would mislead. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); + let mut h = harness(checks, statuses); + let out = run_install(&mut h); + + assert_eq!(out.status.code(), Some(1), "must block"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "named unverifiable keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire while a named target blocks: {stderr}" + ); +} diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs new file mode 100644 index 0000000..a266192 --- /dev/null +++ b/tests/cli_remediation.rs @@ -0,0 +1,119 @@ +//! Hermetic e2e tests for remediation steering: a blocked install names the +//! safe version from the verdict's `fixed_version` data — the highest fix +//! covering every advisory. When any advisory has no known fix, no steer +//! prints and JSON `remediation` is null. +//! +//! Uses the shared `common::PipHarness` (pypi stub published 2020 so recency +//! never blocks, a fake pip recording its argv, the in-crate vuln-api stub, +//! and a set token) — every block here is the verdict's doing. 
+ +#![cfg(unix)] + +mod common; + +use common::{key, vulnerable_body, PipHarness}; +use std::collections::HashMap; + +fn fixed_body() -> String { + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) +} + +fn no_fix_body() -> String { + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None) +} + +#[test] +fn fixed_match_blocks_and_names_safe_version() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("fixed in 2.0.0"), "stdout: {stdout}"); + assert!( + stdout.contains("safe version: oldpkg@2.0.0"), + "stdout: {stdout}" + ); +} + +#[test] +fn no_fix_match_reports_no_fixed_version_known() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("no fixed version known"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("safe version:"), + "no steer line when the fix is unknown: {stdout}" + ); +} + +#[test] +fn json_remediation_carries_safe_version() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + 
.args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!( + parsed["results"][0]["verdict"]["remediation"], "2.0.0", + "parsed: {parsed}" + ); +} + +#[test] +fn json_remediation_null_when_no_fix() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + let v = &parsed["results"][0]["verdict"]; + assert!( + v.as_object().unwrap().contains_key("remediation"), + "verdict must carry the remediation key: {parsed}" + ); + assert!( + v["remediation"].is_null(), + "remediation must be null when no fix is known: {parsed}" + ); +} diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs new file mode 100644 index 0000000..8a644d1 --- /dev/null +++ b/tests/cli_tree.rs @@ -0,0 +1,300 @@ +//! Hermetic e2e tests for the full-tree resolution pass +//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_verdict.rs` harness pattern (fake pip on a private PATH + +//! local pypi registry stub + in-crate vuln-api stub) with a dry-run-aware +//! fake pip: a `--dry-run` invocation answers with a canned pip report on +//! stdout, every other invocation records its argv to a marker and exits. +//! `oldpkg==1.0.0` is published in 2020 so recency never blocks here — every +//! block is the verdict's doing. 
+ +#![cfg(unix)] + +mod common; + +use common::{ + key, vulnerable_body, TreeHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, UV_COMPILED, +}; +use std::collections::HashMap; +use tempfile::TempDir; + +fn vulnerable_evildep_body(ecosystem: &str) -> String { + vulnerable_body(ecosystem, "evildep", "0.4.2", "MAL-2024-0002", None) +} + +#[test] +fn transitive_vulnerable_blocks_install() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. + let cases = [ + ( + "pip", + "pypi", + TREE_REPORT, + &["pip", "install", "oldpkg==1.0.0"][..], + ), + ( + "npm", + "npm", + NPM_LOCK, + &["npm", "install", "oldpkg@1.0.0"][..], + ), + ( + "uv", + "pypi", + UV_COMPILED, + &["uv", "pip", "install", "oldpkg==1.0.0"][..], + ), + ]; + for (binary, eco, payload, args) in cases { + let mut checks = HashMap::new(); + checks.insert(key(eco, "evildep", "0.4.2"), vulnerable_evildep_body(eco)); + let mut h = TreeHarness::new(binary, checks, HashMap::new(), payload); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "{binary}: transitive vuln must block" + ); + assert_eq!( + h.recorded_argv(), + None, + "{binary} must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "{binary} stdout: {stdout}"); + } + } +} + +#[test] +fn uv_requirements_file_install_is_tree_gated() { + // `uv pip install -r requirements.txt` names no targets — the gate must + // still resolve the full set via `uv pip compile` and block on the + // vulnerable pin instead of exec'ing unchecked. 
+ let cwd = TempDir::new().expect("temp cwd"); + std::fs::write(cwd.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("uv", checks, HashMap::new(), UV_COMPILED); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["uv", "pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None, "uv must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("not gated"), + "gated uv requirements install must not print the bare note: {stderr}" + ); +} + +#[test] +fn uv_add_requirements_file_is_tree_gated() { + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write(cwd.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("uv", checks, HashMap::new(), UV_COMPILED); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["uv", "add", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None, "uv must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } +} + +#[test] +fn uv_pip_sync_requirements_file_is_tree_gated() { + let cwd = TempDir::new().expect("temp cwd"); + 
std::fs::write(cwd.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("uv", checks, HashMap::new(), UV_COMPILED); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["uv", "pip", "sync", "requirements.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None, "uv must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } +} + +#[test] +fn tree_pass_runs_via_pip3_when_pip_is_absent() { + // Only `pip3` exists on PATH (common Linux/macOS). The tree pass must + // use the same pip → pip3 fallback as the exec path instead of silently + // degrading to named-only — the transitive `evildep` must still block. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("pip3", checks, HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("transitive dependencies not checked"), + "tree pass must not degrade with only pip3 on PATH: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} + +#[test] +fn resolution_failure_falls_back_with_loud_warning() { + // The fake manager fails its tree invocation (pip: exits 2 on `--dry-run`, + // simulating an old pip with no `--report`; npm: exits 1 on + // `--package-lock-only`). Stub is all-clean, so the named-only fallback + // proceeds. 
+ let cases = [ + ( + "pip", + &["pip", "install", "oldpkg==1.0.0"][..], + "install oldpkg==1.0.0", + ), + ( + "npm", + &["npm", "install", "oldpkg@1.0.0"][..], + "install oldpkg@1.0.0", + ), + ( + "uv", + &["uv", "pip", "install", "oldpkg==1.0.0"][..], + "pip install oldpkg==1.0.0", + ), + ]; + for (binary, args, forwarded_argv) in cases { + let mut h = TreeHarness::new(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "{binary}: clean named-only must proceed" + ); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "{binary} stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); + } +} + +#[test] +fn pip_json_carries_tree_object() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + assert_eq!(parsed["tree"]["transitive"][0]["name"], "evildep"); + assert_eq!( + parsed["tree"]["transitive"][0]["verdict"]["status"], + "vulnerable" + ); + assert_eq!(parsed["summary"]["vulnerable"], 1); +} + +#[test] +fn pip_clean_tree_proceeds() { + // Stub default-clean (no overrides), so every resolved package is clean. 
+ let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_does_not_touch_project_lockfile() { + // Run from a project dir holding sentinel manifests; the resolver works in + // a throwaway copy, so after a gated run both files are byte-identical. + let project = TempDir::new().expect("project dir"); + let pkg_json = project.path().join("package.json"); + let lock_json = project.path().join("package-lock.json"); + let pkg_sentinel = r#"{"name":"sentinel","version":"0.0.0"}"#; + let lock_sentinel = r#"{"name":"sentinel","lockfileVersion":3,"packages":{}}"#; + std::fs::write(&pkg_json, pkg_sentinel).expect("write package.json"); + std::fs::write(&lock_json, lock_sentinel).expect("write package-lock.json"); + + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_evildep_body("npm"), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + + assert_eq!( + std::fs::read_to_string(&pkg_json).unwrap(), + pkg_sentinel, + "package.json must be untouched" + ); + assert_eq!( + std::fs::read_to_string(&lock_json).unwrap(), + lock_sentinel, + "package-lock.json must be untouched" + ); +} diff --git a/tests/cli_uv_sync.rs b/tests/cli_uv_sync.rs new file mode 100644 index 0000000..ff39c4c --- /dev/null +++ b/tests/cli_uv_sync.rs @@ -0,0 +1,198 @@ +//! 
Hermetic e2e tests for the `corgea uv sync` gate. +//! +//! With a token, `uv sync` is gated from the project's `uv.lock`: every +//! index-sourced pin is verdicted against the vuln-api stub before uv runs. +//! Without a lockfile it execs behind an honest note. Without a token it uses +//! public fail-open vulnerability checks. +//! Harness: fake `uv` argv recorder on a private PATH + in-crate vuln-api +//! stub + throwaway project dir as cwd. No registry stub — the sync gate +//! does no recency resolution. + +#![cfg(unix)] + +mod common; + +use common::{corgea_isolated, key, vulnerable_body, write_fake_recorder}; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +/// `proj` is the project itself (editable — skipped); `evildep` is the one +/// index-sourced pin the gate must verdict. +const UV_LOCK: &str = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." 
} + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +"#; + +struct SyncHarness { + cmd: Command, + marker: PathBuf, + project: TempDir, + _home: TempDir, + _bin: TempDir, +} + +impl SyncHarness { + fn new(checks: HashMap) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let project = TempDir::new().expect("project dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_recorder(bin.path(), "uv", &marker, 0); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1") + .env("CORGEA_TOKEN", "test-token") + .current_dir(project.path()); + Self { + cmd, + marker, + project, + _home: home, + _bin: bin, + } + } + + fn with_uv_lock(self, content: &str) -> Self { + std::fs::write(self.project.path().join("uv.lock"), content).expect("write uv.lock"); + self + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +fn vulnerable_evildep_checks() -> HashMap { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("pypi", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + checks +} + +#[test] +fn uv_sync_vulnerable_lockfile_blocks() { + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lock must block"); + assert_eq!( + h.recorded_argv(), + None, + "uv must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(locked)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + // Nothing was named by this command — the refusal blames the lock, not + // the user's input. 
+ assert!( + String::from_utf8_lossy(&out.stderr) + .contains("your existing dependency tree has known-vulnerable packages"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn uv_sync_clean_lockfile_proceeds() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + let out = h + .cmd + .args(["uv", "sync", "--frozen"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lock must proceed"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("sync --frozen"), + "uv's own args must be forwarded untouched" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 1 packages resolved"), + "the project's own editable stanza must be skipped: {stdout}" + ); +} + +#[test] +fn uv_sync_force_overrides_block() { + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h + .cmd + .args(["uv", "--force", "sync"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the sync"); + assert_eq!(h.recorded_argv().as_deref(), Some("sync")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn uv_sync_without_lockfile_execs_with_note() { + let mut h = SyncHarness::new(HashMap::new()); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("sync")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("'uv sync' is not gated"), + "stderr must carry the explicit ungated note: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn uv_sync_malformed_lockfile_fails_closed() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock("not = [valid"); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unparseable lock must block"); + 
assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("cannot verify 'uv sync'"), + "stderr: {stderr}" + ); + assert!(stderr.contains("--force"), "stderr: {stderr}"); +} + +#[test] +fn uv_sync_tokenless_public_mode_checks_lockfile() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("sync")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + stdout.contains("tree: 1 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn uv_lock_stays_passthrough() { + // `uv lock` installs nothing; the gate applies to the sync that follows. + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h.cmd.args(["uv", "lock"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("lock")); + assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs new file mode 100644 index 0000000..a77e415 --- /dev/null +++ b/tests/cli_verdict.rs @@ -0,0 +1,345 @@ +//! Hermetic e2e tests for the install-gate vuln-api verdict +//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_install.rs` harness pattern (fake package manager on a +//! private PATH + local pypi registry stub) with the in-crate vuln-api stub — +//! the shared `common::PipHarness`. `oldpkg==1.0.0` is published in 2020, so +//! recency never blocks here — every block in this file is the verdict's +//! doing. 
+ +#![cfg(unix)] + +mod common; + +use common::{key, spawn_osv_stub, vulnerable_body, PipHarness}; +use std::collections::HashMap; + +fn vulnerable_oldpkg_body() -> String { + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) +} + +#[test] +fn vulnerable_pin_blocks_without_running_install() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}"); + assert!(stdout.contains("critical"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("--force"), + "block message must name --force" + ); +} + +#[test] +fn alternate_pypi_spelling_hits_canonical_verdict() { + // Advisories are keyed by the PEP 503 canonical name; `Flask_Cors` + // must query (and block on) the `flask-cors` verdict. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "flask-cors", "1.0.0"), + vulnerable_body("pypi", "flask-cors", "1.0.0", "GHSA-TEST-0001", None), + ); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "Flask_Cors==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "alternate spelling must not bypass the gate" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("GHSA-TEST-0001"), "stdout: {stdout}"); +} + +#[test] +fn force_overrides_vulnerable_block_and_propagates_exit_code() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 7); + let out = h + .cmd + .args(["pip", "--force", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "manager exit code must propagate under --force" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("MAL-2024-0001"), + "findings must still print under --force: {stdout}" + ); +} + +#[test] +fn resolution_error_fails_closed_with_token() { + // The wildcard registry stub only knows version 1.0.0, so `==2.0.0` + // is a resolution error: no verdict was obtained, and with a token + // that must block — otherwise a registry outage bypasses the gate. 
+ let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==2.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "a resolution error must fail closed in tokened mode" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("--force"), + "block message must name --force" + ); +} + +#[test] +fn verdict_503_fails_closed() { + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); + let mut h = PipHarness::new(HashMap::new(), statuses, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "unverifiable must block (fail-closed)" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("could not be verified"), "stdout: {stdout}"); +} + +#[test] +fn osv_only_finding_blocks_and_names_source() { + let mut osv = HashMap::new(); + osv.insert( + key("PyPI", "oldpkg", "1.0.0"), + r#"{"id":"GHSA-osv-only","database_specific":{"severity":"HIGH"},"affected":[{"ranges":[{"events":[{"fixed":"2.0.0"}]}]}]}"# + .to_string(), + ); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_OSV_API_URL", spawn_osv_stub(osv, 200)); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("GHSA-osv-only"), "stdout: {stdout}"); + assert!(stdout.contains("source: OSV"), "stdout: {stdout}"); +} + +#[test] +fn 
public_mode_service_outage_warns_and_installs() { + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), None, 0); + h.cmd + .env("CORGEA_VULN_API_URL", "http://127.0.0.1:1") + .env("CORGEA_OSV_API_URL", spawn_osv_stub(HashMap::new(), 503)); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("vulnerability check warning") && stdout.contains("coverage unavailable"), + "stdout: {stdout}" + ); +} + +#[test] +fn custom_vuln_api_url_is_public_without_send_token_opt_in() { + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd + .env_remove("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") + .env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "custom URL without opt-in must fail open even when CORGEA_TOKEN exists" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("public vulnerability mode"), + "stderr: {stderr}" + ); +} + +#[test] +fn tokenless_public_mode_blocks_known_findings() { + // No token means public fail-open mode, not no vulnerability checks. 
+ let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = PipHarness::new(checks, HashMap::new(), None, 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "public mode must still block known vulnerable versions" + ); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("public vulnerability mode") && stderr.contains("fail open"), + "tokenless warning must state public-mode semantics: {stderr}" + ); +} + +#[test] +fn progress_line_prints_only_above_eight_verdict_jobs() { + // Nine resolvable named targets → 9 verdict jobs (> 8) → progress line. + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut args = vec!["pip".to_string(), "install".to_string()]; + args.extend((1..=9).map(|i| format!("pkg{i}==1.0.0"))); + let out = h.cmd.args(&args).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "all clean + old must install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("checking 9 packages for vulnerabilities"), + "stderr: {stderr}" + ); + + // Two jobs → quiet. + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "pkg1==1.0.0", "pkg2==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("checking 2 packages for vulnerabilities"), + "no progress line at or below 8 jobs: {stderr}" + ); +} + +#[test] +fn outage_noise_collapses_above_three_unverifiable() { + // vuln-api refuses connections: every check fails with the same + // error-prefix (only the per-package URL differs). 
Four findings → + // one collapsed line; counts and fail-closed exit code unchanged. + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args([ + "pip", + "install", + "pkg1==1.0.0", + "pkg2==1.0.0", + "pkg3==1.0.0", + "pkg4==1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unverifiable must still block"); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("4 packages could not be verified (vuln-api unreachable:"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("could not be verified:"), + "per-package lines must collapse: {stdout}" + ); + assert!( + stdout.contains("4 unverifiable"), + "summary counts unchanged: {stdout}" + ); + + // Three findings stay per-line — no collapse at the threshold. + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args([ + "pip", + "install", + "pkg1==1.0.0", + "pkg2==1.0.0", + "pkg3==1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert_eq!( + stdout.matches("could not be verified:").count(), + 3, + "three findings must keep per-package lines: {stdout}" + ); + assert!( + !stdout.contains("vuln-api unreachable:"), + "no collapsed line at exactly the threshold: {stdout}" + ); +} + +#[test] +fn json_carries_verdict_object_and_mode() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + 
assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["verdict_mode"], "authenticated"); + assert_eq!(parsed["results"][0]["verdict"]["status"], "vulnerable"); + assert_eq!( + parsed["results"][0]["verdict"]["matches"][0]["advisory_id"], + "MAL-2024-0001" + ); + assert_eq!( + parsed["results"][0]["verdict"]["matches"][0]["fixed_version"], + "2.0.0" + ); + assert_eq!(parsed["summary"]["vulnerable"], 1); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..cadeae3 --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1,489 @@ +//! Shared helpers for the e2e CLI tests (standard Cargo `tests/common/mod.rs` +//! pattern — included via `mod common;` from each integration-test crate, so +//! items unused by one consumer are `#[allow(dead_code)]`). + +use corgea::vuln_api_stub::PackageKey; +use std::collections::HashMap; +use std::io::Read; +#[cfg(unix)] +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +/// A `corgea` invocation isolated from the host environment: temp +/// HOME/USERPROFILE, no Corgea config/registry env vars, and no +/// agent-detection env vars leaking in. 
+#[allow(dead_code)] +pub fn corgea_isolated() -> (Command, TempDir) { + let home = TempDir::new().expect("temp HOME"); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); + let mut cmd = Command::new(env!("CARGO_BIN_EXE_corgea")); + cmd.env("HOME", home.path()) + .env("USERPROFILE", home.path()) + .env_remove("CORGEA_TOKEN") + .env_remove("CORGEA_URL") + .env_remove("CORGEA_NPM_REGISTRY") + .env_remove("CORGEA_PYPI_REGISTRY") + .env_remove("CORGEA_VULN_API_URL") + .env_remove("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") + .env_remove("CORGEA_OSV_API_URL") + .env("CORGEA_VULN_API_URL", vuln_stub.base_url) + .env("CORGEA_OSV_API_URL", spawn_osv_stub(HashMap::new(), 200)) + .env_remove("AI_AGENT") + .env_remove("CODEX_SANDBOX") + .env_remove("CLAUDECODE") + .env_remove("CLAUDE_CODE") + .env_remove("CURSOR_AGENT") + .env_remove("CURSOR_TRACE_ID") + .env_remove("GEMINI_CLI") + .env_remove("PI_AGENT"); + (cmd, home) +} + +#[allow(dead_code)] +pub fn fixture(name: &str) -> String { + format!("{}/tests/fixtures/{}", env!("CARGO_MANIFEST_DIR"), name) +} + +/// Canned 404 body for stub route tables. +#[allow(dead_code)] +pub const NOT_FOUND_JSON: &str = r#"{"message":"not found"}"#; + +/// PyPI release JSON for `oldpkg` 1.0.0, published 2020 → never recent. +#[allow(dead_code)] +pub const OLDPKG_PYPI_JSON: &str = r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#; + +/// npm packument for `oldpkg` 1.0.0, published 2020 → never recent. +#[allow(dead_code)] +pub const OLDPKG_NPM_PACKUMENT: &str = r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#; + +#[allow(dead_code)] +pub fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// Single-match vulnerable verdict body for the vuln-api stub; `fixed: None` +/// renders `"fixed_version":null`. 
+#[allow(dead_code)] +pub fn vulnerable_body( + ecosystem: &str, + name: &str, + version: &str, + advisory: &str, + fixed: Option<&str>, +) -> String { + let fixed = fixed.map_or("null".to_string(), |f| format!(r#""{f}""#)); + format!( + r#"{{"ecosystem":"{ecosystem}","package_name":"{name}","version":"{version}","is_vulnerable":true, + "matches":[{{"advisory_id":"{advisory}","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":{fixed}}}]}}"# + ) +} + +/// Pip `--report -` payload: `oldpkg` (named/requested) + `evildep` +/// (transitive). +#[allow(dead_code)] +pub const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +#[allow(dead_code)] +pub const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + +/// `uv pip compile` stdout: `oldpkg` + transitive `evildep`, same shape as +/// `TREE_REPORT` / `NPM_LOCK`. +#[allow(dead_code)] +pub const UV_COMPILED: &str = "oldpkg==1.0.0\nevildep==0.4.2\n"; + +/// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 +/// port; `route` maps a request path to `(status line, body)`. Returns the +/// base URL. `Connection: close` is load-bearing — without it reqwest pools +/// the socket and a second request races the close and fails. 
+#[allow(dead_code)] +pub fn spawn_http_stub(route: F) -> String +where + F: Fn(&str) -> (&'static str, String) + Send + 'static, +{ + use std::io::Write; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + std::thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let buf = corgea::vuln_api_stub::read_http_request(&mut stream); + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let (status, body) = route(path); + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// OSV querybatch stub. Unknown packages are clean; packages in `vulns` return +/// the provided OSV vulnerability object JSON inside `vulns[]`. 
+#[allow(dead_code)] +pub fn spawn_osv_stub(vulns: HashMap, status_code: u16) -> String { + use std::io::Write; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind OSV stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + std::thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let request = read_http_request_with_body(&mut stream); + let req = String::from_utf8_lossy(&request); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let body = req.split("\r\n\r\n").nth(1).unwrap_or(""); + let response_body = if path == "/v1/querybatch" && status_code < 400 { + osv_response_body(body, &vulns) + } else if status_code < 400 { + NOT_FOUND_JSON.to_string() + } else { + r#"{"error":"osv unavailable"}"#.to_string() + }; + let effective_status = if path == "/v1/querybatch" { + status_code + } else { + 404 + }; + let reason = match effective_status { + 200 => "OK", + 404 => "Not Found", + 500..=599 => "Internal Server Error", + _ => "Error", + }; + let response = format!( + "HTTP/1.1 {effective_status} {reason}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +fn read_http_request_with_body(stream: &mut std::net::TcpStream) -> Vec { + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + let mut header_end = None; + while header_end.is_none() { + let Ok(n) = stream.read(&mut chunk) else { + break; + }; + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + header_end = buf.windows(4).position(|w| w == b"\r\n\r\n").map(|i| i + 4); + } + let Some(header_end) = header_end else { + return buf; + }; + let headers = String::from_utf8_lossy(&buf[..header_end]); + let content_length = headers + .lines() 
+ .find_map(|line| { + let (name, value) = line.split_once(':')?; + name.eq_ignore_ascii_case("content-length") + .then(|| value.trim().parse::().ok()) + .flatten() + }) + .unwrap_or(0); + let current_body_len = buf.len().saturating_sub(header_end); + let remaining = content_length.saturating_sub(current_body_len); + if remaining > 0 { + let mut rest = vec![0u8; remaining]; + let _ = stream.read_exact(&mut rest); + buf.extend_from_slice(&rest); + } + buf +} + +fn osv_response_body(request_body: &str, vulns: &HashMap) -> String { + let parsed: serde_json::Value = + serde_json::from_str(request_body).unwrap_or_else(|_| serde_json::json!({})); + let results = parsed + .get("queries") + .and_then(|v| v.as_array()) + .map(|queries| { + queries + .iter() + .map(|query| { + let ecosystem = query + .get("package") + .and_then(|p| p.get("ecosystem")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + let name = query + .get("package") + .and_then(|p| p.get("name")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + let version = query.get("version").and_then(|v| v.as_str()).unwrap_or(""); + let key = (ecosystem.to_string(), name.to_string(), version.to_string()); + let lower_key = ( + ecosystem.to_ascii_lowercase(), + name.to_string(), + version.to_string(), + ); + vulns + .get(&key) + .or_else(|| vulns.get(&lower_key)) + .map(|body| serde_json::json!({ "vulns": [serde_json::from_str::(body).unwrap()] })) + .unwrap_or_else(|| serde_json::json!({ "vulns": [] })) + }) + .collect::>() + }) + .unwrap_or_default(); + serde_json::json!({ "results": results }).to_string() +} + +/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm +/// packument), both published 2020 → never recent. Everything else 404s. 
+#[allow(dead_code)] +pub fn spawn_oldpkg_registry_stub() -> String { + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) +} + +/// Registry stub serving `/pypi//json` for any single-segment name, +/// always version 1.0.0 published 2020 → never recent. Everything else 404s. +#[allow(dead_code)] +pub fn spawn_wildcard_pypi_stub() -> String { + spawn_http_stub(|path| { + let name = path + .strip_prefix("/pypi/") + .and_then(|p| p.strip_suffix("/json")) + .filter(|n| !n.is_empty() && !n.contains('/')); + match name { + Some(name) => ( + "200 OK", + format!( + r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# + ), + ), + None => ("404 Not Found", NOT_FOUND_JSON.to_string()), + } + }) +} + +/// Write `script` as the executable `dir/binary`. +#[cfg(unix)] +#[allow(dead_code)] +pub fn write_script(dir: &std::path::Path, binary: &str, script: &str) { + use std::os::unix::fs::PermissionsExt; + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake script"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake script"); +} + +/// Shell loop that emits the file at `path` line by line via builtins — +/// works under the locked-down test PATH (no `cat`); the `|| [ -n "$line" ]` +/// guard keeps a final line with no trailing newline. +#[cfg(unix)] +#[allow(dead_code)] +pub fn emit(path: &std::path::Path) -> String { + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", + path.display() + ) +} + +/// Write an executable fake package manager named `binary` into `dir`. It +/// records its argv to `marker` and exits `exit_code` — proving both "the +/// install ran (with these args)" and exit-code forwarding. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_recorder( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + exit_code: i32, +) { + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + write_script(dir, binary, &script); +} + +/// Write an executable fake `pip` that simulates an old pip with no +/// `--report`: the tree dry-run exits 2 *without* touching the marker, so +/// tests exercise the named-only fallback path. Any other invocation +/// records its argv to `marker` and exits `exit_code`. +#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_pip_without_report( + dir: &std::path::Path, + marker: &std::path::Path, + exit_code: i32, +) { + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + write_script(dir, "pip", &script); +} + +/// Sentinel payload that makes a tree-aware fake manager exit non-zero on +/// its tree (resolution) invocation, forcing the named-only fallback. +#[allow(dead_code)] +pub const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Write an executable tree-aware fake package manager into `dir`. An +/// invocation carrying the manager's tree flag emits `payload` (stdout for +/// pip's `--dry-run --report -` and uv's `pip compile`, +/// `./package-lock.json` for npm's `--package-lock-only`, whose cwd is the +/// resolver's throwaway temp dir) and exits 0 — the tree pass; if `payload` +/// is `RESOLUTION_FAILS` it exits non-zero instead, emitting nothing. Any +/// other invocation records its argv to `marker` and exits `exit_code`. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_tree_pm( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + payload: &str, + exit_code: i32, +) { + let (tree_flag, redirect, fail_exit) = match binary { + "pip" | "pip3" => ("--dry-run", "", 2), + "npm" => ("--package-lock-only", " > package-lock.json", 1), + "uv" => ("compile", "", 1), + other => panic!("unsupported fake manager {other}"), + }; + let tree_branch = if payload == RESOLUTION_FAILS { + format!("exit {fail_exit}") + } else { + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + format!("{}{redirect}; exit 0", emit(&payload_path)) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + write_script(dir, binary, &script); +} + +/// `corgea` wired to the wildcard pypi registry stub, a report-less fake pip +/// (recording its argv to a marker), and a vuln-api stub. +#[cfg(unix)] +#[allow(dead_code)] +pub struct PipHarness { + pub cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +#[cfg(unix)] +#[allow(dead_code)] +impl PipHarness { + /// `token: None` exercises public mode (no CORGEA_TOKEN set). 
+ pub fn new( + checks: HashMap, + statuses: HashMap, + token: Option<&str>, + pip_exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); + let registry = spawn_wildcard_pypi_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", ®istry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); + if let Some(t) = token { + cmd.env("CORGEA_TOKEN", t); + } + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + pub fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +/// `corgea` wired to the oldpkg registry stub, a tree-aware fake `binary` +/// (`"pip"` or `"npm"`) answering the tree pass with `payload`, a vuln-api +/// stub, and a token. 
+#[cfg(unix)] +#[allow(dead_code)] +pub struct TreeHarness { + pub cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +#[cfg(unix)] +#[allow(dead_code)] +impl TreeHarness { + pub fn new( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_tree_pm(bin.path(), binary, &marker, payload, 0); + let registry = spawn_oldpkg_registry_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", ®istry) + .env("CORGEA_NPM_REGISTRY", ®istry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1") + .env("CORGEA_TOKEN", "test-token"); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + pub fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} diff --git a/tests/fixtures/vuln_api/check_clean.json b/tests/fixtures/vuln_api/check_clean.json new file mode 100644 index 0000000..7a1d137 --- /dev/null +++ b/tests/fixtures/vuln_api/check_clean.json @@ -0,0 +1 @@ +{"ecosystem":"pypi","package_name":"requests","version":"2.31.0","is_vulnerable":false,"matches":[]} diff --git a/tests/fixtures/vuln_api/check_malware.json b/tests/fixtures/vuln_api/check_malware.json new file mode 100644 index 0000000..f353d36 --- /dev/null +++ b/tests/fixtures/vuln_api/check_malware.json @@ -0,0 +1,15 @@ +{ + "ecosystem": "npm", + "package_name": "wozhendeshitule", + "version": "1.0.0", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "MAL-2022-7232", + "severity_level": "critical", + "tier": 1, + "vulnerable_version_range": null, + "fixed_version": null + } + ] +} diff --git a/tests/fixtures/vuln_api/check_unknown.json b/tests/fixtures/vuln_api/check_unknown.json new file mode 100644 index 
0000000..9886df0 --- /dev/null +++ b/tests/fixtures/vuln_api/check_unknown.json @@ -0,0 +1 @@ +{"ecosystem":"pypi","package_name":"this-package-does-not-exist","version":"9.9.9","is_vulnerable":false,"matches":[]} diff --git a/tests/fixtures/vuln_api/check_vulnerable.json b/tests/fixtures/vuln_api/check_vulnerable.json new file mode 100644 index 0000000..e50112b --- /dev/null +++ b/tests/fixtures/vuln_api/check_vulnerable.json @@ -0,0 +1,15 @@ +{ + "ecosystem": "pypi", + "package_name": "django", + "version": "3.2.0", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "GHSA-xxxx-yyyy-zzzz", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": ">=3.2,<3.2.5", + "fixed_version": "3.2.5" + } + ] +}