diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 0f95380..7c9d2f4 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -143,8 +143,24 @@ else passes through with the package manager's own exit code. Git/URL/path specs (including `pip install .`, PEP 508 `name @ url` direct references, and npm GitHub shorthand `user/repo`) are noted, never blocked. The install verb is found behind global flags (`npm --loglevel silent install x` is still -gated). Bare installs (no named targets) and `-r requirements.txt` files are -noted, not gated. `npm ci` passes through ungated. +gated). Bare `npm install` (zero specs, project `package.json` found like npm +finds it — nearest ancestor) is gated too: the full lockfile-resolved tree is +verdicted, so a vulnerable lockfile blocks. `npm ci` (and aliases) is gated +from the project lockfile directly. + +The vuln check covers the **full would-install set**, not just the named +targets: `pip` and `npm` resolve the complete tree (named + transitive) via a +safe dry-run (`pip install --dry-run …`; an isolated +`npm install --package-lock-only` in a temp dir, never touching your +lockfile); every resolved package is verdicted, so a flagged **transitive** +dependency blocks the install too, labeled by provenance (`(transitive)`, +`(from requirements)`, `(already in package.json)`, `(locked)`). Whenever a +dry-run fails or an npm flag redirects the project root (`--prefix`, `-g`), +the gate falls back to named-only and prints +`warning: transitive dependencies not checked (…); only named packages were verified.` +— for pip, entries of `-r requirements.txt` files are still parsed and +verified in that fallback. Verdict requests run in a bounded pool +(8 parallel). Wrapper flags (`--force`, `--no-fail`, `-t`) are read between the manager name and the install verb (`corgea npm --force install x`); flags after the @@ -182,10 +198,9 @@ The gate is a wrapper, not an enforcement boundary. 
By design it cannot catch: `pip.conf` overrides change where packages resolve from. The gate still verdicts each `name@version`, but it cannot vouch that a substituted registry serves the same artifact those advisories describe. -- **Transitive dependencies** — only the named install targets are verified; - the rest of the resolved tree installs unchecked. -- **Bare installs and lockfiles** — `npm install` with no targets, `npm ci`, - and `-r requirements.txt` files run unchecked after a note. +- **Named-only fallback** — when a dry-run fails (old pip, broken resolution) + or `--prefix`/`-g` redirects npm's root, transitive dependencies install + unchecked behind the printed warning. Hard enforcement needs org-level controls — lockfile review, registry allow-listing — alongside the wrapper. diff --git a/src/config.rs b/src/config.rs index 2c9287c..1cdd12c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -107,7 +107,15 @@ impl Config { /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, /// then the public default. Pure env/constant — no config file field. pub fn vuln_api_url() -> String { - crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + resolve_vuln_api_url(crate::utils::generic::get_env_var_if_exists( + "CORGEA_VULN_API_URL", + )) +} + +/// Pure resolution rule, split out so tests never mutate process-global +/// env (`set_var` races concurrent `getenv` under the parallel harness). +fn resolve_vuln_api_url(override_url: Option) -> String { + override_url .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) .trim() .trim_end_matches('/') @@ -118,23 +126,16 @@ pub fn vuln_api_url() -> String { mod tests { use super::*; - /// All `vuln_api_url` cases in one test fn: the env-var cases - /// mutate process-global state, so they must not run concurrently - /// with each other under the parallel test harness. 
#[test] fn vuln_api_url_resolution_order() { - env::remove_var("CORGEA_VULN_API_URL"); - - // Default when the env var is unset. - assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); - - // Env var wins; whitespace and trailing slash trimmed. - env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); - assert_eq!(vuln_api_url(), "https://env.example.com"); - - // Empty / whitespace-only env var is treated as unset. - env::set_var("CORGEA_VULN_API_URL", " "); - assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); - env::remove_var("CORGEA_VULN_API_URL"); + // Default when the env var is unset (`get_env_var_if_exists` + // already maps empty/whitespace-only values to None). + assert_eq!(resolve_vuln_api_url(None), DEFAULT_VULN_API_URL); + + // Override wins; whitespace and trailing slash trimmed. + assert_eq!( + resolve_vuln_api_url(Some(" https://env.example.com/ ".to_string())), + "https://env.example.com" + ); } } diff --git a/src/deps/ecosystems/npm.rs b/src/deps/ecosystems/npm.rs index edbc7cf..1fd8d91 100644 --- a/src/deps/ecosystems/npm.rs +++ b/src/deps/ecosystems/npm.rs @@ -312,7 +312,11 @@ fn parse_npm_lock(path: &Path) -> Result, DepsError Ok(out) } -fn package_name_from_lock_key(key: &str) -> &str { +/// Package name from a lockfile `packages` key: the path after the last +/// `node_modules/` (or the whole key), truncated to one component — two for +/// scoped names. Also shared with the install gate's lockfile parse +/// (`precheck::tree`). +pub(crate) fn package_name_from_lock_key(key: &str) -> &str { let package_path = key .rsplit_once("node_modules/") .map(|(_, name)| name) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 9b148de..b26c708 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -1,14 +1,14 @@ //! Install wrappers: `corgea npm`, `corgea pip`. //! -//! Wraps an install command from a supported package manager, resolves the -//! named install targets against the public registry, and either blocks the -//! 
install or runs it transparently. +//! Wraps an install command from a supported package manager, resolves what +//! the package manager *would* install against the public registry, and +//! either blocks the install or runs it transparently. //! //! Two independent blocks: //! * recency — the resolved version was published within `--threshold` //! (default `2d`); `--no-fail` demotes this to a warning; -//! * vuln verdict — the vuln-api knows the resolved version is vulnerable -//! or malicious; only `--force` overrides this. +//! * vuln verdict — the vuln-api knows a resolved version (named or +//! transitive) is vulnerable or malicious; only `--force` overrides this. //! //! Verdict lookups are public and fail open: a vuln-api outage warns and the //! install continues. @@ -16,6 +16,7 @@ mod exec; mod parse; mod render; +mod tree; mod verdict; #[cfg(test)] @@ -57,6 +58,17 @@ impl PackageManager { PackageManager::Pip => crate::vuln_api::Ecosystem::Pypi, } } + + /// Canonical package name for dedup/matching across spec spellings — + /// the ecosystem's rule (`vuln_api::Ecosystem::normalize_name`). + /// + /// Invariant: request-time normalization is owned by the vuln-api + /// client (`vuln_api::check_package_version`); comparison sites + /// (`verdict::apply_verdicts` / tree dedup) normalize here. Parsers + /// and resolvers carry raw names. + pub fn normalize_name(self, name: &str) -> String { + self.ecosystem().normalize_name(name) + } } /// Connection details for the vuln-api verdict pass. Lookups are public @@ -83,7 +95,8 @@ pub enum VerdictStatus { impl VerdictStatus { /// Whether this verdict blocks the install. The single definition of - /// "blocking finding", used by `verdict::block_reason`. + /// "blocking finding", shared by `verdict::block_reason` and the + /// refusal-blame predicate. 
/// Provenance of a tree-pass finding: the reason the package is part of the
/// would-install set. The rendered label is derived from this, so a package
/// the user asked for (or already depends on) is never shown as
/// "(transitive)".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TreeOrigin {
    /// Brought in as a dependency of another package.
    Transitive,
    /// Explicitly requested (pip report `"requested"` — CLI arg or
    /// requirements file). Leftovers with this origin come from `-r` files,
    /// because named CLI targets are matched to a named outcome instead.
    Requested,
    /// Already listed as a direct dependency in the project's `package.json`.
    PreExisting,
    /// Pinned by the project's lockfile (`npm ci`).
    Locked,
}

impl TreeOrigin {
    /// Parenthesised provenance tag for this origin.
    fn label(self) -> &'static str {
        use TreeOrigin::{Locked, PreExisting, Requested, Transitive};

        match self {
            Locked => "(locked)",
            PreExisting => "(already in package.json)",
            Requested => "(from requirements)",
            Transitive => "(transitive)",
        }
    }
}
+ transitive: Vec, + }, + /// Resolution unavailable or failed — only named targets were verified. + NamedOnly { reason: String }, +} + #[derive(Debug)] pub struct PrecheckReport { pub manager: PackageManager, @@ -157,6 +222,14 @@ pub struct PrecheckReport { pub original_args: Vec, pub outcomes: Vec, pub threshold: Duration, + /// `None` ⇒ no tree pass ran. + pub tree: Option, + /// True when the command named nothing — no CLI targets and no + /// requirements files — so everything the tree pass resolved predates + /// this command (bare `npm install`). Distinct from + /// `outcomes.is_empty()`: a requirements-only install also has no named + /// outcomes, but its resolved set IS added by the command. + pub bare_install: bool, } impl PrecheckReport { @@ -174,13 +247,27 @@ impl PrecheckReport { pub fn recent_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) } - /// Verdicts on the resolved named targets. + /// Every verdict in the report: named (resolved) outcomes, then + /// transitive tree findings. fn verdicts(&self) -> impl Iterator { + self.named_verdicts().chain(self.tree_verdicts()) + } + /// Verdicts on the named targets this command adds. + fn named_verdicts(&self) -> impl Iterator { self.outcomes.iter().filter_map(|o| match o { TargetOutcome::Resolved { verdict, .. } => Some(verdict), _ => None, }) } + /// Verdicts beyond the named targets (the resolved tree). + fn tree_verdicts(&self) -> impl Iterator { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => transitive.as_slice(), + Some(TreeReport::NamedOnly { .. }) | None => &[], + } + .iter() + .map(|o| &o.verdict) + } pub fn vulnerable_count(&self) -> usize { self.verdicts() .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) @@ -191,6 +278,18 @@ impl PrecheckReport { .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) .count() } + /// Vulnerable findings beyond the named targets (the resolved tree). 
+ pub fn tree_vulnerable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() + } + /// Unverifiable findings beyond the named targets (the resolved tree). + pub fn tree_unverifiable_count(&self) -> usize { + self.tree_verdicts() + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() + } pub fn skipped_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) } @@ -228,6 +327,17 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption return 1; } + // `npm ci` installs the lockfile exactly as written — gate it from the + // project lockfile directly. + if manager == PackageManager::Npm + && matches!( + subcommand.as_str(), + "ci" | "ic" | "clean-install" | "install-clean" | "isntall-clean" + ) + { + return run_npm_ci(subcommand, rest, opts); + } + if !manager.is_install_subcommand(subcommand) { // Non-install subcommand: transparent passthrough, args untouched. return exec::exec_command(manager.binary_name(), cmd); @@ -323,9 +433,23 @@ fn warn_registry_override(manager: PackageManager, rest: &[String]) { } } -/// Post-parse verification: resolve named targets, verdict them, render the -/// report, refuse (exit 1) when the block predicate fires, otherwise run -/// the install. +/// Shared tail of every gated path: render the report, refuse (exit 1) when +/// the block predicate fires, otherwise run the install. +fn report_and_exec( + report: &PrecheckReport, + opts: &PrecheckOptions, + exec: impl FnOnce() -> i32, +) -> i32 { + render::print_text(report); + render::warn_public_lookup_failures(report, opts); + if let Some(reason) = verdict::block_reason(report, opts) { + render::print_refusal(reason); + return 1; + } + exec() +} + +/// Post-parse verification shared by the npm and pip install paths. 
fn run_parsed_install( manager: PackageManager, subcommand_label: &str, @@ -334,17 +458,69 @@ fn run_parsed_install( exec: impl FnOnce() -> i32, opts: PrecheckOptions, ) -> i32 { - if parsed.targets.is_empty() { - // Nothing named: bare installs and requirements-only installs are - // noted, never gated, by this phase. + // With a verdict config, the tree pass resolves the full would-install + // set; `tree::covers_input` owns what each manager's resolver can chew on. + let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); + let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); + + // A BARE `npm install --prefix ` installs another project's whole + // tree, but the gate can't safely resolve that redirected root from a copy + // of the CWD. Nothing named verifies it either, so it would install wholly + // unchecked — fail closed unless `--force`. (A NAMED install still verifies + // its targets and degrades the tree pass to a loud named-only warning.) + if manager == PackageManager::Npm && bare_install && opts.verdict.is_some() && !opts.force { + if let Some(flag) = tree::npm_root_redirect_flag(rest) { + eprintln!( + "error: cannot verify a bare 'npm install' that redirects the project root ('{flag}'): the would-install tree is unknown (pass --force to proceed unchecked)" + ); + return 1; + } + } + + if parsed.targets.is_empty() && !tree_eligible { + // A `-r requirements.txt` install with verdicts disabled is only + // noted; a truly bare install has nothing to note at all. render::requirements_note(&parsed); return exec(); } + // The named-target registry lookups and the tree dry-run are independent + // network/subprocess work — overlap them; verdicts need both. 
let now = Utc::now(); - let mut outcomes = verdict::verify_all(&parsed.targets, &opts, &now, parsed.allow_prerelease); - verdict::run_verdict_pass(manager, &mut outcomes, &opts); - render::requirements_note(&parsed); + let (mut outcomes, tree_resolution) = std::thread::scope(|s| { + let tree = tree_eligible.then(|| s.spawn(|| tree::resolve_tree(manager, rest, &parsed))); + let outcomes = verdict::verify_all(&parsed.targets, &opts, &now, parsed.allow_prerelease); + ( + outcomes, + tree.map(|handle| handle.join().expect("tree resolution thread panicked")), + ) + }); + + let tree = if let Some(resolution) = tree_resolution { + Some(run_tree_pass( + manager, + resolution, + &mut outcomes, + &parsed, + &opts, + &now, + )) + } else { + run_verdict_pass(manager, &mut outcomes, &opts); + None + }; + + // The mandatory loud warning when the tree pass fell back to named-only. + if let Some(TreeReport::NamedOnly { reason }) = &tree { + eprintln!( + "warning: transitive dependencies not checked ({reason}); only named packages were verified." + ); + } + // The requirements note only matters when the tree pass did *not* cover + // those files (fallback to named-only, or verdicts disabled). + if !matches!(&tree, Some(TreeReport::Full { .. })) { + render::requirements_note(&parsed); + } let report = PrecheckReport { manager, @@ -352,15 +528,234 @@ fn run_parsed_install( original_args: rest.to_vec(), outcomes, threshold: opts.threshold, + tree, + bare_install, }; - render::print_text(&report); - render::warn_public_lookup_failures(&report, &opts); - if let Some(reason) = verdict::block_reason(&report, &opts) { - render::print_refusal(reason); - return 1; + report_and_exec(&report, &opts, exec) +} + +/// `npm ci` (and aliases): installs the project lockfile exactly as +/// written, so the gate verdicts the lockfile-pinned set directly — no +/// dry-run needed. Recency isn't checked — locked versions aren't newly +/// chosen by this command; the verdict pass is the gate. 
Without a project +/// or lockfile npm errors on its own; the gate just execs. +fn run_npm_ci(subcommand: &str, rest: &[String], opts: PrecheckOptions) -> i32 { + let exec = || exec::exec_install_with_args(PackageManager::Npm, subcommand, rest); + + let Some(cfg) = &opts.verdict else { + return exec(); + }; + // A root-redirect flag (`--prefix ../other`, `-C ../other`) makes npm ci + // install a DIFFERENT project's lockfile than the CWD one we'd verdict, so + // verifying the CWD lockfile would pass on the wrong project. Fail closed + // unless `--force`. + if !opts.force { + if let Some(flag) = tree::npm_root_redirect_flag(rest) { + eprintln!( + "error: cannot verify 'npm {subcommand}' with '{flag}': it installs a redirected project's lockfile, not this one (pass --force to proceed unchecked)" + ); + return 1; + } + } + let Some(root) = tree::npm_project_root() else { + return exec(); + }; + // npm-shrinkwrap.json takes precedence over package-lock.json. + let Some(lock_path) = ["npm-shrinkwrap.json", "package-lock.json"] + .iter() + .map(|n| root.join(n)) + .find(|p| p.is_file()) + else { + return exec(); + }; + + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read {}: {e}", lock_path.display())) + .and_then(|content| tree::parse_npm_lockfile(&content)); + let jobs = match lock { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!("warning: cannot verify 'npm {subcommand}' ({e}); proceeding under --force"); + return exec(); + } + Err(e) => { + // The single documented bypass of the "all blocking goes through + // `verdict::block_reason`" invariant: an unparsable lockfile + // means there is no report to feed the predicate, so the gate + // refuses directly (--force above is the only escape). 
+ eprintln!( + "error: cannot verify 'npm {subcommand}': {e} (pass --force to proceed unchecked)" + ); + return 1; + } + }; + + let resolved_count = jobs.len(); + let results = verdict::verdict_pool(jobs, cfg, PackageManager::Npm); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager: PackageManager::Npm, + subcommand: subcommand.to_string(), + original_args: rest.to_vec(), + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + report_and_exec(&report, &opts, exec) +} + +/// One verdict job (`requested: true`) per named resolved target, in +/// outcome order. +fn resolved_jobs(outcomes: &[TargetOutcome]) -> impl Iterator + '_ { + outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { resolved, .. } => Some(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + requested: true, + }), + _ => None, + }) +} + +/// Verdict the resolved would-install set (`tree::resolve_tree`'s result). +/// On any resolution failure, fall back to the named-only verdict pass; the +/// caller renders the loud warning from the returned `NamedOnly` reason. +/// Only called when `opts.verdict.is_some()`. 
+fn run_tree_pass( + manager: PackageManager, + resolution: Result, String>, + outcomes: &mut Vec, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TreeReport { + let set = match resolution { + Ok(set) => set, + Err(reason) => { + outcomes.extend(requirements_fallback_outcomes(manager, parsed, opts, now)); + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { reason }; + } + }; + + // Dedup the dry-run set (npm lockfiles repeat the same name@version at + // multiple nested paths), then union in the named-resolved targets — a + // named target already installed is absent from the dry-run delta but + // must still be verdicted. + let norm = |n: &str| manager.normalize_name(n); + let mut seen = std::collections::HashSet::new(); + let mut jobs: Vec = Vec::with_capacity(set.len()); + for p in set { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + let resolved_count = jobs.len(); + for p in resolved_jobs(outcomes) { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + + // npm leftovers that are direct deps of the project manifest are + // pre-existing, not transitive. pip carries `requested` instead. 
+ let direct_deps = if manager == PackageManager::Npm { + tree::project_direct_deps() + } else { + Default::default() + }; + + let cfg = opts + .verdict + .as_ref() + .expect("tree pass requires verdict config"); + let results = verdict::verdict_pool(jobs, cfg, manager); + let transitive = verdict::apply_verdicts(manager, results, outcomes, &direct_deps); + TreeReport::Full { + resolved_count, + transitive, + } +} + +fn requirements_fallback_outcomes( + manager: PackageManager, + parsed: &parse::ParsedInstall, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> Vec { + if manager != PackageManager::Pip || parsed.requirements_files.is_empty() { + return Vec::new(); + } + + let mut targets = Vec::new(); + let mut outcomes = Vec::new(); + for file in &parsed.requirements_files { + match parse::parse_requirement_file_targets(file) { + Ok(mut file_targets) => targets.append(&mut file_targets), + Err(error) => outcomes.push(TargetOutcome::Error { + target: InstallTarget { + name: file.display().to_string(), + display: file.display().to_string(), + kind: TargetKind::Unverifiable { + reason: "requirements file could not be read".to_string(), + }, + }, + error, + }), + } + } + + outcomes.extend(verdict::verify_all( + &targets, + opts, + now, + parsed.allow_prerelease, + )); + outcomes +} + +/// Vuln-api verdict pass over resolved targets, run through the bounded +/// worker pool. No-op without a `VerdictConfig` (recency-only callers). +/// Any client/call failure becomes `Unverifiable`, which warns but never +/// blocks: public lookups fail open. +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + // One job per resolved target, in outcome order; the pool preserves + // order, so verdicts zip straight back onto the resolved outcomes. 
+ let jobs: Vec = resolved_jobs(outcomes).collect(); + + let mut results = verdict::verdict_pool(jobs, cfg, manager).into_iter(); + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { verdict, .. } = o { + *verdict = match results.next() { + Some((_, v)) => v, + // Pool invariant broken — fail safe instead of panicking: + // Unverifiable warns instead of silently reading as clean. + None => VerdictStatus::Unverifiable( + "internal error: verdict pool returned fewer results than outcomes".to_string(), + ), + }; + } } - exec() } #[cfg(test)] @@ -420,7 +815,8 @@ mod tests { #[test] fn requirements_files_note_then_exec() { - // `-r reqs.txt` alone → printed note, no verification, exec runs. + // `-r reqs.txt` alone, verdicts disabled → printed note, no + // verification, exec runs. let opts = stub_opts(); let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); assert_eq!(code, 42); @@ -433,4 +829,16 @@ mod tests { assert_eq!(PackageManager::Pip.ecosystem(), Ecosystem::Pypi); assert_eq!(PackageManager::Npm.ecosystem(), Ecosystem::Npm); } + + #[test] + fn normalize_name_per_manager() { + // pypi: PEP 503 — lowercase, separator runs collapse to one `-`. + assert_eq!( + PackageManager::Pip.normalize_name("Flask_Cors"), + "flask-cors" + ); + assert_eq!(PackageManager::Pip.normalize_name("a__b"), "a-b"); + // npm names are case-sensitive and pass through verbatim. + assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); + } } diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index f2873f0..68e0b2e 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -4,7 +4,7 @@ //! mix flags, package specs, and pass-through args freely) and clear //! about anything we can't verify (URLs / git / filesystem refs). 
-use std::path::PathBuf; +use std::path::{Path, PathBuf}; use crate::verify_deps::registry::{NpmSpec, PypiSpec}; @@ -93,6 +93,201 @@ pub fn parse_install_args( } } +/// Best-effort extraction of registry-installable entries from pip +/// requirements files. This is a fallback for when pip's full dry-run cannot +/// resolve the tree. It deliberately skips file-level options and constraints, +/// while preserving URL/VCS/editable entries as unverifiable targets. +pub(super) fn parse_requirement_file_targets(path: &Path) -> Result, String> { + let mut seen = std::collections::HashSet::new(); + parse_requirement_file_targets_inner(path, &mut seen) +} + +fn parse_requirement_file_targets_inner( + path: &Path, + seen: &mut std::collections::HashSet, +) -> Result, String> { + let path_for_io = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .map_err(|e| format!("read {}: {e}", path.display()))? + .join(path) + }; + let seen_key = std::fs::canonicalize(&path_for_io).unwrap_or_else(|_| path_for_io.clone()); + if !seen.insert(seen_key) { + return Ok(Vec::new()); + } + + let content = std::fs::read_to_string(&path_for_io) + .map_err(|e| format!("read {}: {e}", path.display()))?; + let base = path_for_io.parent().unwrap_or_else(|| Path::new(".")); + let mut targets = Vec::new(); + + for line in requirement_logical_lines(&content) { + match requirement_line_entry(&line) { + Some(RequirementLineEntry::Target(spec)) => targets.push(parse_pypi_spec(&spec)), + Some(RequirementLineEntry::Include(include)) => { + targets.extend(parse_requirement_file_targets_inner( + &base.join(include), + seen, + )?); + } + None => {} + } + } + + Ok(targets) +} + +/// First format-control directive (`--no-binary` / `--only-binary`) found in +/// any of `files`, following nested `-r` includes. 
pip applies file-level +/// format-control AFTER command-line options (the file parser mutates the +/// shared FormatControl object post-CLI-parse), so a `--no-binary :all:` +/// line inside a requirements file overrides the tree pass's trailing +/// `--only-binary :all:` guard and would build sdists — executing package +/// code — during the dry-run. The tree pass must refuse to dry-run such +/// files. Returns `(file, directive)` of the first hit. +pub(super) fn requirements_format_control_directive( + files: &[PathBuf], +) -> Option<(PathBuf, String)> { + let mut seen = std::collections::HashSet::new(); + files + .iter() + .find_map(|file| format_control_scan(file, &mut seen)) +} + +fn format_control_scan( + path: &Path, + seen: &mut std::collections::HashSet, +) -> Option<(PathBuf, String)> { + let path_for_io = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir().ok()?.join(path) + }; + let seen_key = std::fs::canonicalize(&path_for_io).unwrap_or_else(|_| path_for_io.clone()); + if !seen.insert(seen_key) { + return None; + } + + // Best-effort: an unreadable/missing file can't carry a directive we'd + // miss — pip runs as the same uid, so it can't read it either and the + // dry-run fails loudly on its own. 
+ let content = std::fs::read_to_string(&path_for_io).ok()?; + let base = path_for_io.parent().unwrap_or_else(|| Path::new(".")); + + for line in requirement_logical_lines(&content) { + let line = strip_requirement_comment(&line); + let first = line.split_whitespace().next().unwrap_or_default(); + if first == "--no-binary" + || first == "--only-binary" + || first.starts_with("--no-binary=") + || first.starts_with("--only-binary=") + { + return Some((path.to_path_buf(), first.to_string())); + } + if let Some(include) = requirement_flag_value(line, "-r", "--requirement") { + if let Some(hit) = format_control_scan(&base.join(include), seen) { + return Some(hit); + } + } + } + None +} + +enum RequirementLineEntry { + Target(String), + Include(PathBuf), +} + +fn requirement_logical_lines(content: &str) -> Vec { + let mut lines = Vec::new(); + let mut current = String::new(); + + for raw in content.lines() { + let trimmed = raw.trim_end(); + let (part, continued) = match trimmed.strip_suffix('\\') { + Some(part) => (part.trim_end(), true), + None => (trimmed, false), + }; + if !current.is_empty() { + current.push(' '); + } + current.push_str(part.trim()); + if !continued { + lines.push(std::mem::take(&mut current)); + } + } + + if !current.trim().is_empty() { + lines.push(current); + } + lines +} + +fn requirement_line_entry(line: &str) -> Option { + let line = strip_requirement_comment(line); + if line.is_empty() { + return None; + } + + if let Some(path) = requirement_flag_value(line, "-r", "--requirement") { + return Some(RequirementLineEntry::Include(PathBuf::from(path))); + } + if requirement_flag_value(line, "-c", "--constraint").is_some() { + return None; + } + if let Some(path) = requirement_flag_value(line, "-e", "--editable") { + return Some(RequirementLineEntry::Target(format!("-e {path}"))); + } + + if line.starts_with('-') { + return None; + } + + let spec = strip_inline_requirement_options(line); + (!spec.is_empty()).then(|| 
/// Whether `name` spells the long option `long`, either in full or as an
/// abbreviation (a prefix longer than the bare `--`). pip parses
/// requirements-file options with optparse, which accepts abbreviated long
/// options; ambiguous abbreviations are accepted here too, since pip rejects
/// those lines on its own.
fn is_long_option_abbreviation(name: &str, long: &str) -> bool {
    name.len() > 2 && name.starts_with("--") && long.starts_with(name)
}

/// Value of a leading `short`/`long` flag on a requirements-file line.
///
/// Only the first whitespace-separated token is considered as the flag.
/// Recognised spellings (shown for `-r` / `--requirement`):
/// * `-r value` and `--requirement value` — the next token is the value;
/// * `--requirement=value` — attached long form (an empty value yields
///   `Some("")`);
/// * `--req value` / `--req=value` — abbreviated long option;
/// * `-rvalue` — attached short form, unless the remainder is empty or
///   starts with `-`.
///
/// Returns `None` when the line is empty, starts with anything else, or the
/// separate-token form has no following token. No allocation is performed.
fn requirement_flag_value<'a>(line: &'a str, short: &str, long: &str) -> Option<&'a str> {
    let mut parts = line.split_whitespace();
    let first = parts.next()?;

    // Separate-token forms.
    if first == short || is_long_option_abbreviation(first, long) {
        return parts.next();
    }
    // Attached long form, full or abbreviated.
    if let Some((name, value)) = first.split_once('=') {
        if is_long_option_abbreviation(name, long) {
            return Some(value);
        }
    }
    // Attached short form.
    first
        .strip_prefix(short)
        .filter(|value| !value.is_empty() && !value.starts_with('-'))
}
+ let dir = tempfile::tempdir().expect("tempdir"); + std::fs::write( + dir.path().join("inner.txt"), + "# comment\n--no-binary :all:\n", + ) + .expect("write inner"); + std::fs::write(dir.path().join("outer.txt"), "flask==1.0\n-r inner.txt\n") + .expect("write outer"); + let (file, directive) = + requirements_format_control_directive(&[dir.path().join("outer.txt")]) + .expect("directive must be found through the include"); + assert!(file.ends_with("outer.txt") || file.ends_with("inner.txt")); + assert_eq!(directive, "--no-binary"); + + // Attached `=` form counts too. + std::fs::write(dir.path().join("eq.txt"), "--only-binary=:none:\n").expect("write eq"); + assert!(requirements_format_control_directive(&[dir.path().join("eq.txt")]).is_some()); + + // Non-format-control options don't trip the scan. + std::fs::write( + dir.path().join("clean.txt"), + "flask==1.0\n--prefer-binary\n--hash=sha256:abc\n", + ) + .expect("write clean"); + assert!(requirements_format_control_directive(&[dir.path().join("clean.txt")]).is_none()); + + // A missing file is pip's error to report, not the scan's — it + // can't hide a directive pip could read (same uid). + assert!(requirements_format_control_directive(&[dir.path().join("absent.txt")]).is_none()); + } } diff --git a/src/precheck/render.rs b/src/precheck/render.rs index b7ada75..4a942c2 100644 --- a/src/precheck/render.rs +++ b/src/precheck/render.rs @@ -2,13 +2,18 @@ use crate::verify_deps; -use super::{parse, PrecheckOptions, PrecheckReport, TargetOutcome, VerdictStatus}; +use super::{ + parse, PrecheckOptions, PrecheckReport, TargetOutcome, TreeOrigin, TreeReport, VerdictStatus, +}; /// The refusal line on stderr. Messaging only; the block decision and the /// choice of escape hatch live in `verdict::block_reason`. 
pub(super) fn print_refusal(reason: super::verdict::BlockReason) { use super::verdict::BlockReason; match reason { + BlockReason::ExistingTree => eprintln!( + "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." + ), BlockReason::Findings => { eprintln!("Refusing to run install. Pass --force to proceed despite findings.") } @@ -51,10 +56,10 @@ fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { /// Highest of `fixes` after sort/dedup: a single distinct value is returned /// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct -/// values compare by lenient semver. One unparsable candidate among several -/// poisons the answer (`None`) — certifying a "safe version" from a partial -/// ordering could steer to a still-vulnerable release. -fn highest_fix(mut fixes: Vec<&str>) -> Option { +/// values compare by lenient semver. With `all_must_parse`, one unparsable +/// candidate among several poisons the answer (`None`); otherwise unparsable +/// candidates are skipped. +fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { fixes.sort_unstable(); fixes.dedup(); match fixes.as_slice() { @@ -65,7 +70,8 @@ fn highest_fix(mut fixes: Vec<&str>) -> Option { for raw in many { match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) { Ok(v) => parsed.push((v, *raw)), - Err(_) => return None, + Err(_) if all_must_parse => return None, + Err(_) => {} } } parsed @@ -84,10 +90,23 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { .iter() .map(|m| m.fixed_version.as_deref()) .collect::>()?; - highest_fix(fixes) + highest_fix(fixes, true) } -/// Per-match advisory lines plus the safe-version steer. Built for agent +/// Highest `fixed_version` the advisories advertise, by lenient semver. 
+/// Unlike `safe_version` this is *not* a certification: matches without a +/// fix are ignored, so the result may still be vulnerable to them. `None` +/// only when no match advertises a fix (or no candidate parses). +fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .filter_map(|m| m.fixed_version.as_deref()) + .collect(); + highest_fix(fixes, false) +} + +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. Built for agent /// self-correction: each advisory carries `fixed in `, and the /// steer names the exact spec to install instead. fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { @@ -104,30 +123,184 @@ fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) } } +/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. +/// The parenthetical separates findings the resolved tree carried in from +/// findings on the targets this command names; omitted when the tree +/// contributed none. +fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { + if from_tree > 0 { + format!("{total} {label} ({from_tree} from resolved tree)") + } else { + format!("{total} {label}") + } +} + +/// More than this many unverifiable findings with the same error-prefix +/// render as one collapsed line instead of one line per package. +const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; + +/// Group key for collapsing repeated unverifiable errors: the text before +/// the first `(` — strips per-package detail (URLs, status codes) so one +/// outage groups under one key. +fn error_prefix(error: &str) -> &str { + match error.find('(') { + Some(i) => error[..i].trim_end(), + None => error, + } +} + +/// Unverifiable error strings across transitive tree findings and named +/// outcomes, in render order. 
+fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { + let mut errors = Vec::new(); + if let Some(TreeReport::Full { transitive, .. }) = &report.tree { + for t in transitive { + if let VerdictStatus::Unverifiable(e) = &t.verdict { + errors.push(e.as_str()); + } + } + } + for o in &report.outcomes { + if let TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(e), + .. + } = o + { + errors.push(e.as_str()); + } + } + errors +} + +/// `(prefix, count, first error)` groups of unverifiable findings large +/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — +/// the vuln-api outage case, where every package fails the same way. +/// Display-only: counts and exit codes never change. +fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { + let mut groups: Vec<(&str, usize, &str)> = Vec::new(); + for e in unverifiable_errors(report) { + let prefix = error_prefix(e); + match groups.iter_mut().find(|(p, _, _)| *p == prefix) { + Some((_, count, _)) => *count += 1, + None => groups.push((prefix, 1, e)), + } + } + groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); + groups +} + pub(super) fn print_text(report: &PrecheckReport) { - // Build the echoed command from non-empty parts: a gated install with - // zero remaining args has nothing to append. + // Build the echoed command from non-empty parts: a bare gated install + // (e.g. `npm install` with zero specs) has no args to append. 
let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); if !report.original_args.is_empty() { command.push(' '); command.push_str(&report.original_args.join(" ")); } + let collapsed = collapsed_unverifiable_groups(report); + let is_collapsed = |error: &str| { + collapsed + .iter() + .any(|(prefix, _, _)| *prefix == error_prefix(error)) + }; + println!( "Pre-checking `{}` (threshold {})", command, verify_deps::format_duration(report.threshold) ); println!( - " {} ok, {} recent, {} vulnerable, {} unverifiable, {} skipped, {} errors", + " {} ok, {} recent, {}, {}, {} skipped, {} errors", report.ok_count(), report.recent_count(), - report.vulnerable_count(), - report.unverifiable_count(), + summary_segment( + report.vulnerable_count(), + report.tree_vulnerable_count(), + "vulnerable" + ), + summary_segment( + report.unverifiable_count(), + report.tree_unverifiable_count(), + "unverifiable" + ), report.skipped_count(), report.error_count(), ); + match &report.tree { + Some(TreeReport::Full { + resolved_count, + transitive, + .. + }) => { + println!( + " tree: {} packages resolved, {} transitive checked", + resolved_count, + transitive.len() + ); + for t in transitive { + match &t.verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {}@{} {} known vulnerable:", + t.name, + t.version, + t.origin.label() + ); + print_vulnerable_matches(&t.name, matches); + // A vulnerable dep the project already declares can be + // bumped directly — point at the fix as a command. + // When `safe_version` is `Some` it equals + // `advertised_fix` and clears every advisory; otherwise + // some advisory has no fix, so the "(advertised fix)" + // hedge marks the bump as partial. 
+ if t.origin == TreeOrigin::PreExisting { + if let Some(fix) = advertised_fix(matches) { + let hedge = if safe_version(matches).is_some() { + "" + } else { + " (advertised fix)" + }; + println!( + " fix with: corgea {} install {}@{}{}", + report.manager.binary_name(), + t.name, + fix, + hedge + ); + } + } + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {}@{} {} could not be verified: {}", + t.name, + t.version, + t.origin.label(), + error + ); + } + } + // Clean / not-checked tree entries stay quiet in text mode. + VerdictStatus::Clean | VerdictStatus::NotChecked => {} + } + } + } + Some(TreeReport::NamedOnly { reason }) => { + println!(" tree: transitive dependencies NOT checked ({reason})"); + } + None => {} + } + + // One line per collapsed outage group instead of one per package. + for (_, count, first_error) in &collapsed { + println!( + " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" + ); + } + for o in &report.outcomes { match o { TargetOutcome::Resolved { @@ -144,10 +317,12 @@ pub(super) fn print_text(report: &PrecheckReport) { print_vulnerable_matches(&resolved.name, matches); } VerdictStatus::Unverifiable(error) => { - println!( - " ⚠ {} → {}@{} could not be verified: {}", - target.display, resolved.name, resolved.version, error, - ); + if !is_collapsed(error) { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } } VerdictStatus::Clean | VerdictStatus::NotChecked => { if report.is_recent(*age) { @@ -189,6 +364,7 @@ pub(super) fn print_text(report: &PrecheckReport) { #[cfg(test)] mod tests { use super::super::test_support::*; + use super::super::TreeOutcome; use super::*; #[test] @@ -245,4 +421,77 @@ mod tests { fn safe_version_empty_matches_is_none() { assert_eq!(safe_version(&[]), None); } + + #[test] + fn error_prefix_strips_parenthesized_detail() { + // The reqwest network-failure shape: per-package URL 
in parens. + assert_eq!( + error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), + "Failed to send vuln-api request: error sending request for url" + ); + assert_eq!( + error_prefix("vuln-api unavailable (HTTP 503)"), + "vuln-api unavailable" + ); + assert_eq!(error_prefix("no parens here"), "no parens here"); + } + + /// Four unverifiable findings sharing a prefix collapse into one group + /// (named + transitive both count); three do not. + #[test] + fn collapsed_groups_require_more_than_threshold() { + let unverifiable = |name: &str| { + let mut o = resolved_outcome(name, "1.0.0", false); + set_verdict( + &mut o, + VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), + ); + o + }; + + let mut report = report_with(vec![ + unverifiable("a"), + unverifiable("b"), + unverifiable("c"), + ]); + assert!(collapsed_unverifiable_groups(&report).is_empty()); + + report.tree = Some(TreeReport::Full { + resolved_count: 4, + transitive: vec![TreeOutcome { + name: "d".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Unverifiable( + "vuln-api unavailable (HTTP 503: d)".to_string(), + ), + origin: TreeOrigin::Transitive, + }], + }); + let groups = collapsed_unverifiable_groups(&report); + assert_eq!(groups.len(), 1); + let (prefix, count, first) = groups[0]; + assert_eq!(prefix, "vuln-api unavailable"); + assert_eq!(count, 4); + // Render order is transitive-first, so the tree finding leads. + assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); + } + + #[test] + fn advertised_fix_ignores_matches_without_fix() { + // safe_version returns None here; the advertised fix still surfaces. 
+ assert_eq!( + advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + Some("2.0.0".to_string()) + ); + assert_eq!(advertised_fix(&[vm("A-1", None)]), None); + assert_eq!(advertised_fix(&[]), None); + } + + #[test] + fn advertised_fix_picks_highest_by_semver() { + assert_eq!( + advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } } diff --git a/src/precheck/test_support.rs b/src/precheck/test_support.rs index 4b84300..babe6bf 100644 --- a/src/precheck/test_support.rs +++ b/src/precheck/test_support.rs @@ -75,6 +75,10 @@ pub(crate) fn report_with(outcomes: Vec) -> PrecheckReport { original_args: vec![], outcomes, threshold: Duration::from_secs(2 * 86400), + tree: None, + // Most tests model an install that named something; bare-install + // cases set this explicitly. + bare_install: false, } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs new file mode 100644 index 0000000..0c34c74 --- /dev/null +++ b/src/precheck/tree.rs @@ -0,0 +1,541 @@ +//! Full would-install-set resolution (the "tree pass"). +//! +//! Safety invariant: resolution must never execute package code. +//! pip: `--only-binary :all:` (appended last, so it wins over CLI +//! format-control flags) prevents sdist builds (pypa/pip#13091) — BUT pip +//! applies format-control directives found *inside* `-r` files after CLI +//! parsing, so requirements files are pre-scanned and any `--no-binary` / +//! `--only-binary` line refuses the dry-run (named-only fallback) instead. +//! npm: `--ignore-scripts` guards npm/cli#2787. + +use std::process::Command; + +use super::PackageManager; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePackage { + pub name: String, + pub version: String, + /// pip report `"requested"`: the user named this package (CLI arg or + /// requirements file). Always false for npm — its lockfile has no + /// equivalent flag. 
+ pub requested: bool, +} + +/// Whether this manager's resolver has anything to resolve for the parsed +/// install. pip's dry-run also reads `-r` requirements files, so those make +/// an install eligible even with no named targets. npm's lockfile resolution +/// reads `package.json`, so a bare `npm install` is eligible whenever the +/// project (found like npm finds it — nearest ancestor manifest) has one. +pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + || (manager == PackageManager::Pip && !parsed.requirements_files.is_empty()) + || (manager == PackageManager::Npm && npm_project_root().is_some()) +} + +/// Nearest ancestor file named `name`, starting at the CWD. +pub(super) fn find_up(name: &str) -> Option { + let cwd = std::env::current_dir().ok()?; + cwd.ancestors() + .map(|dir| dir.join(name)) + .find(|p| p.is_file()) +} + +/// The project directory npm itself would operate on: the nearest ancestor +/// holding `package.json`. A bare `npm install` from a subdirectory +/// installs THAT project's tree, so the gate must look there too. +pub(super) fn npm_project_root() -> Option { + Some(find_up("package.json")?.parent()?.to_path_buf()) +} + +/// The npm flag that redirects the project root (`--prefix`, `-C`, `-g`, +/// `--global`, `--location`), if present. The gate can't safely resolve or +/// verify the redirected project from a throwaway copy of the CWD, so the +/// callers fail closed (bare install / `npm ci`) or degrade to named-only. +pub(super) fn npm_root_redirect_flag(args: &[String]) -> Option { + const ROOT_REDIRECT_FLAGS: [&str; 5] = ["--prefix", "-C", "--global", "-g", "--location"]; + args.iter() + .find(|a| { + ROOT_REDIRECT_FLAGS + .iter() + .any(|f| a.as_str() == *f || a.starts_with(&format!("{f}="))) + }) + .cloned() +} + +/// `Err(reason)`: the dry-run failed — the caller falls back to named-only +/// and its warning carries `reason`. 
+pub fn resolve_tree( + manager: PackageManager, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result, String> { + match manager { + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args, parsed), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args), + } +} + +/// Last stderr line of a failed subprocess, for one-line error messages. +fn stderr_tail(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr) + .trim() + .lines() + .last() + .unwrap_or("unknown error") + .to_string() +} + +fn resolve_pip_tree( + binary: &str, + install_args: &[String], + parsed: &super::parse::ParsedInstall, +) -> Result, String> { + // pip applies format-control directives found INSIDE a requirements + // file AFTER command-line parsing (acknowledged pip behavior — the file + // parser mutates the shared FormatControl object), so a `--no-binary + // :all:` line in a `-r` file would override the trailing CLI guard + // below and build sdists during the dry-run. Refuse to dry-run such + // files; the caller degrades to the named-only fallback, whose + // requirements parser skips option lines entirely. + if let Some((file, directive)) = + super::parse::requirements_format_control_directive(&parsed.requirements_files) + { + return Err(format!( + "{} sets {} (file-level format-control overrides the sdist guard; not dry-running)", + file.display(), + directive + )); + } + // Same binary resolution as the exec path (pip → pip3 fallback) — the + // tree pass must not silently degrade on pip3-only systems. 
+ let resolved = super::exec::resolve_binary(binary)?; + // The non-execution guard `--only-binary :all:` is appended AFTER the + // user's args: pip's format-control flags are last-wins per package, so a + // user `--no-binary :all:` / `--only-binary :none:` placed in install_args + // must not re-enable sdist builds (which would run package code during the + // report step, violating this file's safety invariant). + let output = Command::new(resolved) + .arg("install") + .args(["--dry-run", "--quiet", "--report", "-"]) + .args(install_args) + .args(["--only-binary", ":all:"]) + .output() + .map_err(|e| format!("run pip dry-run: {e}"))?; + if !output.status.success() { + return Err(format!("pip dry-run failed: {}", stderr_tail(&output))); + } + parse_pip_report(&String::from_utf8_lossy(&output.stdout)) +} + +fn parse_pip_report(json: &str) -> Result, String> { + let report: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse pip report: {e}"))?; + let install = report + .get("install") + .and_then(|v| v.as_array()) + .ok_or("pip report has no install[] array")?; + install + .iter() + .map(|item| { + let metadata = item.get("metadata").ok_or("report item missing metadata")?; + let field = |k: &str| { + metadata + .get(k) + .and_then(|v| v.as_str()) + .map(str::to_string) + .ok_or_else(|| format!("report item missing metadata.{k}")) + }; + Ok(TreePackage { + name: field("name")?, + version: field("version")?, + requested: item + .get("requested") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + }) + }) + .collect() +} + +/// Direct dependency names declared by the project's `package.json` (the +/// manifest `resolve_npm_tree` copies — nearest ancestor, like npm). +/// Empty when the manifest is absent or unparsable — origin labeling then +/// degrades to `(transitive)`. 
+pub fn project_direct_deps() -> std::collections::HashSet { + npm_project_root() + .and_then(|root| std::fs::read_to_string(root.join("package.json")).ok()) + .map(|s| direct_deps_from_manifest(&s)) + .unwrap_or_default() +} + +fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { + let Ok(manifest) = serde_json::from_str::(json) else { + return Default::default(); + }; + let groups = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ]; + groups + .iter() + .filter_map(|g| manifest.get(g)?.as_object()) + .flat_map(|deps| deps.keys().cloned()) + .collect() +} + +/// Resolve npm's full would-install set by generating a lockfile in a +/// throwaway dir so the user's own lockfile is never touched. npm's +/// `--dry-run --json` only emits counts (npm/cli#6558), so we read the +/// generated `package-lock.json` instead. +/// +/// `--ignore-scripts` because npm has run lifecycle scripts under +/// `--package-lock-only` before (npm/cli#2787). +fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { + // Flags that redirect npm's project root would defeat the throwaway-dir + // isolation below (`--prefix` overrides `current_dir`, so the dry run + // would write the USER'S package-lock.json) — degrade to named-only. + if let Some(flag) = npm_root_redirect_flag(install_args) { + return Err(format!( + "'{flag}' redirects npm's project root; lockfile resolution skipped" + )); + } + + let resolved = super::exec::resolve_binary(binary)?; + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + // Copy the manifests from the project npm would operate on (nearest + // ancestor package.json), not just the CWD. The `.npmrc` copy is + // config-only (registry/auth/save prefs) so resolution matches a real + // install; CLI flags below still win over it (`--ignore-scripts` can't + // be undone by an `ignore-scripts=false` line). 
A `package-lock=false` + // `.npmrc` makes the resolution emit no lockfile → named-only fallback + // by design, not a hole: nothing executes either way. + let root = npm_project_root(); + for manifest in [ + "package.json", + "package-lock.json", + "npm-shrinkwrap.json", + ".npmrc", + ] { + let src = match &root { + Some(root) => root.join(manifest), + None => std::path::PathBuf::from(manifest), + }; + if src.exists() { + std::fs::copy(&src, work.path().join(manifest)) + .map_err(|e| format!("copy {manifest}: {e}"))?; + } + } + let output = Command::new(&resolved) + .arg("install") + .args(install_args) + .args([ + "--package-lock-only", + "--ignore-scripts", + "--no-audit", + "--no-fund", + ]) + .current_dir(work.path()) + .output() + .map_err(|e| format!("run npm lockfile resolution: {e}"))?; + if !output.status.success() { + return Err(format!( + "npm lockfile resolution failed: {}", + stderr_tail(&output) + )); + } + // npm gives `npm-shrinkwrap.json` precedence over `package-lock.json`, + // so read whichever it actually produced/used, preferring the shrinkwrap. + let lock_path = ["npm-shrinkwrap.json", "package-lock.json"] + .iter() + .map(|n| work.path().join(n)) + .find(|p| p.is_file()) + .ok_or("npm produced no lockfile to verify")?; + let lock = std::fs::read_to_string(&lock_path) + .map_err(|e| format!("read generated {}: {e}", lock_path.display()))?; + parse_npm_lockfile(&lock) +} + +pub(super) fn parse_npm_lockfile(json: &str) -> Result, String> { + let lock: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse package-lock.json: {e}"))?; + // lockfileVersion 2/3 carries the `packages` map; v1 only has the + // `dependencies` tree, which npm still understands — support both so a + // v1 project isn't forced to bypass the gate with `--force`. + if let Some(packages) = lock.get("packages").and_then(|v| v.as_object()) { + Ok(packages + .iter() + // Only `node_modules/...` entries are registry-installed deps. 
+ // Skip the root project (""), symlinked workspaces (`link: true`), + // and workspace SOURCE stanzas (`packages/foo`, `apps/bar`) — those + // are local packages with no registry identity, so sending them to + // the public vuln-api would falsely block a monorepo install when a + // public package shares the name@version. + .filter(|(path, entry)| { + path.contains("node_modules/") + && entry.get("link").and_then(|v| v.as_bool()) != Some(true) + }) + .filter_map(|(path, entry)| { + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path))?; + let version = entry.get("version").and_then(|v| v.as_str())?; + Some(TreePackage { + name, + version: version.to_string(), + requested: false, + }) + }) + .collect()) + } else if let Some(deps) = lock.get("dependencies").and_then(|v| v.as_object()) { + let mut out = Vec::new(); + collect_v1_dependencies(deps, &mut out, 0)?; + Ok(out) + } else { + Err("package-lock.json has neither a packages map nor a dependencies tree".to_string()) + } +} + +/// npm-written v1 trees are finite (no cycles by construction), but +/// `npm ci` feeds this parser an attacker-supplied file — cap the depth so +/// a crafted deep nest can't overflow the stack. In practice serde_json's +/// own 128-level recursion limit rejects such files at parse time (each v1 +/// level is two JSON levels); this cap is defense-in-depth should that +/// limit ever change. Real trees are a handful of levels deep. +const V1_MAX_DEPTH: usize = 64; + +/// Recursively collect `name@version` from a lockfileVersion 1 +/// `dependencies` tree. Nested `dependencies` are deduped by the caller's +/// pool; local/link entries (`"link": true`) carry no registry identity and +/// are skipped. Fails loudly past `V1_MAX_DEPTH` (callers refuse or fall +/// back — never silently truncate the verdict set). 
+fn collect_v1_dependencies( + deps: &serde_json::Map, + out: &mut Vec, + depth: usize, +) -> Result<(), String> { + if depth > V1_MAX_DEPTH { + return Err(format!( + "package-lock.json dependencies nest deeper than {V1_MAX_DEPTH} levels; refusing to parse" + )); + } + for (name, entry) in deps { + if entry.get("link").and_then(|v| v.as_bool()) == Some(true) { + continue; + } + if let Some(version) = entry.get("version").and_then(|v| v.as_str()) { + out.push(TreePackage { + name: name.clone(), + version: version.to_string(), + requested: false, + }); + } + if let Some(nested) = entry.get("dependencies").and_then(|v| v.as_object()) { + collect_v1_dependencies(nested, out, depth + 1)?; + } + } + Ok(()) +} + +/// Derive a package name from a lockfile path key like +/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. `None` for keys +/// outside `node_modules/` (workspace stanzas carry an explicit `name`). +fn name_from_lock_path(path: &str) -> Option { + if !path.contains("node_modules/") { + return None; + } + let name = crate::deps::ecosystems::npm::package_name_from_lock_key(path); + (!name.is_empty()).then(|| name.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const OK_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + + #[test] + fn parse_pip_report_ok() { + let pkgs = parse_pip_report(OK_REPORT).expect("parse ok report"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn parse_pip_report_missing_requested_defaults_false() { + let json = r#"{"install":[{"metadata":{"name":"x","version":"1.0.0"}}]}"#; + let pkgs = parse_pip_report(json).expect("parse report without 
requested"); + assert!(!pkgs[0].requested); + } + + #[test] + fn parse_pip_report_missing_install() { + let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); + assert!(err.contains("no install[]"), "got: {err}"); + } + + #[test] + fn parse_pip_report_missing_version() { + let json = r#"{"install":[{"metadata":{"name":"x"}}]}"#; + let err = parse_pip_report(json).expect_err("missing version"); + assert!(err.contains("metadata.version"), "got: {err}"); + } + + #[test] + fn parse_pip_report_non_json() { + let err = parse_pip_report("not json").expect_err("non-json"); + assert!(err.contains("parse pip report"), "got: {err}"); + } + + // lockfile-v3 with: root entry (skipped), a plain dep, a nested dep, + // a scoped dep, and a workspace `link: true` entry (skipped). + const NPM_LOCK: &str = r#"{ + "name": "proj", "lockfileVersion": 3, + "packages": { + "": {"name": "proj", "version": "1.0.0"}, + "node_modules/oldpkg": {"version": "1.0.0"}, + "node_modules/evildep": {"version": "0.4.2"}, + "node_modules/a/node_modules/b": {"version": "2.3.4"}, + "node_modules/@scope/pkg": {"version": "9.0.1"}, + "node_modules/localdep": {"resolved": "../local", "link": true}, + "packages/localdep": {"name": "localdep", "version": "0.0.1"} + } + }"#; + + fn pkg(name: &str, version: &str) -> TreePackage { + TreePackage { + name: name.to_string(), + version: version.to_string(), + requested: false, + } + } + + #[test] + fn parse_npm_lockfile_ok() { + let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + // The workspace SOURCE stanza `packages/localdep` is a local package, + // not a registry dep — it must NOT be verdicted, only the four + // node_modules/ entries are. 
+ assert_eq!( + pkgs, + vec![ + pkg("@scope/pkg", "9.0.1"), + pkg("b", "2.3.4"), + pkg("evildep", "0.4.2"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_v1_dependencies_tree() { + // lockfileVersion 1 has no `packages` map — npm still understands it, + // so the gate must too (recursing into nested `dependencies`), and + // skip `link` entries. + const V1: &str = r#"{ + "name": "proj", "lockfileVersion": 1, + "dependencies": { + "oldpkg": {"version": "1.0.0"}, + "evildep": {"version": "0.4.2", "dependencies": { + "deepdep": {"version": "3.2.1"} + }}, + "locallink": {"version": "file:../local", "link": true} + } + }"#; + let mut pkgs = parse_npm_lockfile(V1).expect("parse v1 lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + assert_eq!( + pkgs, + vec![ + pkg("deepdep", "3.2.1"), + pkg("evildep", "0.4.2"), + pkg("oldpkg", "1.0.0"), + ] + ); + } + + #[test] + fn parse_npm_lockfile_neither_schema_is_error() { + let err = parse_npm_lockfile(r#"{"lockfileVersion":1}"#).expect_err("no deps"); + assert!(err.contains("neither a packages map"), "got: {err}"); + } + + #[test] + fn parse_npm_lockfile_v1_depth_bomb_errors_instead_of_overflowing() { + // `npm ci` parses attacker-supplied lockfiles; a crafted deep nest + // must hit the depth cap (loud error → refuse/fallback), not + // overflow the stack. + let mut inner = r#"{"version":"1.0.0"}"#.to_string(); + for _ in 0..(V1_MAX_DEPTH + 2) { + inner = format!(r#"{{"version":"1.0.0","dependencies":{{"d":{inner}}}}}"#); + } + let lock = format!(r#"{{"lockfileVersion":1,"dependencies":{{"a":{inner}}}}}"#); + let err = parse_npm_lockfile(&lock).expect_err("depth bomb must error"); + // serde_json's recursion limit fires first today; the explicit + // V1_MAX_DEPTH cap is the backstop. Either way: loud error. 
+ assert!( + err.contains("deeper than") || err.contains("recursion limit"), + "got: {err}" + ); + } + + #[test] + fn name_from_lock_path_handles_nested_and_scoped() { + assert_eq!( + name_from_lock_path("node_modules/oldpkg").as_deref(), + Some("oldpkg") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/b").as_deref(), + Some("b") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/@scope/pkg").as_deref(), + Some("@scope/pkg") + ); + assert_eq!(name_from_lock_path("packages/foo"), None); + } + + #[test] + fn direct_deps_from_manifest_unions_all_groups() { + let manifest = r#"{ + "name": "proj", + "dependencies": {"a": "^1.0.0", "@scope/b": "2.x"}, + "devDependencies": {"c": "*"}, + "optionalDependencies": {"d": "1.2.3"}, + "peerDependencies": {"e": ">=1"} + }"#; + let deps = direct_deps_from_manifest(manifest); + for name in ["a", "@scope/b", "c", "d", "e"] { + assert!(deps.contains(name), "missing {name}"); + } + assert_eq!(deps.len(), 5); + } + + #[test] + fn direct_deps_from_manifest_degrades_to_empty() { + assert!(direct_deps_from_manifest("not json").is_empty()); + assert!(direct_deps_from_manifest(r#"{"name":"proj"}"#).is_empty()); + assert!(direct_deps_from_manifest(r#"{"dependencies":[]}"#).is_empty()); + } +} diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs index 1fb4baa..e316861 100644 --- a/src/precheck/verdict.rs +++ b/src/precheck/verdict.rs @@ -1,66 +1,60 @@ -//! Verdict pass: bounded vuln-api worker pool, registry resolution, and the +//! Verdict pass: bounded vuln-api worker pool, result matching, and the //! single block predicate (`block_reason`). 
use std::time::Duration; use super::{ - InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome, - VerdictStatus, + tree, InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictStatus, }; +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + /// Max parallel vuln-api / registry requests. const VERDICT_CONCURRENCY: usize = 8; -/// Vuln-api verdict pass over resolved targets, run through the bounded -/// worker pool. No-op without a `VerdictConfig` (recency-only callers). -/// Any client/call failure becomes `Unverifiable`, which warns but never -/// blocks: public lookups fail open. -pub(super) fn run_verdict_pass( +/// Bounded worker pool over the verdict jobs. On client/request failure every +/// job comes back `Unverifiable`, which warns but never blocks: public +/// lookups fail open. Order is preserved: result `i` belongs to job `i`. +pub(super) fn verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, manager: PackageManager, - outcomes: &mut [TargetOutcome], - opts: &PrecheckOptions, -) { - let Some(cfg) = &opts.verdict else { return }; +) -> Vec<(tree::TreePackage, VerdictStatus)> { + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) + .collect(); + } + }; - let jobs: Vec<(usize, String, String)> = outcomes - .iter() - .enumerate() - .filter_map(|(i, o)| match o { - TargetOutcome::Resolved { resolved, .. 
} => { - Some((i, resolved.name.clone(), resolved.version.clone())) - } - _ => None, - }) - .collect(); - if jobs.is_empty() { - return; + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); } - let client = crate::vuln_api::http_client(); let ecosystem = manager.ecosystem(); - let verdicts = pooled_map(&jobs, VERDICT_CONCURRENCY, |(_, name, version)| { - let client = match &client { - Ok(c) => c, - Err(e) => return VerdictStatus::Unverifiable(e.clone()), - }; - match crate::vuln_api::check_package_version( - client, - &cfg.base_url, - ecosystem, - name, - version, - ) { - Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), - Ok(_) => VerdictStatus::Clean, - Err(e) => VerdictStatus::Unverifiable(e.to_string()), - } - }); - - for ((i, _, _), v) in jobs.into_iter().zip(verdicts) { - if let TargetOutcome::Resolved { verdict, .. } = &mut outcomes[i] { - *verdict = v; - } - } + let verdicts = + pooled_map( + &jobs, + VERDICT_CONCURRENCY, + |job| match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + ecosystem, + &job.name, + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }, + ); + jobs.into_iter().zip(verdicts).collect() } /// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`. @@ -99,11 +93,65 @@ fn pooled_map( .collect() } +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. 
+pub(super) fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) so matching the + // pooled results stays linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + } + } + + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. } = &mut outcomes[i] { + *v = verdict.clone(); + } + } + } else { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + origin, + verdict, + }); + } + } + transitive +} + /// Why the gate refuses to run the install. The single owner of both the /// block decision and the escape hatch the refusal advertises — /// `render::print_refusal` only maps variants to text. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(super) enum BlockReason { + /// Every blocking finding predates this command (existing tree only). + /// `--force` is the escape. + ExistingTree, /// Vulnerable findings. `--force` is the escape. Findings, /// Only the recency threshold fired. `--no-fail` is the escape. 
@@ -115,7 +163,11 @@ pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> O return None; } if report.verdicts().any(|v| v.blocks()) { - return Some(BlockReason::Findings); + return Some(if blames_existing_tree(report) { + BlockReason::ExistingTree + } else { + BlockReason::Findings + }); } if !opts.no_fail && report.recent_count() > 0 { return Some(BlockReason::RecencyOnly); @@ -123,6 +175,34 @@ pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> O None } +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, no named target blocks, and every *blocking* tree +/// finding (`VerdictStatus::blocks`, same predicate `block_reason` refuses +/// on) genuinely predates this command. A `Requested` finding (pip `-r`) +/// is added by this command and renders as `(from requirements)`; a +/// `Transitive` finding on any install that names targets or requirements +/// files is being pulled in by them right now. Only a truly bare install +/// (`report.bare_install`) or manifest-declared `PreExisting` findings may +/// blame the existing tree. +fn blames_existing_tree(report: &PrecheckReport) -> bool { + let named_blocks = report.named_verdicts().any(|v| v.blocks()); + if report.vulnerable_count() == 0 || named_blocks { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| t.verdict.blocks()) + .all(|t| match t.origin { + // A locked pin predates the `npm ci` that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + /// Resolve every named target against its registry through the bounded /// worker pool. Order is preserved: outcome `i` belongs to `targets[i]`. 
pub(super) fn verify_all( @@ -186,7 +266,10 @@ fn verify_one( #[cfg(test)] mod tests { use super::super::test_support::*; - use super::super::{InstallTarget, PackageManager, TargetKind, TargetOutcome, VerdictStatus}; + use super::super::{ + run_verdict_pass, InstallTarget, PackageManager, TargetKind, TargetOutcome, TreeOrigin, + TreeOutcome, TreeReport, VerdictStatus, + }; use super::*; fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { @@ -259,6 +342,100 @@ mod tests { } } + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with it. + #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts() + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. 
+ #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. + let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Locked, false, true, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + blames_existing_tree(&report), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// A vulnerable NAMED target must never blame the existing tree, even + /// when a pre-existing tree finding is also vulnerable. 
+ #[test] + fn refusal_blame_requires_clean_named_targets() { + let mut named = resolved_outcome("badpkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Vulnerable(vec![vm("A-1", None)])); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "stickydep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-2", None)]), + origin: TreeOrigin::PreExisting, + }], + }); + assert!(!blames_existing_tree(&report)); + } + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes /// keep NotChecked. @@ -312,6 +489,53 @@ mod tests { )); } + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`. + #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let mut checks = HashMap::new(); + checks.insert( + crate::vuln_api_stub::key("pypi", "evil", "1.0.0"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let cfg = VerdictConfig { + base_url: stub.base_url.clone(), + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + requested: false, + }) + .collect(); + + let results = verdict_pool(jobs, &cfg, PackageManager::Pip); + assert_eq!(results.len(), 6, "all jobs verdicted"); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "only evil flagged"); + assert_eq!(clean, 5, "rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + 
.expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + /// `pooled_map` maps every item and preserves order at any concurrency /// (1 = serial, 8 > item count = all workers spawn but some drain empty). #[test] @@ -325,4 +549,35 @@ mod tests { ); } } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. + #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } } diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs new file mode 100644 index 0000000..755ee50 --- /dev/null +++ b/tests/cli_bare_install.rs @@ -0,0 +1,213 @@ +//! Hermetic e2e tests for zero-spec ("bare") installs. +//! +//! With a `package.json`, bare `npm install` is gated like any other +//! install: the tree pass resolves the full lockfile set and verdicts +//! every package, so a vulnerable lockfile blocks (exit 1, `--force` +//! escape). +//! +//! Harness mirrors `cli_tree.rs`: tree-aware fake npm on a private PATH + +//! local registry stub + in-crate vuln-api stub. `oldpkg` is published in +//! 
2020 so recency never blocks here. + +#![cfg(unix)] + +mod common; + +use common::{key, vulnerable_body, GateHarness, NPM_LOCK, RESOLUTION_FAILS}; +use std::collections::HashMap; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; + +#[test] +fn bare_npm_install_vulnerable_lockfile_blocks() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); + // A bare install names no targets, so everything resolved is the + // existing tree's — the refusal must say so. 
+ let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("your existing dependency tree has known-vulnerable packages"), + "bare install blames the existing tree: {stderr}" + ); +} + +#[test] +fn bare_npm_install_clean_lockfile_proceeds() { + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn bare_npm_install_force_overrides_block() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args(["npm", "--force", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the install"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn bare_npm_resolution_failure_falls_back_with_warning() { + // Fake npm exits 1 on `--package-lock-only`. Nothing named remains to + // verify, so the install proceeds behind the loud fallback warning. 
+ let mut h = GateHarness::new() + .fake_tree_pm("npm", RESOLUTION_FAILS, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "fallback must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_npm_without_package_json_passes_through() { + // No package.json in cwd → nothing to resolve → straight exec, no gate. + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 3) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .in_project_dir() + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(3), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(!stdout.contains("Pre-checking"), "stdout: {stdout}"); +} + +#[test] +fn bare_npm_install_root_redirect_refuses_without_force() { + // A bare `npm install --prefix ` installs another project's whole + // tree; the gate can't resolve that from the CWD and nothing named + // verifies it — fail closed unless --force. 
+ let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args(["npm", "install", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "bare root-redirect must refuse"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("redirects the project root"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + // --force bypasses. + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h + .cmd + .args([ + "npm", + "--force", + "install", + "--prefix", + "/tmp/other-project", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install --prefix /tmp/other-project") + ); +} + +#[test] +fn bare_npm_install_from_subdirectory_is_gated() { + // npm walks ancestors to find the project; the gate must too, or a + // bare install from /src would install the whole (vulnerable) + // tree silently unchecked. 
+ let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .with_project_file("package.json", PACKAGE_JSON) + .in_subdir("src") + .build(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "vulnerable lockfile must block from a subdirectory too" + ); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} diff --git a/tests/cli_install.rs b/tests/cli_install.rs index 4914d1f..3888c95 100644 --- a/tests/cli_install.rs +++ b/tests/cli_install.rs @@ -15,7 +15,7 @@ mod common; use common::{ npm_packument, pip_harness, pypi_release_json, spawn_http_stub, GateHarness, NOT_FOUND_JSON, - OLD_TS, + OLD_TS, RESOLUTION_FAILS, }; use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -60,8 +60,10 @@ fn wrapper_with_hits( pm_exit_code: i32, ) -> (GateHarness, Arc) { let (base_url, registry_hits) = spawn_registry_stub(); + // RESOLUTION_FAILS: the tree dry-run exits non-zero without touching + // the argv marker, so `recorded_argv()` reflects only the real install. let h = GateHarness::new() - .fake_recorder(binary, pm_exit_code) + .fake_tree_pm(binary, RESOLUTION_FAILS, pm_exit_code) .registry_env(registry_env, &base_url) .build(); (h, registry_hits) diff --git a/tests/cli_npm_ci.rs b/tests/cli_npm_ci.rs new file mode 100644 index 0000000..813ea0a --- /dev/null +++ b/tests/cli_npm_ci.rs @@ -0,0 +1,215 @@ +//! Hermetic e2e tests for the `corgea npm ci` gate and install-verb routing. +//! +//! `npm ci` installs the project lockfile exactly as written, so the gate +//! 
verdicts the lockfile-pinned set directly — no dry-run subprocess. Verb +//! routing must also find the install verb behind global flags +//! (`npm --silent install …`), or those spellings would exec ungated. +//! +//! Harness mirrors `cli_bare_install.rs`: fake npm argv recorder on a +//! private PATH + local registry stub + in-crate vuln-api stub. + +#![cfg(unix)] + +mod common; + +use common::{key, vulnerable_body, GateHarness, NPM_LOCK}; +use std::collections::HashMap; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; + +fn vulnerable_evildep_checks() -> HashMap { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + checks +} + +#[test] +fn npm_ci_vulnerable_lockfile_blocks() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(vulnerable_evildep_checks()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(locked)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } +} + +#[test] +fn npm_ci_clean_lockfile_proceeds() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "ci", "--ignore-scripts"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lockfile must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci --ignore-scripts")); +} + 
+#[test] +fn npm_ci_unparsable_lockfile_refuses_without_force() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", "not json") + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unverifiable lockfile refuses"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("cannot verify 'npm ci'") && stderr.contains("--force"), + "stderr: {stderr}" + ); +} + +#[test] +fn npm_ci_unparsable_lockfile_force_proceeds() { + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", "not json") + .build(); + let out = h + .cmd + .args(["npm", "--force", "ci"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds unchecked"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("proceeding under --force"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_ci_root_redirect_refuses_without_force() { + // `npm ci --prefix ../other` installs a different project's lockfile than + // the CWD one we'd verdict — fail closed rather than pass on the wrong + // project. 
+ let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "ci", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "root-redirect ci must refuse"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("--prefix") && stderr.contains("redirected project"), + "stderr: {stderr}" + ); + + // --force bypasses. + let mut h = GateHarness::new() + .fake_recorder("npm", 0) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .with_project_file("package-lock.json", NPM_LOCK) + .build(); + let out = h + .cmd + .args(["npm", "--force", "ci", "--prefix", "/tmp/other-project"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force proceeds"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("ci --prefix /tmp/other-project") + ); +} + +#[test] +fn npm_ci_without_lockfile_execs() { + // npm ci errors on its own without a lockfile; nothing to gate. + let mut h = GateHarness::new() + .fake_recorder("npm", 9) + .vuln_checks(HashMap::new()) + .with_project_file("package.json", PACKAGE_JSON) + .build(); + let out = h.cmd.args(["npm", "ci"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(9), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("ci")); +} + +#[test] +fn global_flags_before_the_verb_still_gate() { + // `npm --loglevel silent install ` must route to the + // gate, not the ungated passthrough. 
+ let mut checks = HashMap::new(); + checks.insert( + key("npm", "oldpkg", "1.0.0"), + vulnerable_body("npm", "oldpkg", "1.0.0", "MAL-2024-0001", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .in_project_dir() + .build(); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "flags before the verb must not skip the gate: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); +} + +#[test] +fn global_flags_before_the_verb_forward_on_clean() { + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .in_project_dir() + .build(); + let out = h + .cmd + .args(["npm", "--loglevel", "silent", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean pin proceeds"); + // The verb leads the reconstructed argv; the global flags still arrive. + assert_eq!( + h.recorded_argv().as_deref(), + Some("install --loglevel silent oldpkg@1.0.0") + ); +} diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs new file mode 100644 index 0000000..b4485ae --- /dev/null +++ b/tests/cli_provenance.rs @@ -0,0 +1,180 @@ +//! Hermetic e2e tests for provenance labels on tree-pass findings: +//! `(from requirements)` for pip-requested packages, `(already in +//! package.json)` for npm direct deps the project already declares (plus the +//! `fix with:` advertised-fix hint), `(transitive)` otherwise. +//! +//! Same harness pattern as `cli_tree.rs`: fake package manager on a private +//! PATH (answers the tree-resolution invocation with a canned payload), +//! a local registry stub, and the in-crate vuln-api stub. `oldpkg` is +//! 
published in 2020 so recency never blocks — every block is the verdict's. + +#![cfg(unix)] + +mod common; + +use common::{key, tree_harness, GateHarness, NPM_LOCK}; +use std::collections::HashMap; + +/// Vulnerable verdict body; `fixed: None` renders `"fixed_version":null`. +fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: Option<&str>) -> String { + common::vulnerable_body(ecosystem, name, version, "MAL-2024-0002", fixed) +} + +/// Pip report: only `reqpkg`, requested (as if it came from a `-r` file). +const PIP_REQ_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}]}"#; + +/// Project manifest that already declares `evildep` as a direct dep. +const PROJECT_MANIFEST: &str = + r#"{"name":"proj","version":"1.0.0","dependencies":{"evildep":"^0.4.0"}}"#; + +/// npm tree harness whose project dir holds a `package.json` that already +/// declares `evildep` as a direct dep. +fn npm_project_harness( + checks: HashMap, + payload: &str, +) -> GateHarness { + tree_harness("npm", checks, HashMap::new(), payload) + .with_project_file("package.json", PROJECT_MANIFEST) +} + +#[test] +fn pip_requirements_finding_labeled_from_requirements() { + // The flagged package comes from a `-r` file (pip marks it `requested`), + // so it must not be mislabeled "(transitive)". 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "reqpkg", "6.0.0"), + vulnerable_body("pypi", "reqpkg", "6.0.0", None), + ); + let mut h = tree_harness("pip", checks, HashMap::new(), PIP_REQ_REPORT); + let out = h + .cmd + .args(["pip", "install", "-r", "reqs.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "requested vuln must block"); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("reqpkg@6.0.0 (from requirements)"), + "stdout: {stdout}" + ); + assert!(!stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn npm_preexisting_direct_dep_labeled_with_fix_hint() { + // `evildep` is already a direct dep in the project's package.json; the + // finding gets the pre-existing label plus the fix-command hint. The + // fix 1.2.2 covers every advisory (`safe_version` is Some), so the hint + // drops the "(advertised fix)" hedge. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2\n"), + "verified fix hint must print without the advertised-fix hedge: {stdout}" + ); +} + +#[test] +fn npm_preexisting_fix_hint_keeps_hedge_when_fix_is_partial() { + // One advisory advertises fix 1.2.2, the other has no fix: bumping is + // still the best move but doesn't clear everything, so the steer line + // stays quiet and the 
fix-command hint keeps its "(advertised fix)" + // hedge. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"1.2.2"}, + {"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), + "partial fix hint must keep the hedge: {stdout}" + ); + assert!( + !stdout.contains("→ safe version"), + "a partial fix must not print the steer: {stdout}" + ); +} + +#[test] +fn npm_preexisting_without_fix_has_no_hint() { + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut h = npm_project_harness(checks, NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("fix with:"), + "no advertised fix → no hint; stdout: {stdout}" + ); +} + +#[test] +fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { + // A named install pulling in a vulnerable transitive is the command's + // doing — the refusal must NOT blame the existing tree. 
+ let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", None), + ); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Refusing to run install. Pass --force to proceed despite findings."), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("your existing dependency tree"), + "a command-added transitive must not blame the existing tree: {stderr}" + ); +} diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs new file mode 100644 index 0000000..a2268a4 --- /dev/null +++ b/tests/cli_tree.rs @@ -0,0 +1,357 @@ +//! Hermetic e2e tests for the full-tree resolution pass +//! (`corgea pip|npm install …` with a `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_verdict.rs` harness pattern (fake package manager on a +//! private PATH + local registry stub + in-crate vuln-api stub) with a +//! tree-aware fake manager: a dry-run invocation answers with a canned +//! payload, every other invocation records its argv to a marker and exits. +//! `oldpkg` is published in 2020 so recency never blocks here — every block +//! is the verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::{ + key, tree_harness, vulnerable_body, GateHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, +}; +use std::collections::HashMap; +use tempfile::TempDir; + +#[test] +fn pip_only_binary_guard_wins_over_user_no_binary() { + // SECURITY: the non-execution guard `--only-binary :all:` must land AFTER + // the user's args (pip format-control is last-wins), so a user + // `--no-binary :all:` can't re-enable sdist builds during the report step. 
+ // The fake pip records its dry-run argv to the marker on the --dry-run + // branch and no-ops the real install, so `recorded_argv()` is the dry-run. + let mut h = GateHarness::new() + .script_with_paths("pip", |_, marker| { + format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) printf '%s' \"$*\" > '{}'; printf '{{\"install\":[{{\"metadata\":{{\"name\":\"oldpkg\",\"version\":\"1.0.0\"}},\"requested\":true}}]}}'; exit 0;; esac\nexit 0\n", + marker.display() + ) + }) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .build(); + let out = h + .cmd + .args(["pip", "install", "--no-binary", ":all:", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree proceeds"); + let argv = h.recorded_argv().expect("dry-run argv recorded"); + assert!( + argv.contains("--no-binary :all:"), + "user flag must be forwarded: {argv}" + ); + assert!( + argv.trim_end().ends_with("--only-binary :all:"), + "the guard must be appended LAST so it wins: {argv}" + ); +} + +#[test] +fn pip_requirements_format_control_refuses_dry_run() { + // SECURITY: pip applies `--no-binary` directives found INSIDE a -r file + // AFTER CLI parsing, overriding the trailing `--only-binary :all:` + // guard — the dry-run would select and build sdists, executing package + // code. The tree pass must refuse to dry-run such files and degrade to + // the named-only fallback (whose parser skips option lines), still + // verdicting the file's registry entries. + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write( + cwd.path().join("requirements.txt"), + "--no-binary :all:\noldpkg==1.0.0\n", + ) + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0004", None), + ); + // The fake pip records argv ONLY on its dry-run branch: a recorded + // marker would mean the dry-run executed against the hostile file. 
+ let mut h = GateHarness::new() + .script_with_paths("pip", |_, marker| { + format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) printf '%s' \"$*\" > '{}';; esac\nexit 0\n", + marker.display() + ) + }) + .oldpkg_registry() + .vuln_checks(checks) + .build(); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + + assert_eq!( + out.status.code(), + Some(1), + "the file's vulnerable entry must still block via the fallback" + ); + assert_eq!( + h.recorded_argv(), + None, + "the dry-run must never execute against a format-control requirements file" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("--no-binary") && stderr.contains("not dry-running"), + "stderr must name the refusing directive: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0004"), "stdout: {stdout}"); +} + +fn vulnerable_evildep_body(ecosystem: &str) -> String { + vulnerable_body(ecosystem, "evildep", "0.4.2", "MAL-2024-0002", None) +} + +#[test] +fn transitive_vulnerable_blocks_install() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. 
+ let cases = [ + ( + "pip", + "pypi", + TREE_REPORT, + &["pip", "install", "oldpkg==1.0.0"][..], + ), + ( + "npm", + "npm", + NPM_LOCK, + &["npm", "install", "oldpkg@1.0.0"][..], + ), + ]; + for (binary, eco, payload, args) in cases { + let mut checks = HashMap::new(); + checks.insert(key(eco, "evildep", "0.4.2"), vulnerable_evildep_body(eco)); + let mut h = tree_harness(binary, checks, HashMap::new(), payload); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "{binary}: transitive vuln must block" + ); + assert_eq!( + h.recorded_argv(), + None, + "{binary} must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "{binary} stdout: {stdout}"); + } + } +} + +#[test] +fn tree_pass_runs_via_pip3_when_pip_is_absent() { + // Only `pip3` exists on PATH (common Linux/macOS). The tree pass must + // use the same pip → pip3 fallback as the exec path instead of silently + // degrading to named-only — the transitive `evildep` must still block. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = tree_harness("pip3", checks, HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("transitive dependencies not checked"), + "tree pass must not degrade with only pip3 on PATH: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} + +#[test] +fn resolution_failure_falls_back_with_loud_warning() { + // The fake manager fails its tree invocation (pip: exits 2 on `--dry-run`, + // simulating an old pip with no `--report`; npm: exits 1 on + // `--package-lock-only`). Stub is all-clean, so the named-only fallback + // proceeds. + let cases = [ + ( + "pip", + &["pip", "install", "oldpkg==1.0.0"][..], + "install oldpkg==1.0.0", + ), + ( + "npm", + &["npm", "install", "oldpkg@1.0.0"][..], + "install oldpkg@1.0.0", + ), + ]; + for (binary, args, forwarded_argv) in cases { + let mut h = tree_harness(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "{binary}: clean named-only must proceed" + ); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "{binary} stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); + } +} + +#[test] +fn pip_requirements_fallback_checks_file_entries_when_tree_fails() { + // A VCS requirement can make pip's dry-run fail before it emits a report. 
+ // The degraded path must still verify registry requirements from the file + // and surface the VCS row as skipped instead of producing an empty check. + let cwd = TempDir::new().expect("temp cwd"); + std::fs::write( + cwd.path().join("requirements.txt"), + "oldpkg==1.0.0\nidna @ git+https://github.com/jazzband/idna.git@main\n", + ) + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0003", None), + ); + let mut h = tree_harness("pip", checks, HashMap::new(), RESOLUTION_FAILS); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + + assert_eq!(out.status.code(), Some(1), "requirements vuln must block"); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable requirements entry" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in [ + "oldpkg==1.0.0", + "MAL-2024-0003", + "idna @ git+https://github.com/jazzband/idna.git@main", + "PEP 508 direct reference", + ] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {stderr}" + ); +} + +#[test] +fn pip_clean_tree_proceeds() { + // Stub default-clean (no overrides), so every resolved package is clean. 
+ let mut h = tree_harness("pip", HashMap::new(), HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_root_redirect_flag_degrades_to_named_only() { + // `--prefix` overrides npm's project root regardless of cwd, so the + // throwaway-dir resolution would write the REAL lockfile at that path. + // The tree pass must refuse and fall back to named-only instead. + let elsewhere = TempDir::new().expect("redirect target"); + let lock_path = elsewhere.path().join("package-lock.json"); + + let mut h = tree_harness("npm", HashMap::new(), HashMap::new(), NPM_LOCK); + let out = h + .cmd + .args([ + "npm", + "install", + "--prefix", + elsewhere.path().to_str().unwrap(), + "oldpkg@1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named target proceeds"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("transitive dependencies not checked") && stderr.contains("--prefix"), + "must degrade loudly naming the flag: {stderr}" + ); + assert!( + !lock_path.exists(), + "the dry run must never write through --prefix" + ); + // The real install still gets the user's full argv. + assert_eq!( + h.recorded_argv(), + Some(format!( + "install --prefix {} oldpkg@1.0.0", + elsewhere.path().display() + )) + ); +} + +#[test] +fn npm_does_not_touch_project_lockfile() { + // Run from a project dir holding sentinel manifests; the resolver works in + // a throwaway copy, so after a gated run both files are byte-identical. 
+ let project = TempDir::new().expect("project dir"); + let pkg_json = project.path().join("package.json"); + let lock_json = project.path().join("package-lock.json"); + let pkg_sentinel = r#"{"name":"sentinel","version":"0.0.0"}"#; + let lock_sentinel = r#"{"name":"sentinel","lockfileVersion":3,"packages":{}}"#; + std::fs::write(&pkg_json, pkg_sentinel).expect("write package.json"); + std::fs::write(&lock_json, lock_sentinel).expect("write package-lock.json"); + + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_evildep_body("npm"), + ); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + + assert_eq!( + std::fs::read_to_string(&pkg_json).unwrap(), + pkg_sentinel, + "package.json must be untouched" + ); + assert_eq!( + std::fs::read_to_string(&lock_json).unwrap(), + lock_sentinel, + "package-lock.json must be untouched" + ); +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 8f84fa3..5fbf988 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -63,8 +63,11 @@ fn alternate_pypi_spelling_hits_canonical_verdict() { ), _ => ("404 Not Found", common::NOT_FOUND_JSON.to_string()), }); + // Like `pip_harness`: the tree dry-run exits 2 (old pip, no --report), + // so the block is the named verdict's doing and a recorded argv would + // mean the real install ran. 
let mut h = common::GateHarness::new() - .fake_recorder("pip", 0) + .fake_tree_pm("pip", common::RESOLUTION_FAILS, 0) .registry_env("CORGEA_PYPI_REGISTRY", &registry) .vuln_checks(checks) .build(); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index fc7b70d..46c5ace 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -69,6 +69,20 @@ pub fn npm_packument(version: &str, ts: &str) -> String { ) } +/// Pip `--report -` payload: `oldpkg` (named/requested) + `evildep` +/// (transitive). +#[allow(dead_code)] +pub const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +#[allow(dead_code)] +pub const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + /// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 /// port; `route` maps a request path to `(status line, body)`. Returns the /// base URL. @@ -138,6 +152,18 @@ pub fn write_script(dir: &std::path::Path, binary: &str, script: &str) { .expect("chmod fake script"); } +/// Shell loop that emits the file at `path` line by line via builtins — +/// works under the locked-down test PATH (no `cat`); the `|| [ -n "$line" ]` +/// guard keeps a final line with no trailing newline. +#[cfg(unix)] +#[allow(dead_code)] +pub fn emit(path: &std::path::Path) -> String { + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", + path.display() + ) +} + /// Write an executable fake package manager named `binary` into `dir`. It /// records its argv to `marker` and exits `exit_code` — proving both "the /// install ran (with these args)" and exit-code forwarding. 
@@ -157,6 +183,46 @@ pub fn write_fake_recorder( write_script(dir, binary, &script); } +/// Sentinel payload that makes a tree-aware fake manager exit non-zero on +/// its tree (resolution) invocation, forcing the named-only fallback. +#[allow(dead_code)] +pub const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Write an executable tree-aware fake package manager into `dir`. An +/// invocation carrying the manager's tree flag emits `payload` (stdout for +/// pip's `--dry-run --report -`, `./package-lock.json` for npm's +/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) +/// and exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits +/// non-zero instead, emitting nothing. Any other invocation records its +/// argv to `marker` and exits `exit_code`. +#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_tree_pm( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + payload: &str, + exit_code: i32, +) { + let (tree_flag, redirect, fail_exit) = match binary { + "pip" | "pip3" => ("--dry-run", "", 2), + "npm" => ("--package-lock-only", " > package-lock.json", 1), + other => panic!("unsupported fake manager {other}"), + }; + let tree_branch = if payload == RESOLUTION_FAILS { + format!("exit {fail_exit}") + } else { + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + format!("{}{redirect}; exit 0", emit(&payload_path)) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + write_script(dir, binary, &script); +} + /// One configurable harness behind every gate test: isolated `corgea`, a /// private PATH of fake package managers, optional registry stubs, the /// vuln-api stub, and an optional throwaway project cwd. 
@@ -195,6 +261,13 @@ impl GateHarness { } } + /// Tree-aware fake manager: emits `payload` on its tree flag, records + /// argv and exits `exit_code` otherwise. + pub fn fake_tree_pm(self, binary: &str, payload: &str, exit_code: i32) -> Self { + write_fake_tree_pm(self._bin.path(), binary, &self.marker, payload, exit_code); + self + } + /// Plain argv recorder. Call repeatedly for multiple binaries; call /// never for an empty PATH. pub fn fake_recorder(self, binary: &str, exit_code: i32) -> Self { @@ -267,6 +340,18 @@ impl GateHarness { self } + /// Re-point the corgea invocation at a (created) subdirectory of the + /// project dir — for tests proving ancestor-walk behavior. + pub fn in_subdir(mut self, name: &str) -> Self { + if self.project.is_none() { + self = self.in_project_dir(); + } + let dir = self.project.as_ref().unwrap().path().join(name); + std::fs::create_dir_all(&dir).expect("create subdir"); + self.cmd.current_dir(&dir); + self + } + pub fn build(mut self) -> Self { if !self.vuln_api { return self; @@ -286,9 +371,9 @@ impl GateHarness { } /// `corgea` wired to the wildcard pypi registry stub (every package -/// published 2020 → recency never blocks), a fake pip recording its argv -/// to a marker, and a vuln-api stub. Every block in a `pip_harness` test -/// is the verdict's doing. +/// published 2020 → recency never blocks), a report-less fake pip +/// (recording its argv to a marker), and a vuln-api stub. Every block in a +/// `pip_harness` test is the verdict's doing. #[cfg(unix)] #[allow(dead_code)] pub fn pip_harness( @@ -296,10 +381,31 @@ pub fn pip_harness( statuses: HashMap, pip_exit_code: i32, ) -> GateHarness { + // RESOLUTION_FAILS models an old pip with no `--report`: the tree + // dry-run exits 2, so these tests exercise the named-only fallback. 
GateHarness::new() - .fake_recorder("pip", pip_exit_code) + .fake_tree_pm("pip", RESOLUTION_FAILS, pip_exit_code) .wildcard_pypi_registry() .vuln_checks(checks) .vuln_statuses(statuses) .build() } + +/// `corgea` wired to the oldpkg registry stub, a tree-aware fake `binary` +/// (`"pip"`, `"pip3"`, or `"npm"`) answering the tree pass with `payload`, +/// and a vuln-api stub. +#[cfg(unix)] +#[allow(dead_code)] +pub fn tree_harness( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, +) -> GateHarness { + GateHarness::new() + .fake_tree_pm(binary, payload, 0) + .oldpkg_registry() + .vuln_checks(checks) + .vuln_statuses(statuses) + .build() +}