From de1e13b276196695d7226dd97b269a113888bece Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Tue, 9 Jun 2026 18:08:18 +0200 Subject: [PATCH 01/59] Add vuln-api client + verified contract (install-vuln-gate spike) Port the vuln-api client from #89 onto main and reconcile its types against the authoritative Cloudflare Worker contract. - src/vuln_api/mod.rs: standalone reqwest::blocking client for /v1/.../check and /v1/advisories/:id with per-call auth (JWT->Bearer, else CORGEA-TOKEN), 404->clean, 401/403/429/5xx handling, a single 429 retry honoring Retry-After, and confused-deputy/identity guards. - Fix AdvisoryResponse field mapping: the server emits 'summary' and 'severity', so title/severity_level now carry #[serde(rename)]. VulnCheckResponse/VulnMatch already match toCheckMatch, unchanged. - Wire the module into the library crate (pub mod vuln_api) with a private 'mod log;' facade so crate::log::debug resolves there. - Add five committed fixtures (clean, unknown, vulnerable CVE, malware, advisory detail) built to the server serialization, with deserialization tests proving every /check variant the gate depends on. 
--- src/lib.rs | 5 + src/vuln_api/mod.rs | 873 ++++++++++++++++++ tests/fixtures/vuln_api/advisory_detail.json | 15 + tests/fixtures/vuln_api/check_clean.json | 1 + tests/fixtures/vuln_api/check_malware.json | 15 + tests/fixtures/vuln_api/check_unknown.json | 1 + tests/fixtures/vuln_api/check_vulnerable.json | 15 + 7 files changed, 925 insertions(+) create mode 100644 src/vuln_api/mod.rs create mode 100644 tests/fixtures/vuln_api/advisory_detail.json create mode 100644 tests/fixtures/vuln_api/check_clean.json create mode 100644 tests/fixtures/vuln_api/check_malware.json create mode 100644 tests/fixtures/vuln_api/check_unknown.json create mode 100644 tests/fixtures/vuln_api/check_vulnerable.json diff --git a/src/lib.rs b/src/lib.rs index 49bc6d0..c780717 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,6 @@ pub mod deps; +// Also declared in the binary crate (src/main.rs); re-declared here so library modules +// (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that +// compiles cleanly in both crates. +mod log; +pub mod vuln_api; diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs new file mode 100644 index 0000000..0cb24f1 --- /dev/null +++ b/src/vuln_api/mod.rs @@ -0,0 +1,873 @@ +//! Corgea vuln-api client. +//! +//! Deliberately independent of `utils::api::SHARED_CLIENT` because: +//! * the vuln-api host is user-configurable via `CORGEA_VULN_API_URL`, +//! so we must never silently replay Corgea cookies / non-JWT +//! `CORGEA-TOKEN` headers via redirect following or the shared +//! cookie jar. +//! * the shared client's `check_for_warnings` exits the process on +//! HTTP 410, which is wrong for per-dep CVE lookups. +//! +//! The auth header is attached explicitly per call from a caller-owned +//! token (no global state). 
+ +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +use crate::log::debug; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); + +/// Cap on how much of an error response body we splice into the +/// user-facing error message. Fits a CLI line, captures +/// `{"error":"…"}`-class messages comfortably, and truncates +/// Cloudflare HTML before it gets ugly. +const ERROR_BODY_SNIPPET_LEN: usize = 300; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnCheckResponse { + pub ecosystem: String, + pub package_name: String, + pub version: String, + pub is_vulnerable: bool, + pub matches: Vec<VulnMatch>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct VulnMatch { + pub advisory_id: String, + pub severity_level: String, + pub tier: u8, + pub vulnerable_version_range: Option<String>, + pub fixed_version: Option<String>, +} + +/// Subset of `GET /v1/advisories/:id` we consume. +/// +/// Field-name notes (kept stable for callers, but mapped to the real +/// server shape via `#[serde(rename = …)]`): +/// +/// * `advisory_id` ← server's `id` +/// * `title` ← server's `summary` +/// * `severity_level` ← server's `severity` +/// * `url` ← server's `source_url` +/// * `tier` is `Option<u8>` because the server may emit `null` +/// (see `VULNERABILITY_SERVICE.md` §5). +/// +/// The server also returns many fields we don't currently use +/// (`alias`, `severity_badge`, `tier_score`, `details`, `llm_summary`, +/// `packages`, `cwes`, `raw`, …). `serde` ignores unknown fields by +/// default; we add them here only when a caller needs them. No +/// top-level `remediation` field exists on the server — do not add one +/// (server's `llm_summary` is a 1-2 sentence developer summary, not +/// remediation guidance, and the semantics differ).
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AdvisoryResponse { + #[serde(rename = "id")] + pub advisory_id: String, + #[serde(default)] + pub aliases: Vec<String>, + #[serde(default, rename = "summary")] + pub title: Option<String>, + #[serde(default, rename = "severity")] + pub severity_level: Option<String>, + #[serde(default)] + pub tier: Option<u8>, + #[serde(default, rename = "source_url")] + pub url: Option<String>, +} + +fn user_agent() -> String { + format!("corgea-cli/{} (vuln-api)", env!("CARGO_PKG_VERSION")) +} + +pub fn http_client() -> Result<reqwest::blocking::Client, String> { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|e| format!("failed to build vuln-api http client: {}", e)) +} + +fn is_jwt(token: &str) -> bool { + let parts: Vec<&str> = token.splitn(4, '.').collect(); + parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) +} + +fn normalize_base_url(base_url: &str) -> String { + base_url.trim_end_matches('/').to_string() +} + +/// Encode package name for the vuln-api path segment. +/// npm scoped names: `@scope/pkg` → `@scope%2fpkg` (mirrors registry.rs). +fn encode_package_name(ecosystem: &str, name: &str) -> String { + if ecosystem.eq_ignore_ascii_case("npm") { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() + } else { + urlencoding::encode(name).into_owned() + } +} + +/// Build an authed JSON GET: the standard `Accept` / `CORGEA-SOURCE` headers +/// plus the per-call auth header (JWT → `Authorization: Bearer`, otherwise +/// `CORGEA-TOKEN`). The single place auth is attached, shared by every route.
+fn build_authed_get( + client: &reqwest::blocking::Client, + url: &str, + token: &str, +) -> reqwest::blocking::RequestBuilder { + let mut req = client + .get(url) + .header("Accept", "application/json") + .header("CORGEA-SOURCE", "cli"); + if is_jwt(token) { + req = req.header("Authorization", format!("Bearer {}", token)); + } else { + req = req.header("CORGEA-TOKEN", token); + } + req +} + +/// Validate the per-call preconditions shared by every vuln-api request: +/// a non-empty token and a non-empty (trailing-slash-normalized) base URL. +/// Returns the normalized base so callers don't re-derive it. +fn validated_base(token: &str, base_url: &str) -> Result<String, Box<dyn std::error::Error>> { + if token.is_empty() { + return Err("missing Corgea token for vuln-api request".into()); + } + let base = normalize_base_url(base_url); + if base.is_empty() { + return Err("vuln-api base URL is empty".into()); + } + Ok(base) +} + +/// Format a server error body into a `": <snippet>"` suffix for a single-line +/// CLI error, or an empty string when the body is empty. Consumes the response. +fn error_body_suffix(response: reqwest::blocking::Response) -> String { + let body = response.text().unwrap_or_default(); + let snippet = body_snippet(&body, ERROR_BODY_SNIPPET_LEN); + if snippet.is_empty() { + String::new() + } else { + format!(": {}", snippet) + } +} + +/// Collapse whitespace and truncate at `max_chars` so a server error +/// body can be spliced into a single-line CLI error message without +/// dragging in HTML newlines or runaway length. Returns empty string +/// when the body is empty so the caller can format conditionally. +/// Char-boundary safe — operates on `chars()`, never byte slices.
+fn body_snippet(body: &str, max_chars: usize) -> String { + let collapsed: String = body.split_whitespace().collect::<Vec<&str>>().join(" "); + if collapsed.is_empty() { + return String::new(); + } + let truncated: String = collapsed.chars().take(max_chars).collect(); + if collapsed.chars().count() > max_chars { + format!("{}…", truncated) + } else { + truncated + } +} + +fn retry_after_seconds(response: &reqwest::blocking::Response) -> u64 { + response + .headers() + .get("Retry-After") + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.trim().parse::<u64>().ok()) + .map(|s| s.clamp(1, 10)) + .unwrap_or(1) +} + +fn send_package_check_with_429_retry( + client: &reqwest::blocking::Client, + url: &str, + token: &str, +) -> Result<reqwest::blocking::Response, Box<dyn std::error::Error>> { + let response = build_authed_get(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e))?; + + if response.status().as_u16() == 429 { + let wait = retry_after_seconds(&response); + std::thread::sleep(Duration::from_secs(wait)); + return build_authed_get(client, url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api request: {}", e).into()); + } + Ok(response) +} + +pub fn check_package_version( + client: &reqwest::blocking::Client, + base_url: &str, + token: &str, + ecosystem: &str, + name: &str, + version: &str, +) -> Result<VulnCheckResponse, Box<dyn std::error::Error>> { + let base = validated_base(token, base_url)?; + let encoded_name = encode_package_name(ecosystem, name); + let encoded_version = urlencoding::encode(version); + let url = format!( + "{}/v1/packages/{}/{}/versions/{}/check", + base, ecosystem, encoded_name, encoded_version + ); + + debug(&format!("Sending vuln-api request to URL: {}", url)); + + let response = send_package_check_with_429_retry(client, &url, token)?; + + let status = response.status(); + match status.as_u16() { + 401 => { + return Err( + "vuln-api rejected the Corgea token (run `corgea login` to refresh)".into(), + ); + } + 403 => { + return Err("vuln-api access denied (check your Corgea 
plan/permissions)".into()); + } + 404 => { + return Ok(VulnCheckResponse { + ecosystem: ecosystem.to_string(), + package_name: name.to_string(), + version: version.to_string(), + is_vulnerable: false, + matches: vec![], + }); + } + 429 => { + return Err("vuln-api rate-limited this request (retry later)".into()); + } + 500..=599 => { + return Err(format!("vuln-api unavailable (HTTP {})", status.as_u16()).into()); + } + code if !status.is_success() => { + let suffix = error_body_suffix(response); + return Err(format!("vuln-api returned unexpected HTTP {}{}", code, suffix).into()); + } + _ => {} + } + + let response_text = response.text()?; + let parsed: VulnCheckResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse vuln-api response: {}. Body: {}", + e, response_text + )); + format!("Failed to parse vuln-api response: {}", e) + })?; + + // Confused-deputy guard: refuse to attribute advisories to a different + // (name, version, ecosystem) than what we asked about. The server is + // allowed to be silent on identity, but if it answers, it must match. + if !parsed.ecosystem.is_empty() && !parsed.ecosystem.eq_ignore_ascii_case(ecosystem) { + return Err(format!( + "vuln-api response ecosystem '{}' does not match request '{}'", + parsed.ecosystem, ecosystem + ) + .into()); + } + if !parsed.package_name.is_empty() && !parsed.package_name.eq_ignore_ascii_case(name) { + return Err(format!( + "vuln-api response package '{}' does not match request '{}'", + parsed.package_name, name + ) + .into()); + } + if !parsed.version.is_empty() && parsed.version != version { + return Err(format!( + "vuln-api response version '{}' does not match request '{}'", + parsed.version, version + ) + .into()); + } + + // is_vulnerable=true with no matches is contradictory — treat as an + // error so the caller can surface it rather than silently demoting + // the dep to "clean". 
+ if parsed.is_vulnerable && parsed.matches.is_empty() { + return Err( + "vuln-api reported is_vulnerable=true with no matches; refusing to interpret".into(), + ); + } + + Ok(parsed) +} + +pub fn get_advisory( + client: &reqwest::blocking::Client, + base_url: &str, + token: &str, + advisory_id: &str, +) -> Result<AdvisoryResponse, Box<dyn std::error::Error>> { + let base = validated_base(token, base_url)?; + let encoded_id = urlencoding::encode(advisory_id); + let url = format!("{}/v1/advisories/{}", base, encoded_id); + + debug(&format!( + "Sending vuln-api advisory request to URL: {}", + url + )); + + let response = build_authed_get(client, &url, token) + .send() + .map_err(|e| format!("Failed to send vuln-api advisory request: {}", e))?; + + let status = response.status(); + if !status.is_success() { + let suffix = error_body_suffix(response); + return Err(format!( + "vuln-api advisory lookup failed: HTTP {}{}", + status.as_u16(), + suffix + ) + .into()); + } + + let response_text = response.text()?; + let parsed: AdvisoryResponse = serde_json::from_str(&response_text).map_err(|e| { + debug(&format!( + "Failed to parse vuln-api advisory response: {}. Body: {}", + e, response_text + )); + format!("Failed to parse vuln-api advisory response: {}", e) + })?; + + // Identity guard: refuse a response that names a different advisory + // than we asked about. The server is allowed to be silent on + // identity (empty advisory_id), but if it answers it must match + // either the canonical id or one of the aliases.
+ if !parsed.advisory_id.is_empty() + && !parsed.advisory_id.eq_ignore_ascii_case(advisory_id) + && !parsed + .aliases + .iter() + .any(|a| a.eq_ignore_ascii_case(advisory_id)) + { + return Err(format!( + "vuln-api response advisory_id '{}' does not match request '{}'", + parsed.advisory_id, advisory_id + ) + .into()); + } + + Ok(parsed) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::io::{Read, Write}; + use std::net::TcpListener; + use std::sync::{Arc, Mutex}; + use std::thread; + use std::time::Duration; + + /// `(ecosystem, name, version)` request key for the stub's route table. + type CheckKey = (String, String, String); + /// Maps a request key to a canned `(status, body)` response. + type KeyedResponses = HashMap<CheckKey, (u16, String)>; + + /// Reason phrase for the stub's status line. + fn status_text(code: u16) -> &'static str { + match code { + 401 => "Unauthorized", + 403 => "Forbidden", + 404 => "Not Found", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ => "Error", + } + } + + struct PackageCheckStub { + base_url: String, + _handle: thread::JoinHandle<()>, + } + + /// Keys in `retry_after_keys`: first hit → 429 + Retry-After: 1, second hit → + /// response from `responses` (or clean 200 fallback). + /// `advisory_responses` keys advisory id → (status, body) for the + /// `/v1/advisories/:id` route. Empty map = route returns 404.
+ fn spawn_package_check_stub_with_retry_keys( + responses: KeyedResponses, + retry_after_keys: KeyedResponses, + advisory_responses: HashMap<String, (u16, String)>, + ) -> PackageCheckStub { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let port = listener.local_addr().unwrap().port(); + let base_url = format!("http://127.0.0.1:{}", port); + let responses = Arc::new(Mutex::new(responses)); + let retry_after_keys = Arc::new(Mutex::new(retry_after_keys)); + let advisory_responses = Arc::new(Mutex::new(advisory_responses)); + let hit_counts: Arc<Mutex<HashMap<CheckKey, u32>>> = Arc::new(Mutex::new(HashMap::new())); + + let handle = thread::spawn(move || { + for stream in listener.incoming().take(32) { + let Ok(mut stream) = stream else { + continue; + }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + let (status_code, status_text, body, extra_headers) = if let Some(path) = + req.lines().next().and_then(|l| l.split_whitespace().nth(1)) + { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let eco = parts[2].to_string(); + let name = urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(); + let ver = urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(); + let key = (eco.clone(), name.clone(), ver.clone()); + let hits = { + let mut counts = hit_counts.lock().unwrap(); + let entry = counts.entry(key.clone()).or_insert(0); + *entry += 1; + *entry + }; + + let retry_body = retry_after_keys.lock().unwrap().get(&key).cloned(); + if retry_body.is_some() && hits == 1 { + let (code, body) = (429, r#"{"error":"rate limited"}"#.to_string()); + let text = "Too Many Requests"; + 
+ (code, text, body, "Retry-After: 1\r\n".to_string()) + } else { + let (code, body) = responses + .lock() + .unwrap() + .get(&key) + .cloned() + .or(retry_body) + .unwrap_or((200, r#"{"is_vulnerable":false,"matches":[]}"#.into())); + (code, status_text(code), body, String::new()) + } + } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { + let id = urlencoding::decode(parts[2]) + .unwrap_or_default() + .into_owned(); + let (code, body) = advisory_responses + .lock() + .unwrap() + .get(&id) + .cloned() + .unwrap_or((404, r#"{"error":"not found"}"#.into())); + (code, status_text(code), body, String::new()) + } else { + ( + 404, + "Not Found", + r#"{"error":"not found"}"#.into(), + String::new(), + ) + } + } else { + ( + 400, + "Bad Request", + r#"{"error":"bad request"}"#.into(), + String::new(), + ) + }; + + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\n\r\n{}", + status_code, status_text, extra_headers, body.len(), body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + + thread::sleep(Duration::from_millis(50)); + PackageCheckStub { + base_url, + _handle: handle, + } + } + + fn check_with_stub_status( + status_code: u16, + body: &str, + ) -> Result<VulnCheckResponse, Box<dyn std::error::Error>> { + let client = http_client().expect("test client"); + let mut responses = HashMap::new(); + responses.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + (status_code, body.to_string()), + ); + let stub = + spawn_package_check_stub_with_retry_keys(responses, HashMap::new(), HashMap::new()); + check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + } + + #[test] + fn check_package_version_401_returns_actionable_error() { + let err = check_with_stub_status(401, r#"{"error":"unauthorized"}"#) + .expect_err("401 should fail"); + assert!(err.to_string().contains("rejected the Corgea token")); + } + + #[test] + fn check_package_version_403_returns_actionable_error() { 
+ let err = + check_with_stub_status(403, r#"{"error":"forbidden"}"#).expect_err("403 should fail"); + assert!(err.to_string().contains("access denied")); + } + + #[test] + fn check_package_version_404_returns_clean() { + let resp = + check_with_stub_status(404, r#"{"error":"not found"}"#).expect("404 should be clean"); + assert!(!resp.is_vulnerable); + assert!(resp.matches.is_empty()); + assert_eq!(resp.package_name, "lodash"); + assert_eq!(resp.version, "4.17.20"); + } + + #[test] + fn check_package_version_persistent_429_returns_actionable_error() { + let err = check_with_stub_status(429, r#"{"error":"rate limited"}"#) + .expect_err("429 should fail"); + assert!(err.to_string().contains("rate-limited")); + } + + #[test] + fn check_package_version_429_retries_then_succeeds() { + let client = http_client().unwrap(); + let vulnerable_body = r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-retry-test", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let mut retry_after_keys = HashMap::new(); + retry_after_keys.insert( + ("npm".into(), "lodash".into(), "4.17.20".into()), + (200, vulnerable_body.to_string()), + ); + let stub = spawn_package_check_stub_with_retry_keys( + HashMap::new(), + retry_after_keys, + HashMap::new(), + ); + let resp = check_package_version( + &client, + &stub.base_url, + "test-token", + "npm", + "lodash", + "4.17.20", + ) + .expect("retry should succeed"); + assert!(resp.is_vulnerable); + } + + #[test] + fn check_package_version_500_returns_unavailable() { + let err = + check_with_stub_status(500, r#"{"error":"internal"}"#).expect_err("500 should fail"); + assert!(err.to_string().contains("unavailable (HTTP 500)")); + } + + #[test] + fn check_package_version_unexpected_status_includes_body_snippet() { + let err = + check_with_stub_status(418, 
r#"{"error":"teapot"}"#).expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + assert!( + msg.contains("teapot"), + "expected body in error; got: {}", + msg + ); + } + + #[test] + fn check_package_version_unexpected_status_omits_body_when_empty() { + let err = check_with_stub_status(418, "").expect_err("418 should fail"); + let msg = err.to_string(); + assert!(msg.contains("unexpected HTTP 418"), "got: {}", msg); + // Body is empty → message must end at the status, no dangling ":" or whitespace. + assert!( + msg.trim_end().ends_with("418"), + "expected message to end at status code; got: {:?}", + msg + ); + } + + #[test] + fn get_advisory_non_success_includes_body_snippet() { + let client = http_client().expect("test client"); + let mut advisories = HashMap::new(); + advisories.insert( + "GHSA-deploy-gap".to_string(), + (400, r#"{"error":"Invalid url"}"#.to_string()), + ); + let stub = + spawn_package_check_stub_with_retry_keys(HashMap::new(), HashMap::new(), advisories); + let err = get_advisory(&client, &stub.base_url, "test-token", "GHSA-deploy-gap") + .expect_err("400 should fail"); + let msg = err.to_string(); + assert!( + msg.contains("advisory lookup failed: HTTP 400"), + "got: {}", + msg + ); + assert!( + msg.contains("Invalid url"), + "expected body snippet in advisory error; got: {}", + msg + ); + } + + #[test] + fn body_snippet_truncates_at_char_boundary() { + // Multi-byte char ("é" is 2 bytes UTF-8). Naïve byte-slicing would + // panic; we must operate on chars(). + let input = "é".repeat(500); + let out = body_snippet(&input, ERROR_BODY_SNIPPET_LEN); + assert!(out.ends_with('…'), "expected ellipsis; got: {:?}", out); + // 300 "é" chars + the ellipsis. 
+ assert_eq!(out.chars().count(), ERROR_BODY_SNIPPET_LEN + 1); + } + + #[test] + fn body_snippet_collapses_whitespace() { + assert_eq!(body_snippet("foo\n bar\t\tbaz", 100), "foo bar baz"); + } + + #[test] + fn body_snippet_empty_returns_empty() { + assert_eq!(body_snippet("", 100), ""); + assert_eq!(body_snippet(" \n\t ", 100), ""); + } + + #[test] + fn encode_package_name_scoped_npm() { + assert_eq!(encode_package_name("npm", "@types/node"), "@types%2fnode"); + assert_eq!(encode_package_name("npm", "lodash"), "lodash"); + } + + #[test] + fn encode_package_name_pypi() { + assert_eq!(encode_package_name("PyPI", "requests"), "requests"); + } + + #[test] + fn encode_package_name_npm_case_insensitive() { + // Defends against vuln_api_ecosystem() casing changes. + assert_eq!(encode_package_name("NPM", "@types/node"), "@types%2fnode"); + } + + #[test] + fn deserialize_vuln_check_response() { + let body = r#"{ + "ecosystem": "npm", + "package_name": "lodash", + "version": "4.17.20", + "is_vulnerable": true, + "matches": [{ + "advisory_id": "GHSA-xxxx-yyyy-zzzz", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": "<4.17.21", + "fixed_version": "4.17.21" + }] + }"#; + let parsed: VulnCheckResponse = serde_json::from_str(body).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + assert_eq!(parsed.matches[0].advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.matches[0].tier, 1); + } + + #[test] + fn normalize_base_url_strips_trailing_slash() { + assert_eq!( + normalize_base_url("http://localhost:8080/"), + "http://localhost:8080" + ); + } + + #[test] + fn is_jwt_detection() { + assert!(is_jwt("a.b.c")); + assert!(!is_jwt("plain-token")); + assert!(!is_jwt("a.b")); + assert!(!is_jwt("a..c")); + } + + #[test] + fn deserialize_advisory_response_real_server_shape() { + // Mirrors the worker's emitted payload (cve_worker/src/worker.js): + // server emits `id` (not `advisory_id`) and `source_url` (not `url`), + // plus many 
fields we ignore. No top-level `remediation` exists. + let body = r#"{ + "id": "GHSA-xxxx-yyyy-zzzz", + "source": "ghsa", + "source_url": "https://github.com/advisories/GHSA-xxxx-yyyy-zzzz", + "alias": "CVE-2026-12345", + "aliases": ["CVE-2026-12345"], + "ecosystem": "npm", + "summary": "Prototype pollution in lodash", + "severity": "HIGH", + "severity_badge": "HIGH", + "tier": 1, + "tier_score": 74.5, + "llm_summary": "Short developer-facing summary.", + "packages": [], + "cwes": [] + }"#; + let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); + assert_eq!(parsed.advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.aliases, vec!["CVE-2026-12345".to_string()]); + assert_eq!(parsed.tier, Some(1)); + assert_eq!(parsed.severity_level.as_deref(), Some("HIGH")); + assert_eq!( + parsed.title.as_deref(), + Some("Prototype pollution in lodash") + ); + assert_eq!( + parsed.url.as_deref(), + Some("https://github.com/advisories/GHSA-xxxx-yyyy-zzzz") + ); + } + + #[test] + fn deserialize_advisory_response_tier_null_and_missing_source_url() { + // Server emits `tier: null` for unscored advisories + // (VULNERABILITY_SERVICE.md §5). `source_url` may also be absent. + let body = r#"{ + "id": "GHSA-only-id", + "tier": null + }"#; + let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); + assert_eq!(parsed.advisory_id, "GHSA-only-id"); + assert!(parsed.tier.is_none()); + assert!(parsed.aliases.is_empty()); + assert!(parsed.title.is_none()); + assert!(parsed.severity_level.is_none()); + assert!(parsed.url.is_none()); + } + + // Fixture-based deserialization tests — committed JSON under tests/fixtures/vuln_api/, + // built to the authoritative server serialization (vuln-api/cve_worker/src/worker.js). + macro_rules! 
fixture { + ($name:literal) => { + include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/fixtures/vuln_api/", + $name + )) + }; + } + + #[test] + fn fixture_check_clean_deserializes() { + let parsed: VulnCheckResponse = serde_json::from_str(fixture!("check_clean.json")).unwrap(); + assert!(!parsed.is_vulnerable); + assert!(parsed.matches.is_empty()); + assert_eq!(parsed.ecosystem, "pypi"); + assert_eq!(parsed.package_name, "requests"); + assert_eq!(parsed.version, "2.31.0"); + } + + #[test] + fn fixture_check_unknown_deserializes_as_clean() { + // /check returns 200 is_vulnerable:false matches:[] for an unknown package; + // the 404 {"error":"Package not found"} body is the profile route, not /check. + let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_unknown.json")).unwrap(); + assert!(!parsed.is_vulnerable); + assert!(parsed.matches.is_empty()); + } + + #[test] + fn fixture_check_vulnerable_deserializes() { + let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_vulnerable.json")).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + let m = &parsed.matches[0]; + assert_eq!(m.advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(m.severity_level, "high"); + assert_eq!(m.tier, 1); + assert_eq!(m.vulnerable_version_range.as_deref(), Some(">=3.2,<3.2.5")); + assert_eq!(m.fixed_version.as_deref(), Some("3.2.5")); + } + + #[test] + fn fixture_check_malware_deserializes() { + // Malware surfaces through /check as an ordinary is_vulnerable:true match + // (MAL-* id); /malware items carry no version, so /check is the per-version signal. 
+ let parsed: VulnCheckResponse = + serde_json::from_str(fixture!("check_malware.json")).unwrap(); + assert!(parsed.is_vulnerable); + assert_eq!(parsed.matches.len(), 1); + let m = &parsed.matches[0]; + assert!(m.advisory_id.starts_with("MAL-")); + assert!(m.vulnerable_version_range.is_none()); + assert!(m.fixed_version.is_none()); + } + + #[test] + fn fixture_advisory_detail_reconciles_server_fields() { + // AdvisoryResponse reconciliation: server `severity`/`summary` map to + // severity_level/title via #[serde(rename)]. + let parsed: AdvisoryResponse = + serde_json::from_str(fixture!("advisory_detail.json")).unwrap(); + assert_eq!(parsed.advisory_id, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(parsed.aliases, vec!["CVE-2026-12345".to_string()]); + assert_eq!(parsed.tier, Some(1)); + assert_eq!(parsed.severity_level.as_deref(), Some("HIGH")); + assert_eq!(parsed.title.as_deref(), Some("SQL injection in django")); + assert_eq!( + parsed.url.as_deref(), + Some("https://github.com/advisories/GHSA-xxxx-yyyy-zzzz") + ); + } +} diff --git a/tests/fixtures/vuln_api/advisory_detail.json b/tests/fixtures/vuln_api/advisory_detail.json new file mode 100644 index 0000000..1db6245 --- /dev/null +++ b/tests/fixtures/vuln_api/advisory_detail.json @@ -0,0 +1,15 @@ +{ + "id": "GHSA-xxxx-yyyy-zzzz", + "source": "ghsa", + "source_url": "https://github.com/advisories/GHSA-xxxx-yyyy-zzzz", + "alias": "CVE-2026-12345", + "aliases": ["CVE-2026-12345"], + "ecosystem": "pypi", + "summary": "SQL injection in django", + "details": "A detailed description of the vulnerability.", + "severity": "HIGH", + "severity_badge": "HIGH", + "tier": 1, + "tier_score": 74.5, + "llm_summary": "Short developer-facing summary." 
+} diff --git a/tests/fixtures/vuln_api/check_clean.json b/tests/fixtures/vuln_api/check_clean.json new file mode 100644 index 0000000..7a1d137 --- /dev/null +++ b/tests/fixtures/vuln_api/check_clean.json @@ -0,0 +1 @@ +{"ecosystem":"pypi","package_name":"requests","version":"2.31.0","is_vulnerable":false,"matches":[]} diff --git a/tests/fixtures/vuln_api/check_malware.json b/tests/fixtures/vuln_api/check_malware.json new file mode 100644 index 0000000..f353d36 --- /dev/null +++ b/tests/fixtures/vuln_api/check_malware.json @@ -0,0 +1,15 @@ +{ + "ecosystem": "npm", + "package_name": "wozhendeshitule", + "version": "1.0.0", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "MAL-2022-7232", + "severity_level": "critical", + "tier": 1, + "vulnerable_version_range": null, + "fixed_version": null + } + ] +} diff --git a/tests/fixtures/vuln_api/check_unknown.json b/tests/fixtures/vuln_api/check_unknown.json new file mode 100644 index 0000000..9886df0 --- /dev/null +++ b/tests/fixtures/vuln_api/check_unknown.json @@ -0,0 +1 @@ +{"ecosystem":"pypi","package_name":"this-package-does-not-exist","version":"9.9.9","is_vulnerable":false,"matches":[]} diff --git a/tests/fixtures/vuln_api/check_vulnerable.json b/tests/fixtures/vuln_api/check_vulnerable.json new file mode 100644 index 0000000..e50112b --- /dev/null +++ b/tests/fixtures/vuln_api/check_vulnerable.json @@ -0,0 +1,15 @@ +{ + "ecosystem": "pypi", + "package_name": "django", + "version": "3.2.0", + "is_vulnerable": true, + "matches": [ + { + "advisory_id": "GHSA-xxxx-yyyy-zzzz", + "severity_level": "high", + "tier": 1, + "vulnerable_version_range": ">=3.2,<3.2.5", + "fixed_version": "3.2.5" + } + ] +} From 5c303c5193d3361a5fe7a22c6b5391c97eddb482 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Tue, 9 Jun 2026 23:29:15 +0200 Subject: [PATCH 02/59] Add corgea pip|npm|yarn|pnpm|uv install wrappers with recency gate Run package managers through a Corgea gate: install commands with named targets are resolved 
against the public registry first, and a resolved version published within --threshold (default 2d) blocks the install (exit 1) before the package manager runs. --no-fail demotes the block to a warning; --json emits a per-target report. Non-install subcommands, requirements files, bare installs, and unverifiable specs (git/URL/path) pass through with a printed note and the package manager's own exit code. No token required. Ported from PR #89 (cursor/dependency-freshness-check-89d1), slimmed to the named-target path: src/precheck (arg parsing + gate + exec) and src/verify_deps (threshold parsing + npm/PyPI resolvers) live in the library crate with no lockfile verification, check-only mode, or terminal-color dependency. Resolution errors warn but never block; fail-closed semantics arrive with the vuln-api verdict chunk. Also lands Config.vuln_api_url + get_vuln_api_url() (env -> config -> default) for the upcoming vuln-api chunk, hermetic integration tests driven by CORGEA_PYPI_REGISTRY/CORGEA_NPM_REGISTRY stub overrides, a shared tests/common harness, and SKILL.md docs for the new commands. 
--- Cargo.lock | 7 + Cargo.toml | 1 + skills/corgea/SKILL.md | 24 ++ src/config.rs | 94 ++++++ src/lib.rs | 2 + src/main.rs | 70 ++++ src/precheck/mod.rs | 544 +++++++++++++++++++++++++++++++ src/precheck/parse.rs | 629 ++++++++++++++++++++++++++++++++++++ src/verify_deps/mod.rs | 137 ++++++++ src/verify_deps/registry.rs | 613 +++++++++++++++++++++++++++++++++++ tests/cli_deps.rs | 25 +- tests/cli_install.rs | 349 ++++++++++++++++++++ tests/common/mod.rs | 35 ++ 13 files changed, 2508 insertions(+), 22 deletions(-) create mode 100644 src/precheck/mod.rs create mode 100644 src/precheck/parse.rs create mode 100644 src/verify_deps/mod.rs create mode 100644 src/verify_deps/registry.rs create mode 100644 tests/cli_install.rs create mode 100644 tests/common/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 2b9c8e7..1c6a6fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,6 +359,7 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "semver", "serde", "serde_derive", "serde_json", @@ -1760,6 +1761,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index d60edad..13e1c71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ reqwest = { version = "0.12.23", default-features = false, features = [ toml = "0.8.8" log = "0.4" env_logger = "0.11" +semver = "1" serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" serde_derive = "1.0.195" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index f23293f..ec6cb9c 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -109,6 +109,30 @@ corgea setup-hooks --default-config # Default: secrets + PII, fail on Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypass with `git commit --no-verify`. 
+### Install Wrappers — `corgea pip|npm|yarn|pnpm|uv ` + +Run a package manager through Corgea's install gate. Install commands with named +targets are resolved against the public registry first; a version published within +`--threshold` (default `2d`) blocks the install (exit 1). Everything else passes +through with the package manager's own exit code. Offline-only inputs (git/URL/path +specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. + +```bash +corgea pip install requests==2.31.0 # resolves, checks recency, then runs pip +corgea npm install axios@^1.0.0 # same gate for npm ranges +corgea pip --no-fail install newpkg # demote a block to a warning +corgea pip --json install newpkg # machine-readable per-target report +corgea pip list # non-install subcommands pass straight through +``` + +| Flag | Short | Description | +|------|-------|-------------| +| `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | +| `--no-fail` | | Print the finding but run the install anyway. | +| `--json` | | JSON report instead of text. | + +No Corgea token required. Registry overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`. + ### Deps — `corgea deps ` diff --git a/src/config.rs b/src/config.rs index 257a483..01db7bd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,6 +7,10 @@ pub struct Config { pub(crate) url: String, pub(crate) debug: i8, pub(crate) token: String, + /// Override for the vuln-api host (install-gate package checks). + /// `#[serde(default)]` keeps pre-existing config files loading. 
+ #[serde(default)] + pub(crate) vuln_api_url: Option, } impl Config { @@ -34,6 +38,7 @@ impl Config { url: "https://www.corgea.app".to_string(), debug: 0, token: "".to_string(), + vuln_api_url: None, }; let toml = toml::to_string(&config).expect("Failed to serialize config"); @@ -100,4 +105,93 @@ impl Config { self.debug } + + /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, + /// then the config file's `vuln_api_url`, then the public default. + /// Consumed by the install-gate vuln check (chunk 3); no caller yet. + #[allow(dead_code)] + pub fn get_vuln_api_url(&self) -> String { + let url = crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .or_else(|| self.vuln_api_url.clone()) + .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()); + url.trim().trim_end_matches('/').to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn config_with(vuln_api_url: Option<&str>) -> Config { + Config { + url: "https://www.corgea.app".to_string(), + debug: 0, + token: "".to_string(), + vuln_api_url: vuln_api_url.map(str::to_string), + } + } + + /// All `get_vuln_api_url` cases in one test fn: the env-var cases + /// mutate process-global state, so they must not run concurrently + /// with each other under the parallel test harness. + #[test] + fn get_vuln_api_url_resolution_order() { + env::remove_var("CORGEA_VULN_API_URL"); + + // Default when neither env nor config is set. + assert_eq!( + config_with(None).get_vuln_api_url(), + "https://vuln-api.corgea.app" + ); + + // Config value wins over the default; trailing slash trimmed. + assert_eq!( + config_with(Some("https://custom.example.com/")).get_vuln_api_url(), + "https://custom.example.com" + ); + + // Surrounding whitespace trimmed. + assert_eq!( + config_with(Some(" https://ws.example.com ")).get_vuln_api_url(), + "https://ws.example.com" + ); + + // Env var wins over the config value (and gets the same trims). 
+ env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); + assert_eq!( + config_with(Some("https://custom.example.com")).get_vuln_api_url(), + "https://env.example.com" + ); + + // Empty / whitespace-only env var is treated as unset. + env::set_var("CORGEA_VULN_API_URL", " "); + assert_eq!( + config_with(Some("https://custom.example.com")).get_vuln_api_url(), + "https://custom.example.com" + ); + env::remove_var("CORGEA_VULN_API_URL"); + } + + /// `Config::load()` writes the default file with `vuln_api_url: None` + /// and `save()` reserializes every config — both must round-trip. + #[test] + fn config_toml_round_trips_with_and_without_vuln_api_url() { + let without = toml::to_string(&config_with(None)).expect("serialize None field"); + let parsed: Config = toml::from_str(&without).expect("deserialize"); + assert_eq!(parsed.vuln_api_url, None); + + let with = toml::to_string(&config_with(Some("https://custom.example.com"))) + .expect("serialize Some field"); + let parsed: Config = toml::from_str(&with).expect("deserialize"); + assert_eq!( + parsed.vuln_api_url.as_deref(), + Some("https://custom.example.com") + ); + + // Pre-existing config files (no vuln_api_url key) must still load. + let legacy: Config = + toml::from_str("url = \"https://www.corgea.app\"\ndebug = 0\ntoken = \"\"\n") + .expect("legacy config without vuln_api_url"); + assert_eq!(legacy.vuln_api_url, None); + } } diff --git a/src/lib.rs b/src/lib.rs index c780717..bf66ab7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,6 @@ pub mod deps; +pub mod precheck; +pub mod verify_deps; // Also declared in the binary crate (src/main.rs); re-declared here so library modules // (e.g. vuln_api) can use `crate::log::debug`. src/log.rs is a thin `::log` facade that // compiles cleanly in both crates. 
diff --git a/src/main.rs b/src/main.rs index 442c5a1..8d26534 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,6 +199,16 @@ enum Commands { #[command(subcommand)] command: corgea::deps::run::DepsSubcommand, }, + /// Wrap `npm` commands: verify install targets' publish recency, then run npm. + Npm(InstallWrapArgs), + /// Wrap `yarn` commands: verify install targets' publish recency, then run yarn. + Yarn(InstallWrapArgs), + /// Wrap `pnpm` commands: verify install targets' publish recency, then run pnpm. + Pnpm(InstallWrapArgs), + /// Wrap `pip` commands: verify install targets' publish recency, then run pip. + Pip(InstallWrapArgs), + /// Wrap `uv` commands: verify install targets' publish recency, then run uv. + Uv(InstallWrapArgs), } #[derive(Subcommand, Debug, Clone, PartialEq)] @@ -221,6 +231,50 @@ impl FromStr for Scanner { } } +/// Shared flags for the install-wrapper subcommands (`corgea npm|yarn|pnpm|pip|uv`). +#[derive(clap::Args, Debug, Clone)] +struct InstallWrapArgs { + #[arg( + long, + short = 't', + default_value = "2d", + value_parser = corgea::verify_deps::parse_threshold, + help = "Recency threshold. Resolved versions younger than this are blocked. e.g. '2d', '12h'." + )] + threshold: std::time::Duration, + + #[arg( + long, + help = "Demote a recency block to a printed warning. The install still runs." + )] + no_fail: bool, + + #[arg( + long, + help = "Output the result as JSON instead of human-readable text." + )] + json: bool, + + /// Arguments forwarded to the package manager (subcommand and package specs). 
+ #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + cmd: Vec, +} + +fn install_wrap_options(args: &InstallWrapArgs) -> corgea::precheck::PrecheckOptions { + corgea::precheck::PrecheckOptions { + threshold: args.threshold, + no_fail: args.no_fail, + json: args.json, + npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), + pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + } +} + +fn run_install_wrap_command(manager: corgea::precheck::PackageManager, args: &InstallWrapArgs) { + let code = corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args)); + std::process::exit(code); +} + /// Initialize the global logger. /// /// `CORGEA_DEBUG=1` (env var or config file) raises the default verbosity to @@ -504,6 +558,22 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } + // Install wrappers: no auth gate — mirror `Deps` (offline-only in Phase 1). + Some(Commands::Npm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args) + } + Some(Commands::Yarn(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Yarn, args) + } + Some(Commands::Pnpm(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pnpm, args) + } + Some(Commands::Pip(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args) + } + Some(Commands::Uv(args)) => { + run_install_wrap_command(corgea::precheck::PackageManager::Uv, args) + } None => { utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs new file mode 100644 index 0000000..5bbd94f --- /dev/null +++ b/src/precheck/mod.rs @@ -0,0 +1,544 @@ +//! Install wrappers: `corgea npm`, `corgea yarn`, `corgea pnpm`, `corgea pip`, `corgea uv`. +//! +//! 
Wraps an install command from a supported package manager, resolves what +//! the package manager *would* install against the public registry, and either +//! blocks the install or runs it transparently. +//! +//! Verification rule: a package is rejected if the resolved version +//! was published within `--threshold` (default `2d`). This mirrors +//! the `deps` flow but applies to the install-time set of +//! packages instead of the already-locked set. +//! +//! By default a "recent" finding makes the wrapper exit with status 1 +//! *without* running the install. Use `--no-fail` to demote this to a +//! warning (the install runs anyway). + +pub mod parse; + +use std::ffi::OsString; +use std::process::Command; +use std::time::Duration; + +use chrono::Utc; + +use crate::verify_deps; + +/// Supported package managers. Each one shares enough behaviour with +/// the others that we only need a small per-manager dispatch. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PackageManager { + Npm, + Yarn, + Pnpm, + Pip, + Uv, +} + +impl PackageManager { + pub fn binary_name(self) -> &'static str { + match self { + PackageManager::Npm => "npm", + PackageManager::Yarn => "yarn", + PackageManager::Pnpm => "pnpm", + PackageManager::Pip => "pip", + PackageManager::Uv => "uv", + } + } + + /// Subcommands that this manager treats as "install something new" + /// — the only ones we need to verify before running. + pub fn is_install_subcommand(self, sub: &str) -> bool { + match self { + PackageManager::Npm => matches!(sub, "install" | "i" | "add"), + PackageManager::Yarn => matches!(sub, "add" | "install"), + PackageManager::Pnpm => matches!(sub, "add" | "install" | "i"), + PackageManager::Pip => matches!(sub, "install"), + PackageManager::Uv => false, + } + } +} + +#[derive(Debug, Clone)] +pub struct PrecheckOptions { + pub threshold: Duration, + /// If true, demote a recent finding from "block" to "warn-and-run". 
+ pub no_fail: bool, + pub json: bool, + /// Optional registry overrides, used by tests. + pub npm_registry: Option, + pub pypi_registry: Option, +} + +/// Each item the user (or a `-r` requirements file) asked us to install. +#[derive(Debug, Clone)] +pub struct InstallTarget { + pub name: String, + /// Display form, e.g. `axios@^1.0.0` or `requests==2.31.0`. + pub display: String, + /// What we'll feed into the resolver. + pub kind: TargetKind, +} + +#[derive(Debug, Clone)] +pub enum TargetKind { + Npm(crate::verify_deps::registry::NpmSpec), + Pypi(crate::verify_deps::registry::PypiSpec), + /// Something we can't verify (URL/git/file/path) — we surface this + /// as a warning but never block on it. + Unverifiable { + reason: String, + }, +} + +/// Outcome of resolving + verifying a single target. +#[derive(Debug, Clone)] +pub enum TargetOutcome { + /// Resolved cleanly. `recent` is true when the version was + /// published within the threshold (the blocking condition). + Resolved { + target: InstallTarget, + resolved: crate::verify_deps::registry::ResolvedPackage, + age: Duration, + recent: bool, + }, + /// We deliberately couldn't verify this target (URL / git / etc.). + Skipped { + target: InstallTarget, + reason: String, + }, + /// Resolution failed (network, unknown package, bad spec). + Error { + target: InstallTarget, + error: String, + }, +} + +#[derive(Debug)] +pub struct PrecheckReport { + pub manager: PackageManager, + pub subcommand: String, + pub original_args: Vec, + pub outcomes: Vec, + pub threshold: Duration, +} + +impl PrecheckReport { + fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { + self.outcomes.iter().filter(|o| pred(o)).count() + } + pub fn ok_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { recent: false, .. })) + } + pub fn recent_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { recent: true, .. 
})) + } + pub fn skipped_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) + } + pub fn error_count(&self) -> usize { + self.count(|o| matches!(o, TargetOutcome::Error { .. })) + } +} + +/// Canonical entry for ecosystem commands (`corgea npm install …`). +/// +/// `cmd` is everything after the ecosystem name, e.g. +/// `["install", "axios@^1.0.0", "--save-dev"]`. An empty `cmd` execs the +/// package manager with no arguments. +pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { + if manager == PackageManager::Uv { + return run_uv(cmd, opts); + } + + if cmd.is_empty() { + return exec_command(manager.binary_name(), &[]); + } + + let subcommand = &cmd[0]; + let rest = &cmd[1..]; + + if !manager.is_install_subcommand(subcommand) { + return exec_install_with_args(manager, subcommand, rest); + } + + let parsed = match parse::parse_install_args(manager, rest) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + + run_parsed_install( + manager, + subcommand, + rest, + parsed, + || exec_install_with_args(manager, subcommand, rest), + opts, + ) +} + +fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { + let exec = || exec_command("uv", cmd); + + match parse::classify_uv_command(cmd) { + parse::UvCommand::Passthrough => exec(), + parse::UvCommand::PipInstall { install_args } => { + let parsed = match parse::parse_pip_install_args(install_args) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + run_parsed_install( + PackageManager::Uv, + "pip install", + install_args, + parsed, + exec, + opts, + ) + } + parse::UvCommand::Add { add_args } => run_parsed_install( + PackageManager::Uv, + "add", + add_args, + parse::parse_pypi_positionals_args(add_args), + exec, + opts, + ), + } +} + +/// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. 
+fn run_parsed_install( + manager: PackageManager, + subcommand_label: &str, + rest: &[String], + parsed: parse::ParsedInstall, + exec: impl FnOnce() -> i32, + opts: PrecheckOptions, +) -> i32 { + if !parsed.requirements_files.is_empty() { + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); + } + + if parsed.targets.is_empty() { + return exec(); + } + + let now = Utc::now(); + let threshold = + chrono::Duration::from_std(opts.threshold).expect("threshold validated before run_install"); + + let outcomes: Vec<_> = parsed + .targets + .iter() + .map(|target| verify_one(target, &opts, &now, threshold)) + .collect(); + + let report = PrecheckReport { + manager, + subcommand: subcommand_label.to_string(), + original_args: rest.to_vec(), + outcomes, + threshold: opts.threshold, + }; + + if opts.json { + print_json(&report); + } else { + print_text(&report); + } + + if should_block_install(&report, &opts) { + if !opts.json { + eprintln!("Refusing to run install. 
Pass --no-fail to proceed anyway."); + } + return 1; + } + + exec() +} + +fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + !opts.no_fail && report.recent_count() > 0 +} + +fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, + threshold: chrono::Duration, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + let age_chrono = now.signed_duration_since(resolved.published_at); + let age = age_chrono + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + recent: age_chrono < threshold, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +fn exec_install_with_args(manager: PackageManager, subcommand: &str, rest: &[String]) -> i32 { + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command(manager.binary_name(), &full) +} + +fn exec_command(binary: &str, args: &[String]) -> i32 { + // Resolve the binary on PATH. On Windows this finds `.cmd` shims. + let resolved = match which::which(binary) { + Ok(p) => p, + Err(e) => { + eprintln!( + "could not find '{}' on PATH ({}). 
Make sure the package manager is installed.", + binary, e + ); + return 127; + } + }; + + let os_args: Vec = args.iter().map(OsString::from).collect(); + + match Command::new(&resolved).args(&os_args).status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + eprintln!("failed to exec {}: {}", binary, e); + 1 + } + } +} + +fn print_text(report: &PrecheckReport) { + println!( + "Pre-checking `{} {} {}` (threshold {})", + report.manager.binary_name(), + report.subcommand, + report.original_args.join(" "), + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + report.skipped_count(), + report.error_count(), + ); + + for o in &report.outcomes { + match o { + TargetOutcome::Resolved { + target, + resolved, + age, + recent, + } => { + if *recent { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + } + TargetOutcome::Skipped { target, reason } => { + println!(" ? 
{}: {}", target.display, reason); + } + TargetOutcome::Error { target, error } => { + println!(" ✗ {}: {}", target.display, error); + } + } + } +} + +fn print_json(report: &PrecheckReport) { + use serde_json::json; + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { + target, + resolved, + age, + recent, + } => json!({ + "status": if *recent { "recent" } else { "ok" }, + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + }), + TargetOutcome::Skipped { target, reason } => json!({ + "status": "skipped", + "spec": target.display, + "name": target.name, + "reason": reason, + }), + TargetOutcome::Error { target, error } => json!({ + "status": "error", + "spec": target.display, + "name": target.name, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "manager": report.manager.binary_name(), + "subcommand": report.subcommand, + "args": report.original_args, + "threshold_seconds": report.threshold.as_secs(), + "summary": { + "ok": report.ok_count(), + "recent": report.recent_count(), + "skipped": report.skipped_count(), + "errors": report.error_count(), + }, + "results": outcomes, + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn install_subcommand_recognition() { + assert!(PackageManager::Npm.is_install_subcommand("install")); + assert!(PackageManager::Npm.is_install_subcommand("i")); + assert!(PackageManager::Npm.is_install_subcommand("add")); + assert!(!PackageManager::Npm.is_install_subcommand("update")); + + assert!(PackageManager::Yarn.is_install_subcommand("add")); + assert!(PackageManager::Yarn.is_install_subcommand("install")); + + assert!(PackageManager::Pnpm.is_install_subcommand("add")); + assert!(PackageManager::Pnpm.is_install_subcommand("install")); + 
assert!(PackageManager::Pnpm.is_install_subcommand("i")); + + assert!(PackageManager::Pip.is_install_subcommand("install")); + assert!(!PackageManager::Pip.is_install_subcommand("freeze")); + } + + fn stub_opts(pypi_registry: String, no_fail: bool) -> PrecheckOptions { + PrecheckOptions { + threshold: Duration::from_secs(2 * 86400), + no_fail, + json: false, + npm_registry: None, + pypi_registry: Some(pypi_registry), + } + } + + /// Run `run_parsed_install` for `pip install ` with an exec + /// closure that records whether it ran (returning 42 instead of + /// spawning anything). + fn gate_pip_install(args: &[&str], opts: PrecheckOptions) -> (i32, bool) { + let rest: Vec = args.iter().map(|s| s.to_string()).collect(); + let parsed = parse::parse_install_args(PackageManager::Pip, &rest).expect("parse"); + let mut exec_ran = false; + let code = run_parsed_install( + PackageManager::Pip, + "install", + &rest, + parsed, + || { + exec_ran = true; + 42 + }, + opts, + ); + (code, exec_ran) + } + + #[test] + fn unverifiable_target_skips_and_proceeds() { + // git+ spec → Skipped outcome, no registry hit, install proceeds. + let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn bare_install_passes_through_without_verification() { + // Bare `pip install` (no targets) → straight exec, no registry hit. + let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let (code, exec_ran) = gate_pip_install(&[], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } + + #[test] + fn requirements_files_note_then_exec() { + // `-r reqs.txt` alone → printed note, no verification, exec runs. 
+ let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); + assert_eq!(code, 42); + assert!(exec_ran); + } +} diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs new file mode 100644 index 0000000..6ce7187 --- /dev/null +++ b/src/precheck/parse.rs @@ -0,0 +1,629 @@ +//! Parse install-command argument lists into structured `InstallTarget`s. +//! +//! The goal is to be liberal with valid inputs (real install commands +//! mix flags, package specs, and pass-through args freely) and clear +//! about anything we can't verify (URLs / git / filesystem refs). + +use std::path::PathBuf; + +use crate::verify_deps::registry::{NpmSpec, PypiSpec}; + +use super::{InstallTarget, PackageManager, TargetKind}; + +#[derive(Debug, Default)] +pub struct ParsedInstall { + pub targets: Vec, + /// `pip install -r foo.txt` — requirements files are only noted + /// (not verified) by the baseline gate. + pub requirements_files: Vec, +} + +/// `uv pip install` argument list (everything after `pip install`). +pub fn parse_pip_install_args(args: &[String]) -> Result { + Ok(build_parsed_install(extract_pip_positionals(args)?, true)) +} + +/// `uv add` argument list (everything after `add`). 
+pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { + build_parsed_install(extract_node_positionals(args), true) +} + +fn build_parsed_install(positionals: PositionalSplit, pypi: bool) -> ParsedInstall { + let mut parsed = ParsedInstall::default(); + for raw in &positionals.specs { + let target = if pypi { + parse_pypi_spec(raw) + } else { + parse_npm_spec(raw) + }; + parsed.targets.push(target); + } + parsed.requirements_files = positionals.requirements_files; + parsed +} + +pub fn parse_install_args( + manager: PackageManager, + args: &[String], +) -> Result { + match manager { + PackageManager::Pip => parse_pip_install_args(args), + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + Ok(build_parsed_install(extract_node_positionals(args), false)) + } + PackageManager::Uv => unreachable!("uv uses classify_uv_command"), + } +} + +/// Install-shaped `uv` invocations we know how to verify. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UvCommand<'a> { + Passthrough, + PipInstall { install_args: &'a [String] }, + Add { add_args: &'a [String] }, +} + +pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> { + match cmd.first().map(String::as_str) { + Some("pip") if matches!(cmd.get(1).map(String::as_str), Some("install" | "i")) => { + UvCommand::PipInstall { + install_args: &cmd[2..], + } + } + Some("add") => UvCommand::Add { + add_args: &cmd[1..], + }, + _ => UvCommand::Passthrough, + } +} + +#[derive(Debug, Default)] +struct PositionalSplit { + specs: Vec, + requirements_files: Vec, +} + +/// Strip flags from a npm/yarn/pnpm install argument list, returning +/// only the positional package specs. +/// +/// We treat anything starting with `-` as a flag. Boolean flags (`-D`, +/// `--save-dev`, `--no-save`, ...) are dropped on their own. 
Flags +/// that take a value can be written as either `--flag=value` or +/// `--flag value`; we handle both by skipping the next token if it +/// looks like a value (doesn't start with `-` and contains `:` or `/` +/// or starts with a digit, suggesting a URL / path / port / version). +/// +/// We deliberately avoid maintaining an exhaustive flag whitelist — +/// real-world install commands are too varied. The heuristic above +/// is correct for the common cases (`--registry url`, `--prefix path`, +/// `-w pkgname`, etc.) and conservatively skips occasional ambiguous +/// values (no spec we'd want to verify ever starts with `:` or `/`). +fn extract_node_positionals(args: &[String]) -> PositionalSplit { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + // After `--`, everything is positional. + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + if a.starts_with('-') { + // Flag. Skip the next token if it looks like a value. + if a.contains('=') { + // `--flag=value` already self-contained. + i += 1; + continue; + } + // Heuristic: peek at the next arg. If it doesn't look + // like a package spec (i.e. contains `://` or starts with + // `/` or `.`) skip it; otherwise leave it alone for the + // next iteration. + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + out +} + +/// pip's argument grammar is more structured than npm's: there are +/// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, +/// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` +/// because they affect behaviour, and treat the rest with the same +/// liberal heuristic as npm. 
+fn extract_pip_positionals(args: &[String]) -> Result { + let mut out = PositionalSplit::default(); + let mut i = 0; + while i < args.len() { + let a = &args[i]; + if a == "--" { + for rest in &args[i + 1..] { + out.specs.push(rest.clone()); + } + break; + } + match a.as_str() { + "-r" | "--requirement" => { + let path = args + .get(i + 1) + .ok_or_else(|| "`-r` / `--requirement` requires a file path".to_string())?; + out.requirements_files.push(PathBuf::from(path)); + i += 2; + continue; + } + "-c" | "--constraint" => { + // Constraints don't add packages, but skip the path. + i += 2; + continue; + } + "-e" | "--editable" => { + // Editable installs are explicit unverifiable targets. + let path = args.get(i + 1).cloned().unwrap_or_default(); + out.specs.push(format!("-e {}", path)); + i += if args.get(i + 1).is_some() { 2 } else { 1 }; + continue; + } + _ => {} + } + // Long-form `--requirement=foo.txt`. + if let Some(rest) = a.strip_prefix("--requirement=") { + out.requirements_files.push(PathBuf::from(rest)); + i += 1; + continue; + } + if let Some(rest) = a.strip_prefix("--editable=") { + out.specs.push(format!("-e {}", rest)); + i += 1; + continue; + } + if a.starts_with('-') { + // Unknown flag — apply the same value-skipping heuristic + // as in node land. + if a.contains('=') { + i += 1; + continue; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i += if next_is_value { 2 } else { 1 }; + continue; + } + out.specs.push(a.clone()); + i += 1; + } + Ok(out) +} + +/// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, +/// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, +/// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. 
+pub(crate) fn parse_npm_spec(raw: &str) -> InstallTarget {
+    // Specs that point outside the registry come back as `Unverifiable`
+    // with the whole trimmed spec as the name. Everything else is split
+    // into `name[@version]` and the version part is classified.
+    const NON_REGISTRY_PREFIXES: &[&str] = &[
+        "git+", "git:", "git@", "ssh://", "http://", "https://", "file:", "./", "../", "/", "~/",
+    ];
+
+    let spec = raw.trim();
+    let skip_reason = if NON_REGISTRY_PREFIXES
+        .iter()
+        .any(|prefix| spec.starts_with(*prefix))
+    {
+        Some("spec is a URL/git/filesystem reference — registry verification skipped")
+    } else if spec.starts_with("npm:") {
+        Some("npm: aliased dependency — registry verification skipped")
+    } else if spec.starts_with("workspace:") {
+        Some("workspace: dependency — registry verification skipped")
+    } else {
+        None
+    };
+    if let Some(reason) = skip_reason {
+        return InstallTarget {
+            name: spec.to_string(),
+            display: raw.to_string(),
+            kind: TargetKind::Unverifiable {
+                reason: reason.to_string(),
+            },
+        };
+    }
+
+    // A scoped name (`@scope/pkg`) begins with `@`, so the search for the
+    // version separator has to start after that first byte. Unscoped
+    // names simply split at their first `@`.
+    let search_from = usize::from(spec.starts_with('@'));
+    let (pkg, version) = match spec[search_from..].find('@') {
+        Some(offset) => {
+            let at = search_from + offset;
+            (&spec[..at], &spec[at + 1..])
+        }
+        None => (spec, ""),
+    };
+    let version = version.trim();
+
+    // Order matters: an exact semver also starts with a digit, so it has
+    // to be tested before the (looser) range check.
+    let kind = if version.is_empty() || version.eq_ignore_ascii_case("latest") {
+        TargetKind::Npm(NpmSpec::Latest)
+    } else if semver::Version::parse(version).is_ok() {
+        TargetKind::Npm(NpmSpec::Exact(version.to_string()))
+    } else if looks_like_npm_range(version) {
+        TargetKind::Npm(NpmSpec::Range(version.to_string()))
+    } else if is_npm_dist_tag(version) {
+        TargetKind::Npm(NpmSpec::Tag(version.to_string()))
+    } else {
+        TargetKind::Unverifiable {
+            reason: format!(
+                "could not classify version spec '{}' (not a valid semver, range, or dist-tag)",
+                version
+            ),
+        }
+    };
+
+    InstallTarget {
+        name: pkg.trim().to_string(),
+        display: raw.to_string(),
+        kind,
+    }
+}
+
+/// Cheap syntactic test for "this is probably an npm version range".
+/// True when the first character is a range operator (`^`, `~`, `>`,
+/// `<`, `=`, `*`) or an ASCII digit (which covers `1.x`, `1.2.x` and
+/// bare ranges). Nothing is validated here; the resolver checks the
+/// spec against the registry's version list later.
+fn looks_like_npm_range(s: &str) -> bool {
+    match s.chars().next() {
+        Some(first) => {
+            matches!(first, '^' | '~' | '>' | '<' | '=' | '*') || first.is_ascii_digit()
+        }
+        None => false,
+    }
+}
+
+/// A dist-tag is a non-empty alphanumeric string (e.g. `latest`,
+/// `next`, `beta`, `alpha-1`). We reject anything that contains
+/// version-spec metacharacters.
+fn is_npm_dist_tag(s: &str) -> bool { + !s.is_empty() + && s.chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.') + && s.chars() + .next() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) +} + +/// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, +/// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. +pub(crate) fn parse_pypi_spec(raw: &str) -> InstallTarget { + let display = raw.to_string(); + let trimmed = raw.trim(); + + let unverifiable_prefixes = [ + "git+", "hg+", "svn+", "bzr+", "http://", "https://", "file:", "./", "../", "/", "~/", + "-e ", "-e=", + ]; + if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { + return InstallTarget { + name: trimmed.to_string(), + display, + kind: TargetKind::Unverifiable { + reason: "spec is a VCS / URL / editable / filesystem reference — registry verification skipped".to_string(), + }, + }; + } + + // Find the first specifier operator (`==`, `>=`, `<=`, `!=`, `~=`, + // `>`, `<`). PEP 440 also allows `===` (arbitrary equality). + // Find the leftmost specifier operator. On ties, prefer the + // longer operator (e.g. `==` over `=`). + let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; + let mut split_at: Option = None; + for sep in &separators { + if let Some(idx) = trimmed.find(sep) { + split_at = match split_at { + Some(prev) if prev <= idx => Some(prev), + _ => Some(idx), + }; + } + } + + let (name_part, spec_part): (&str, &str) = match split_at { + Some(idx) => (&trimmed[..idx], &trimmed[idx..]), + None => (trimmed, ""), + }; + + // Strip extras: `requests[security]` -> `requests`. + let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); + + // Strip env markers: `package; python_version >= "3.7"`. 
+ let spec_no_marker = spec_part.split(';').next().unwrap_or(spec_part).trim(); + + let kind = if spec_no_marker.is_empty() { + TargetKind::Pypi(PypiSpec::Latest) + } else if let Some(rest) = spec_no_marker.strip_prefix("===") { + TargetKind::Pypi(PypiSpec::Exact(rest.trim().to_string())) + } else if let Some(rest) = spec_no_marker.strip_prefix("==") { + let v = rest.trim(); + if v.is_empty() { + TargetKind::Unverifiable { + reason: "empty `==` specifier".to_string(), + } + } else { + TargetKind::Pypi(PypiSpec::Exact(v.to_string())) + } + } else { + TargetKind::Pypi(PypiSpec::Specifier(spec_no_marker.to_string())) + }; + + InstallTarget { + name: name_no_extras.to_string(), + display, + kind, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extracts_npm_positionals_skipping_flags() { + let args = vec![ + "axios".to_string(), + "--save-dev".to_string(), + "@types/node@latest".to_string(), + "-D".to_string(), + "--registry".to_string(), + "https://example.com/registry".to_string(), + "lodash@^4.0.0".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec![ + "axios".to_string(), + "@types/node@latest".to_string(), + "lodash@^4.0.0".to_string(), + ] + ); + } + + #[test] + fn extracts_npm_positionals_after_double_dash() { + let args = vec![ + "--save-dev".to_string(), + "--".to_string(), + "axios".to_string(), + "--this-is-positional-now".to_string(), + ]; + let p = extract_node_positionals(&args); + assert_eq!( + p.specs, + vec!["axios".to_string(), "--this-is-positional-now".to_string()] + ); + } + + #[test] + fn parse_npm_spec_classifies() { + let cases = vec![ + ("axios", NpmSpec::Latest), + ("axios@", NpmSpec::Latest), + ("axios@latest", NpmSpec::Latest), + ("axios@1.0.0", NpmSpec::Exact("1.0.0".to_string())), + ("axios@^1.0.0", NpmSpec::Range("^1.0.0".to_string())), + ("axios@~1.0.0", NpmSpec::Range("~1.0.0".to_string())), + ( + "axios@>=1.0.0 <2.0.0", + NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + ), + 
("axios@next", NpmSpec::Tag("next".to_string())), + ("axios@beta", NpmSpec::Tag("beta".to_string())), + ("@types/node", NpmSpec::Latest), + ("@types/node@20.10.5", NpmSpec::Exact("20.10.5".to_string())), + ("@types/node@^20.0.0", NpmSpec::Range("^20.0.0".to_string())), + ("@types/node@latest", NpmSpec::Latest), + ]; + for (input, expected) in cases { + let target = parse_npm_spec(input); + match (&target.kind, &expected) { + (TargetKind::Npm(actual), expected) => { + assert_eq!(actual, expected, "for input '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_npm_spec_extracts_scoped_names() { + assert_eq!(parse_npm_spec("@types/node").name, "@types/node"); + assert_eq!(parse_npm_spec("@types/node@20.10.5").name, "@types/node"); + assert_eq!(parse_npm_spec("axios@1.2.3").name, "axios"); + assert_eq!(parse_npm_spec("axios").name, "axios"); + } + + #[test] + fn parse_npm_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "git@github.com:x/y.git", + "https://example.com/pkg.tgz", + "file:./local-pkg", + "./local-pkg", + "../sibling", + "/abs/path", + "npm:alias-of-other@1.0.0", + "workspace:*", + ]; + for u in unverifiable { + let t = parse_npm_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. 
}), + "for '{}'", + u + ); + } + } + + #[test] + fn parse_pypi_spec_classifies() { + let cases = vec![ + ("requests", PypiSpec::Latest), + ("requests==2.31.0", PypiSpec::Exact("2.31.0".to_string())), + ("requests>=2.0", PypiSpec::Specifier(">=2.0".to_string())), + ("requests~=2.0", PypiSpec::Specifier("~=2.0".to_string())), + ("requests<3,>=2", PypiSpec::Specifier("<3,>=2".to_string())), + ("requests[security]", PypiSpec::Latest), + ( + "requests[security]==2.31.0", + PypiSpec::Exact("2.31.0".to_string()), + ), + ]; + for (input, expected) in cases { + let t = parse_pypi_spec(input); + match (&t.kind, &expected) { + (TargetKind::Pypi(actual), expected) => { + assert_eq!(actual, expected, "for '{}'", input); + } + _ => panic!("unexpected kind for '{}'", input), + } + } + } + + #[test] + fn parse_pypi_spec_strips_extras_and_markers() { + assert_eq!( + parse_pypi_spec("requests[security]==2.31.0").name, + "requests" + ); + assert_eq!( + parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").name, + "requests" + ); + match parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").kind { + TargetKind::Pypi(PypiSpec::Exact(v)) => assert_eq!(v, "2.31.0"), + _ => panic!("expected exact spec"), + } + } + + #[test] + fn parse_pypi_spec_skips_unverifiable() { + let unverifiable = vec![ + "git+https://github.com/x/y.git", + "https://example.com/pkg.tar.gz", + "./local-pkg", + "/abs/path", + "-e ./local", + ]; + for u in unverifiable { + let t = parse_pypi_spec(u); + assert!( + matches!(t.kind, TargetKind::Unverifiable { .. }), + "for '{}'", + u + ); + } + } + + #[test] + fn classify_uv_command_recognizes_install_shapes() { + assert!(matches!( + classify_uv_command(&[ + "pip".to_string(), + "install".to_string(), + "requests".to_string(), + ]), + UvCommand::PipInstall { .. } + )); + assert!(matches!( + classify_uv_command(&["pip".to_string(), "i".to_string()]), + UvCommand::PipInstall { .. 
} + )); + assert!(matches!( + classify_uv_command(&["add".to_string(), "django".to_string()]), + UvCommand::Add { .. } + )); + assert_eq!( + classify_uv_command(&["sync".to_string(), "--extra".to_string(), "dev".to_string()]), + UvCommand::Passthrough + ); + assert_eq!( + classify_uv_command(&["run".to_string(), "pytest".to_string()]), + UvCommand::Passthrough + ); + assert_eq!( + classify_uv_command(&["lock".to_string()]), + UvCommand::Passthrough + ); + } + + #[test] + fn uv_add_positionals_parse_as_pypi_specs() { + let parsed = parse_pypi_positionals_args(&["requests==2.31.0".into()]); + assert_eq!(parsed.targets.len(), 1); + assert!( + matches!( + &parsed.targets[0].kind, + TargetKind::Pypi(PypiSpec::Exact(v)) if v == "2.31.0" + ), + "uv add targets must parse as PyPI specs, got {:?}", + parsed.targets[0].kind + ); + } + + #[test] + fn pip_args_extract_requirements_files() { + let args = vec![ + "-r".to_string(), + "reqs.txt".to_string(), + "requests==2.31.0".to_string(), + "--requirement=other.txt".to_string(), + "-e".to_string(), + "./local".to_string(), + ]; + let p = extract_pip_positionals(&args).unwrap(); + assert_eq!( + p.requirements_files, + vec![PathBuf::from("reqs.txt"), PathBuf::from("other.txt")] + ); + assert!(p.specs.contains(&"requests==2.31.0".to_string())); + assert!(p.specs.iter().any(|s| s.starts_with("-e "))); + } +} diff --git a/src/verify_deps/mod.rs b/src/verify_deps/mod.rs new file mode 100644 index 0000000..b813529 --- /dev/null +++ b/src/verify_deps/mod.rs @@ -0,0 +1,137 @@ +//! Slim slice of #89's verify_deps: registry resolution + threshold helpers. + +pub mod registry; + +use std::time::Duration; + +/// Parse a human-friendly duration like `2d`, `48h`, `30m`, `45s`, or +/// a bare integer (interpreted as days). Returns the parsed duration. 
+pub fn parse_threshold(input: &str) -> Result { + let s = input.trim(); + if s.is_empty() { + return Err("threshold cannot be empty".to_string()); + } + + let (num_str, unit) = match s.chars().last() { + Some(c) if c.is_ascii_alphabetic() => { + (&s[..s.len() - c.len_utf8()], c.to_ascii_lowercase()) + } + _ => (s, 'd'), + }; + + let value: f64 = num_str + .trim() + .parse() + .map_err(|_| format!("invalid threshold number: '{}'", num_str))?; + + if value < 0.0 || !value.is_finite() { + return Err(format!( + "threshold must be a non-negative finite number: '{}'", + input + )); + } + + let secs = match unit { + 's' => value, + 'm' => value * 60.0, + 'h' => value * 3600.0, + 'd' => value * 86400.0, + 'w' => value * 7.0 * 86400.0, + other => { + return Err(format!( + "unknown threshold unit '{}'. Use s, m, h, d, or w.", + other + )) + } + }; + + let d = Duration::try_from_secs_f64(secs).map_err(|_| "threshold too large".to_string())?; + // Establish the invariant every consumer relies on: the threshold + // must also fit in a `chrono::Duration` (see precheck's from_std). + chrono::Duration::from_std(d).map_err(|_| "threshold too large".to_string())?; + Ok(d) +} + +/// Format a Duration as a short human-readable string (e.g. `1d 4h`). 
+pub fn format_duration(d: Duration) -> String { + let total_secs = d.as_secs(); + if total_secs < 60 { + return format!("{}s", total_secs); + } + let mins = total_secs / 60; + if mins < 60 { + return format!("{}m", mins); + } + let hours = total_secs / 3600; + let rem_mins = (total_secs % 3600) / 60; + if hours < 24 { + if rem_mins == 0 { + return format!("{}h", hours); + } + return format!("{}h {}m", hours, rem_mins); + } + let days = total_secs / 86400; + let rem_hours = (total_secs % 86400) / 3600; + if rem_hours == 0 { + format!("{}d", days) + } else { + format!("{}d {}h", days, rem_hours) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_threshold_units() { + assert_eq!( + parse_threshold("2d").unwrap(), + Duration::from_secs(2 * 86400) + ); + assert_eq!( + parse_threshold("48h").unwrap(), + Duration::from_secs(48 * 3600) + ); + assert_eq!( + parse_threshold("30m").unwrap(), + Duration::from_secs(30 * 60) + ); + assert_eq!(parse_threshold("90s").unwrap(), Duration::from_secs(90)); + assert_eq!( + parse_threshold("1w").unwrap(), + Duration::from_secs(7 * 86400) + ); + assert_eq!( + parse_threshold("3").unwrap(), + Duration::from_secs(3 * 86400) + ); + assert_eq!(parse_threshold("0.5d").unwrap(), Duration::from_secs(43200)); + } + + #[test] + fn parse_threshold_rejects_garbage() { + assert!(parse_threshold("").is_err()); + assert!(parse_threshold("abc").is_err()); + assert!(parse_threshold("-1d").is_err()); + assert!(parse_threshold("1y").is_err()); + } + + #[test] + fn parse_threshold_rejects_absurdly_large_values() { + // Too large for chrono::Duration (precheck converts via from_std). + assert!(parse_threshold("999999999999d").is_err()); + // Too large even for std::time::Duration. 
+ assert!(parse_threshold("1e308d").is_err()); + } + + #[test] + fn format_duration_short() { + assert_eq!(format_duration(Duration::from_secs(5)), "5s"); + assert_eq!(format_duration(Duration::from_secs(120)), "2m"); + assert_eq!(format_duration(Duration::from_secs(3600)), "1h"); + assert_eq!(format_duration(Duration::from_secs(3700)), "1h 1m"); + assert_eq!(format_duration(Duration::from_secs(86400)), "1d"); + assert_eq!(format_duration(Duration::from_secs(90000)), "1d 1h"); + } +} diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs new file mode 100644 index 0000000..10bc343 --- /dev/null +++ b/src/verify_deps/registry.rs @@ -0,0 +1,613 @@ +//! Registry lookups for npm and PyPI publish times. +//! +//! These talk to public registries (no auth) and are kept independent +//! of the rest of the CLI's HTTP client because: +//! * we must not send the user's Corgea auth header to a third-party, +//! * the timeouts and retry policy are different. +//! +//! Both resolvers turn a version spec into the concrete version that +//! would be installed, plus its publish time as a UTC timestamp. + +use chrono::{DateTime, Utc}; +use serde::Deserialize; +use std::sync::OnceLock; +use std::time::Duration; + +const DEFAULT_NPM_REGISTRY: &str = "https://registry.npmjs.org"; +const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(20); + +fn user_agent() -> String { + format!("corgea-cli/{} (deps)", env!("CARGO_PKG_VERSION")) +} + +fn http_client() -> Result<&'static reqwest::blocking::Client, String> { + static CLIENT: OnceLock = OnceLock::new(); + Ok(CLIENT.get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .build() + .expect("registry http client") + })) +} + +/// URL-encode an npm package name. Scoped names contain `@` and `/`, +/// the latter must be encoded as `%2f` for the package metadata URL. 
+fn encode_npm_name(name: &str) -> String { + if let Some(stripped) = name.strip_prefix('@') { + if let Some((scope, pkg)) = stripped.split_once('/') { + return format!("@{}%2f{}", scope, pkg); + } + } + name.to_string() +} + +#[derive(Debug, Deserialize)] +struct PypiUrl { + upload_time_iso_8601: Option, + upload_time: Option, +} + +/// Parse an ISO-8601 timestamp from npm or PyPI. PyPI sometimes emits +/// a naive timestamp like `2023-05-22T18:30:00` (no offset) which +/// chrono's RFC3339 parser rejects, so we accept both shapes. +fn parse_iso8601(raw: &str) -> Result, String> { + if let Ok(dt) = DateTime::parse_from_rfc3339(raw) { + return Ok(dt.with_timezone(&Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + if let Ok(naive) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S%.f") { + return Ok(DateTime::::from_naive_utc_and_offset(naive, Utc)); + } + Err(format!("unrecognised timestamp format: {}", raw)) +} + +// Resolution helpers (npm + PyPI). Inserted before the tests module +// in registry.rs. + +/// What the user typed after `pkg@` in an install command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum NpmSpec { + /// `axios`, `axios@`, or no spec — resolve to the `latest` dist-tag. + Latest, + /// `axios@latest`, `axios@next`, etc. + Tag(String), + /// `axios@1.2.3` — already resolved. + Exact(String), + /// `axios@^1.0.0`, `axios@~1.2.0`, `axios@>=1.0.0 <2.0.0`, etc. + Range(String), +} + +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub name: String, + pub version: String, + pub published_at: DateTime, +} + +#[derive(Debug, Deserialize)] +struct NpmFullMetadata { + #[serde(default, rename = "dist-tags")] + dist_tags: std::collections::BTreeMap, + /// Only the keys (published version strings) are used; `IgnoredAny` + /// avoids allocating multi-MB JSON trees for big packuments. 
+ #[serde(default)] + versions: std::collections::BTreeMap, + #[serde(default)] + time: std::collections::BTreeMap, +} + +/// Resolve an `NpmSpec` against the npm registry and return the +/// concrete version + publish time. Used by install wrappers when the +/// install command says e.g. `axios@^1.0.0` and we need to know what +/// would actually be installed before the install runs. +pub fn npm_resolve( + name: &str, + spec: &NpmSpec, + registry: Option<&str>, +) -> Result { + if name.is_empty() { + return Err("empty package name".to_string()); + } + let base = registry + .unwrap_or(DEFAULT_NPM_REGISTRY) + .trim_end_matches('/'); + let url = format!("{}/{}", base, encode_npm_name(name)); + + let client = http_client()?; + let resp = client + .get(&url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("npm registry request failed: {}", e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on npm registry ({})", + name, base + )); + } + if !status.is_success() { + return Err(format!( + "npm registry returned status {} for '{}'", + status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read npm registry response: {}", e))?; + + let meta: NpmFullMetadata = serde_json::from_str(&body).map_err(|e| { + format!( + "failed to parse npm registry response for '{}': {}", + name, e + ) + })?; + + let resolved_version = match spec { + NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { + format!( + "package '{}' has no 'latest' dist-tag on the npm registry", + name + ) + })?, + NpmSpec::Tag(tag) => meta.dist_tags.get(tag).cloned().ok_or_else(|| { + format!( + "package '{}' has no dist-tag named '{}' (available: {})", + name, + tag, + meta.dist_tags + .keys() + .cloned() + .collect::>() + .join(", "), + ) + })?, + NpmSpec::Exact(v) => { + if !meta.versions.contains_key(v) { + return Err(format!( + "version '{}' for package 
'{}' was not found on the npm registry", + v, name + )); + } + v.clone() + } + NpmSpec::Range(range) => { + npm_pick_highest_matching(&meta.versions, range).ok_or_else(|| { + format!( + "no published version of '{}' satisfies range '{}'", + name, range + ) + })? + } + }; + + let raw_time = meta.time.get(&resolved_version).ok_or_else(|| { + format!( + "publish time missing for {}@{} on the npm registry", + name, resolved_version + ) + })?; + + let published_at = parse_iso8601(raw_time).map_err(|e| { + format!( + "could not parse publish time '{}' for {}@{}: {}", + raw_time, name, resolved_version, e + ) + })?; + + Ok(ResolvedPackage { + name: name.to_string(), + version: resolved_version, + published_at, + }) +} + +/// Pick the highest semver-compatible version that satisfies `range`. +/// Pre-releases are excluded unless the range itself references a +/// pre-release (matches npm's behaviour). +/// Translate an npm-style version range (`>=1.0.0 <2.0.0`, +/// `1.x`, `>=1.0.0`) to a `semver::VersionReq`. The Rust crate uses +/// `,` as the AND separator, npm uses whitespace, so we normalise +/// before parsing. +fn parse_npm_range(range: &str) -> Option { + if let Ok(req) = semver::VersionReq::parse(range) { + return Some(req); + } + let normalised = range.split_whitespace().collect::>().join(","); + semver::VersionReq::parse(&normalised).ok() +} + +fn npm_pick_highest_matching( + versions: &std::collections::BTreeMap, + range: &str, +) -> Option { + // npm separates predicates with spaces (`>=1.0.0 <2.0.0`); the + // Rust `semver` crate uses commas. Try both. We don't support + // npm's `||` OR syntax here — those are best-effort skipped. 
+ let req = parse_npm_range(range)?; + let range_has_prerelease = range.contains('-'); + + let mut best: Option<(semver::Version, String)> = None; + for raw in versions.keys() { + let v = match semver::Version::parse(raw) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() && !range_has_prerelease { + continue; + } + if !req.matches(&v) { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + +/// PyPI version specifier used by install wrappers. We parse a +/// limited subset of PEP 440 specifiers — enough for the common +/// install-command cases (`pkg`, `pkg==X`, `pkg>=X`, `pkg=2.0`, `<3,>=2`, `~=1.4`). + Specifier(String), +} + +#[derive(Debug, Deserialize)] +struct PypiInfoResponse { + releases: std::collections::BTreeMap>, +} + +/// Resolve a `PypiSpec` against PyPI and return the concrete version +/// + publish time. The latest non-prerelease, non-yanked release is +/// preferred. 
+pub fn pypi_resolve(
+    name: &str,
+    spec: &PypiSpec,
+    registry: Option<&str>,
+) -> Result<ResolvedPackage, String> {
+    // Fetches `<registry>/pypi/<name>/json`, builds the candidate list from
+    // its `releases` map and picks one version according to `spec`.
+    // Every failure (empty name, transport error, non-2xx status, bad
+    // JSON, no matching version, missing timestamp) is reported as a
+    // human-readable `Err(String)`.
+    if name.is_empty() {
+        return Err("empty package name".to_string());
+    }
+    // Tolerate a trailing slash on a user-supplied registry URL.
+    let base = registry
+        .unwrap_or(DEFAULT_PYPI_REGISTRY)
+        .trim_end_matches('/');
+    let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name));
+
+    let client = http_client()?;
+    let resp = client
+        .get(&url)
+        .header("Accept", "application/json")
+        .send()
+        .map_err(|e| format!("PyPI request failed: {}", e))?;
+
+    // 404 gets its own message so "no such package" is distinguishable
+    // from other registry failures.
+    let status = resp.status();
+    if status == reqwest::StatusCode::NOT_FOUND {
+        return Err(format!("package '{}' not found on PyPI ({})", name, base));
+    }
+    if !status.is_success() {
+        return Err(format!("PyPI returned status {} for '{}'", status, name));
+    }
+
+    let body = resp
+        .text()
+        .map_err(|e| format!("failed to read PyPI response: {}", e))?;
+
+    let meta: PypiInfoResponse = serde_json::from_str(&body)
+        .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?;
+
+    let candidates = collect_pypi_candidates(&meta);
+    let chosen = match spec {
+        PypiSpec::Latest => pick_latest_stable(&candidates).map(|c| c.0.clone()),
+        // Exact versions are matched by plain string equality against the
+        // release keys.
+        PypiSpec::Exact(v) => {
+            if candidates.iter().any(|(ver, _)| ver == v) {
+                Some(v.clone())
+            } else {
+                None
+            }
+        }
+        // When the specifier cannot be resolved (`pypi_resolve_specifier`
+        // returned `None`), fall back to the latest stable release.
+        PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&candidates, spec_str)
+            .or_else(|| pick_latest_stable(&candidates).map(|c| c.0.clone())),
+    };
+
+    let chosen = chosen.ok_or_else(|| match spec {
+        PypiSpec::Exact(v) => {
+            format!(
+                "version '{}' for package '{}' was not found on PyPI",
+                v, name
+            )
+        }
+        _ => format!("no installable version found for '{}' on PyPI", name),
+    })?;
+
+    // Look the publish time up again by version string; the candidate
+    // list pairs each version with a timestamp.
+    let published_at = candidates
+        .iter()
+        .find(|(ver, _)| ver == &chosen)
+        .map(|(_, dt)| *dt)
+        .ok_or_else(|| {
+            format!(
+                "no upload timestamp for '{}' version '{}' on PyPI",
+                name, chosen
+            )
+        })?;
+
+    Ok(ResolvedPackage {
+        name: name.to_string(),
+        version: chosen,
+        published_at,
+    })
+}
+
+/// Returns
`(version, earliest_upload_time)` for every non-yanked +/// release that has at least one uploaded artifact. Empty release +/// entries (which PyPI sometimes keeps around for yanked / private +/// versions) are filtered out so we never pick them. +fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec<(String, DateTime)> { + let mut out = Vec::new(); + for (ver, files) in &meta.releases { + if files.is_empty() { + continue; + } + // Skip yanked-only releases. + if files + .iter() + .all(|f| f.upload_time_iso_8601.is_none() && f.upload_time.is_none()) + { + continue; + } + let mut earliest: Option> = None; + for f in files { + let raw = f + .upload_time_iso_8601 + .as_deref() + .or(f.upload_time.as_deref()); + if let Some(raw) = raw { + if let Ok(dt) = parse_iso8601(raw) { + earliest = match earliest { + Some(prev) if prev <= dt => Some(prev), + _ => Some(dt), + }; + } + } + } + if let Some(dt) = earliest { + out.push((ver.clone(), dt)); + } + } + out +} + +/// Pick the latest non-prerelease version using `semver` parsing as a +/// best-effort PEP 440 ordering. Falls back to the entry with the +/// latest upload time if no candidate parses as semver. +fn pick_latest_stable(candidates: &[(String, DateTime)]) -> Option<&(String, DateTime)> { + let mut best_semver: Option<(semver::Version, &(String, DateTime))> = None; + for c in candidates { + let normalized = normalize_for_semver(&c.0); + if let Ok(v) = semver::Version::parse(&normalized) { + if !v.pre.is_empty() { + continue; + } + match &best_semver { + Some((cur, _)) if cur >= &v => {} + _ => best_semver = Some((v, c)), + } + } + } + if let Some((_, picked)) = best_semver { + return Some(picked); + } + candidates.iter().max_by_key(|c| c.1) +} + +/// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or +/// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses +/// straight as semver if we pad to 3 components. Anything more exotic +/// (`1.0a1`, `2!1.0`, etc.) 
is left alone and rejected by semver. +pub(super) fn normalize_for_semver(v: &str) -> String { + if v.contains('!') + || v.contains('a') + || v.contains('b') + || v.contains("rc") + || v.contains(".dev") + { + return v.to_string(); + } + let parts: Vec<&str> = v.split('.').collect(); + match parts.len() { + 1 => format!("{}.0.0", parts[0]), + 2 => format!("{}.{}.0", parts[0], parts[1]), + _ => v.to_string(), + } +} + +/// Apply a PEP 440-style specifier expression to the candidate list +/// and return the highest match. Supported operators: `==`, `>=`, `>`, +/// `<=`, `<`, `~=`, `!=`. Unknown operators cause us to give up and +/// return `None` (the caller falls back to "latest stable"). +fn pypi_resolve_specifier(candidates: &[(String, DateTime)], spec: &str) -> Option { + let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); + let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + + for p in &parts { + let (op, val): (&str, &str) = if let Some(v) = p.strip_prefix("===") { + ("==", v.trim()) + } else if let Some(v) = p.strip_prefix("==") { + ("==", v.trim()) + } else if let Some(v) = p.strip_prefix(">=") { + (">=", v.trim()) + } else if let Some(v) = p.strip_prefix("<=") { + ("<=", v.trim()) + } else if let Some(v) = p.strip_prefix("!=") { + ("!=", v.trim()) + } else if let Some(v) = p.strip_prefix("~=") { + ("~=", v.trim()) + } else if let Some(v) = p.strip_prefix(">") { + (">", v.trim()) + } else if let Some(v) = p.strip_prefix("<") { + ("<", v.trim()) + } else { + return None; + }; + let v = semver::Version::parse(&normalize_for_semver(val)).ok()?; + requirements.push((op, v)); + } + + let mut best: Option<(semver::Version, String)> = None; + for (raw, _) in candidates { + let v = match semver::Version::parse(&normalize_for_semver(raw)) { + Ok(v) => v, + Err(_) => continue, + }; + if !v.pre.is_empty() { + continue; + } + let satisfies = requirements.iter().all(|(op, want)| match *op { + "==" => &v == want, + ">=" => &v >= want, 
+ "<=" => &v <= want, + "!=" => &v != want, + ">" => &v > want, + "<" => &v < want, + "~=" => { + if &v < want { + return false; + } + let upper = semver::Version::new(want.major, want.minor + 1, 0); + v < upper + } + _ => false, + }); + if !satisfies { + continue; + } + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw.clone())), + } + } + best.map(|(_, raw)| raw) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn npm_name_encoding() { + assert_eq!(encode_npm_name("left-pad"), "left-pad"); + assert_eq!(encode_npm_name("@scope/pkg"), "@scope%2fpkg"); + assert_eq!(encode_npm_name("@types/node"), "@types%2fnode"); + } + + #[test] + fn parses_iso8601_variants() { + assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05.123Z").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05+00:00").is_ok()); + assert!(parse_iso8601("2024-01-02T03:04:05").is_ok()); + assert!(parse_iso8601("not a date").is_err()); + } + + /// Network-touching integration tests. Skipped by default (#[ignore]) + /// so unit-test runs stay hermetic. 
Run with: + /// cargo test -- --ignored verify_deps::registry::tests::live + #[test] + #[ignore] + fn live_npm_resolve_latest() { + let r = npm_resolve("left-pad", &NpmSpec::Latest, None).expect("npm resolve latest"); + assert_eq!(r.name, "left-pad"); + assert_eq!(r.version, "1.3.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2018-04-09"); + } + + #[test] + #[ignore] + fn live_npm_resolve_exact() { + let r = npm_resolve("left-pad", &NpmSpec::Exact("1.3.0".to_string()), None) + .expect("npm resolve exact"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_range() { + let r = npm_resolve("left-pad", &NpmSpec::Range("^1.0.0".to_string()), None) + .expect("npm resolve range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_npm_style_range() { + // npm uses spaces, the Rust crate uses commas — we should + // accept both. + let r = npm_resolve( + "left-pad", + &NpmSpec::Range(">=1.0.0 <2.0.0".to_string()), + None, + ) + .expect("npm resolve space-range"); + assert_eq!(r.version, "1.3.0"); + } + + #[test] + #[ignore] + fn live_npm_resolve_unknown_tag() { + let err = npm_resolve( + "left-pad", + &NpmSpec::Tag("does-not-exist".to_string()), + None, + ) + .err() + .unwrap(); + assert!(err.contains("dist-tag"), "got: {}", err); + } + + #[test] + #[ignore] + fn live_pypi_resolve_latest() { + let r = pypi_resolve("flask", &PypiSpec::Latest, None).expect("pypi resolve latest"); + assert_eq!(r.name, "flask"); + assert!(!r.version.is_empty()); + } + + #[test] + #[ignore] + fn live_pypi_resolve_exact() { + let r = pypi_resolve("requests", &PypiSpec::Exact("2.31.0".to_string()), None) + .expect("pypi resolve exact"); + assert_eq!(r.version, "2.31.0"); + assert_eq!(r.published_at.format("%Y-%m-%d").to_string(), "2023-05-22"); + } + + #[test] + #[ignore] + fn live_pypi_resolve_specifier() { + let r = pypi_resolve( + "requests", + &PypiSpec::Specifier(">=2.30,<2.32".to_string()), + None, + ) + 
.expect("pypi resolve specifier"); + // `requests==2.31.0` is the only release in [2.30, 2.32). + assert_eq!(r.version, "2.31.0"); + } +} diff --git a/tests/cli_deps.rs b/tests/cli_deps.rs index 7723e8c..596c8cf 100644 --- a/tests/cli_deps.rs +++ b/tests/cli_deps.rs @@ -1,28 +1,9 @@ +mod common; + +use common::{corgea_isolated, fixture}; use std::process::Command; use tempfile::TempDir; -fn corgea_isolated() -> (Command, TempDir) { - let home = TempDir::new().expect("temp HOME"); - let mut cmd = Command::new(env!("CARGO_BIN_EXE_corgea")); - cmd.env("HOME", home.path()) - .env("USERPROFILE", home.path()) - .env_remove("CORGEA_TOKEN") - .env_remove("CORGEA_URL") - .env_remove("AI_AGENT") - .env_remove("CODEX_SANDBOX") - .env_remove("CLAUDECODE") - .env_remove("CLAUDE_CODE") - .env_remove("CURSOR_AGENT") - .env_remove("CURSOR_TRACE_ID") - .env_remove("GEMINI_CLI") - .env_remove("PI_AGENT"); - (cmd, home) -} - -fn fixture(name: &str) -> String { - format!("{}/tests/fixtures/{}", env!("CARGO_MANIFEST_DIR"), name) -} - #[test] fn cli_scan_runs_without_token_or_config() { let (mut cmd, _home) = corgea_isolated(); diff --git a/tests/cli_install.rs b/tests/cli_install.rs new file mode 100644 index 0000000..399a8a4 --- /dev/null +++ b/tests/cli_install.rs @@ -0,0 +1,349 @@ +//! Hermetic end-to-end tests for the install wrappers (`corgea pip|npm …`). +//! +//! Each test spawns the real binary (`CARGO_BIN_EXE_corgea`) against: +//! * a local TcpListener stub standing in for PyPI / the npm registry +//! (wired up via `CORGEA_PYPI_REGISTRY` / `CORGEA_NPM_REGISTRY`), and +//! * a fake package manager on `PATH` — a shell script that records its +//! argv to a marker file, proving whether the install actually ran. +//! +//! No live network. The fake package managers are Unix shell scripts, so +//! the whole file is Unix-only (matching the repo's Linux/macOS CI). 
+ +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread; +use tempfile::TempDir; + +/// Spawn a registry stub serving both the PyPI and npm routes the +/// resolver hits. Returns the base URL and a counter of accepted +/// connections (used to prove "no registry hit" for passthroughs). +/// +/// Routes: +/// * `/pypi/oldpkg/json` — one release, published 2020-01-01 +/// * `/pypi/freshpkg/json` — one release, published one hour ago +/// * `/oldpkg` — npm metadata, published 2020-01-01 +/// * `/freshpkg` — npm metadata, published one hour ago +/// * anything else — 404 +fn spawn_registry_stub() -> (String, Arc<AtomicUsize>) { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + let hits = Arc::new(AtomicUsize::new(0)); + let hits_in_thread = Arc::clone(&hits); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + hits_in_thread.fetch_add(1, Ordering::SeqCst); + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or("") + .to_string(); + + let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) + .format("%Y-%m-%dT%H:%M:%SZ") + .to_string(); + let (status, body) = match path.as_str() { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + ), + "/pypi/freshpkg/json" => ( 
+ "200 OK", + format!( + r#"{{"info":{{"name":"freshpkg"}},"releases":{{"9.9.9":[{{"upload_time_iso_8601":"{fresh_ts}"}}]}}}}"#, + ), + ), + "/oldpkg" => ( + "200 OK", + r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), + ), + "/freshpkg" => ( + "200 OK", + format!( + r#"{{"dist-tags":{{"latest":"9.9.9"}},"versions":{{"9.9.9":{{}}}},"time":{{"9.9.9":"{fresh_ts}"}}}}"#, + ), + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + (base_url, hits) +} + +/// Write an executable fake package manager named `binary` into `dir`. +/// It records its argv to `marker` and exits with `exit_code` — proving +/// both "the install ran (with these args)" and exit-code forwarding. +fn write_fake_package_manager(dir: &Path, binary: &str, marker: &Path, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake package manager"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake package manager"); +} + +/// A ready-to-run wrapper invocation: isolated `corgea` command with the +/// registry stub wired in and a fake `binary` on a PATH of its own. +struct WrapperHarness { + cmd: Command, + marker: PathBuf, + registry_hits: Arc<AtomicUsize>, + _home: TempDir, + _bin: TempDir, +} + +impl WrapperHarness { + /// `registry_env` is `CORGEA_PYPI_REGISTRY` or `CORGEA_NPM_REGISTRY`, + /// matching `binary`'s ecosystem. 
+ fn new(binary: &str, registry_env: &str, pm_exit_code: i32) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_package_manager(bin.path(), binary, &marker, pm_exit_code); + let (base_url, registry_hits) = spawn_registry_stub(); + cmd.env("PATH", bin.path()).env(registry_env, &base_url); + Self { + cmd, + marker, + registry_hits, + _home: home, + _bin: bin, + } + } + + /// The argv the fake package manager was invoked with, if it ran. + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn pip_fresh_pin_blocks_without_running_install() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn pip_old_pin_runs_install_with_forwarded_args() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("published"), "stdout: {stdout}"); +} + +#[test] +fn pip_no_fail_demotes_block_and_installs() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); 
+ let out = h + .cmd + .args(["pip", "--no-fail", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install freshpkg==9.9.9"), + "--no-fail must still run the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); +} + +#[test] +fn pip_non_install_subcommand_passes_through_without_registry_hit() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "list"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "passthrough must not touch the registry" + ); +} + +#[test] +fn pip_json_reports_fresh_pin_as_recent() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "--json", "install", "freshpkg==9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["results"][0]["status"], "recent"); + assert_eq!(parsed["results"][0]["name"], "freshpkg"); + assert_eq!(parsed["summary"]["recent"], 1); +} + +#[test] +fn pip_resolution_error_prints_error_but_install_proceeds() { + // `nosuchpkg` hits the stub's 404 route → an error outcome, which + // warns but never blocks in the baseline (fail-closed is a later + // chunk) — the install must still run. 
+ let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert!( + h.registry_hits.load(Ordering::SeqCst) >= 1, + "the 404 route must have been hit" + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install nosuchpkg==1.0.0"), + "a resolution error must not block the install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("not found"), "stdout: {stdout}"); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); +} + +#[test] +fn pip_mixed_fresh_and_old_pins_block_without_running_install() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "install", "freshpkg==9.9.9", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "one recent target must block the whole install" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("within threshold"), "stdout: {stdout}"); + assert!(stdout.contains("1 ok, 1 recent"), "stdout: {stdout}"); +} + +#[test] +fn npm_fresh_pin_blocks_without_running_install() { + let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "freshpkg@9.9.9"]) + .output() + .expect("failed to run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run when blocked"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Refusing to run install"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_old_pin_runs_install_with_forwarded_args() { + let mut h = WrapperHarness::new("npm", 
"CORGEA_NPM_REGISTRY", 0); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); +} + +#[test] +fn wrapper_forwards_package_manager_exit_code() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 7); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "the package manager's exit code must be forwarded" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..dfb643c --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1,35 @@ +//! Shared helpers for the e2e CLI tests (standard Cargo `tests/common/mod.rs` +//! pattern — included via `mod common;` from each integration-test crate, so +//! items unused by one consumer are `#[allow(dead_code)]`). + +use std::process::Command; +use tempfile::TempDir; + +/// A `corgea` invocation isolated from the host environment: temp +/// HOME/USERPROFILE, no Corgea config/registry env vars, and no +/// agent-detection env vars leaking in. 
+#[allow(dead_code)] +pub fn corgea_isolated() -> (Command, TempDir) { + let home = TempDir::new().expect("temp HOME"); + let mut cmd = Command::new(env!("CARGO_BIN_EXE_corgea")); + cmd.env("HOME", home.path()) + .env("USERPROFILE", home.path()) + .env_remove("CORGEA_TOKEN") + .env_remove("CORGEA_URL") + .env_remove("CORGEA_NPM_REGISTRY") + .env_remove("CORGEA_PYPI_REGISTRY") + .env_remove("AI_AGENT") + .env_remove("CODEX_SANDBOX") + .env_remove("CLAUDECODE") + .env_remove("CLAUDE_CODE") + .env_remove("CURSOR_AGENT") + .env_remove("CURSOR_TRACE_ID") + .env_remove("GEMINI_CLI") + .env_remove("PI_AGENT"); + (cmd, home) +} + +#[allow(dead_code)] +pub fn fixture(name: &str) -> String { + format!("{}/tests/fixtures/{}", env!("CARGO_MANIFEST_DIR"), name) +} From 65b062b3034149f67c5d39eb4fa7927f7712c317 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Wed, 10 Jun 2026 10:05:29 +0200 Subject: [PATCH 03/59] Block vulnerable/unverifiable installs via vuln-api verdict Wire vuln_api::check_package_version into the install wrappers: after the recency pass, each resolved name@version gets a verdict. is_vulnerable blocks; any API failure (network/timeout/5xx/auth) blocks fail-closed as unverifiable. --force proceeds despite all findings (still printed, manager exit code propagated); --no-fail keeps demoting recency only. Tokenless runs degrade to recency-only with a corgea login prompt. JSON output gains per-result verdict objects, vulnerable/unverifiable counts, and a top-level verdict_mode. Ports the PR #89 vuln-api test stub into the crate (corgea::vuln_api_stub plus a standalone vuln-api-stub bin) and pins the behavior with hermetic e2e tests in tests/cli_verdict.rs. 
--- Cargo.toml | 8 + skills/corgea/SKILL.md | 24 ++- src/bin/vuln-api-stub.rs | 40 ++++ src/config.rs | 2 - src/lib.rs | 1 + src/main.rs | 46 ++++- src/precheck/mod.rs | 379 +++++++++++++++++++++++++++++++--- src/vuln_api_stub/fixtures.rs | 70 +++++++ src/vuln_api_stub/mod.rs | 264 +++++++++++++++++++++++ tests/cli_verdict.rs | 253 +++++++++++++++++++++++ tests/common/mod.rs | 1 + 11 files changed, 1040 insertions(+), 48 deletions(-) create mode 100644 src/bin/vuln-api-stub.rs create mode 100644 src/vuln_api_stub/fixtures.rs create mode 100644 src/vuln_api_stub/mod.rs create mode 100644 tests/cli_verdict.rs diff --git a/Cargo.toml b/Cargo.toml index 13e1c71..877317e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,14 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bin]] +name = "corgea" +path = "src/main.rs" + +[[bin]] +name = "vuln-api-stub" +path = "src/bin/vuln-api-stub.rs" + [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index ec6cb9c..7b8dc5e 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -112,26 +112,34 @@ Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypas ### Install Wrappers — `corgea pip|npm|yarn|pnpm|uv ` Run a package manager through Corgea's install gate. Install commands with named -targets are resolved against the public registry first; a version published within -`--threshold` (default `2d`) blocks the install (exit 1). Everything else passes +targets are resolved against the public registry first, then gated twice: a version +published within `--threshold` (default `2d`) blocks (exit 1), and — when a Corgea +token is configured — each resolved version is checked against Corgea's vuln-api; +known-vulnerable or malicious versions block, and a verdict that cannot be obtained +(network/5xx/auth errors) also blocks (fail-closed). 
Without a token the vuln check +is skipped (recency-only) and stderr suggests `corgea login`. Everything else passes through with the package manager's own exit code. Offline-only inputs (git/URL/path specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. ```bash -corgea pip install requests==2.31.0 # resolves, checks recency, then runs pip +corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip corgea npm install axios@^1.0.0 # same gate for npm ranges -corgea pip --no-fail install newpkg # demote a block to a warning -corgea pip --json install newpkg # machine-readable per-target report +corgea pip --no-fail install newpkg # demote a recency block to a warning (vuln blocks still apply) +corgea pip --force install badpkg # print findings but install anyway (overrides every block) +corgea pip --json install newpkg # machine-readable per-target report incl. verdicts corgea pip list # non-install subcommands pass straight through ``` | Flag | Short | Description | |------|-------|-------------| | `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | -| `--no-fail` | | Print the finding but run the install anyway. | -| `--json` | | JSON report instead of text. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable/unverifiable blocks. | +| `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | +| `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode`. | -No Corgea token required. Registry overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`. +Recency gating needs no token; the vuln verdict uses the configured Corgea token when +present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, +`CORGEA_VULN_API_URL`. 
### Deps — `corgea deps ` diff --git a/src/bin/vuln-api-stub.rs b/src/bin/vuln-api-stub.rs new file mode 100644 index 0000000..cf47d08 --- /dev/null +++ b/src/bin/vuln-api-stub.rs @@ -0,0 +1,40 @@ +//! Standalone vuln-api stub for e2e dogfood and local development. + +use clap::Parser; +use corgea::vuln_api_stub; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command( + name = "vuln-api-stub", + about = "Minimal TCP stub for vuln-api package-check routes" +)] +struct Args { + /// JSON fixture file (`package_checks` + optional `advisories`). + #[arg(long)] + fixtures: PathBuf, + + /// TCP port to bind (`0` = ephemeral). + #[arg(long, default_value = "0")] + port: u16, + + /// Print base URL to stdout and keep serving until SIGTERM. + #[arg(long)] + print_url: bool, +} + +fn main() { + let args = Args::parse(); + let stub = if args.port == 0 { + vuln_api_stub::spawn_from_file(&args.fixtures) + } else { + let fixtures = vuln_api_stub::load_from_file(&args.fixtures) + .unwrap_or_else(|e| panic!("failed to load {}: {e}", args.fixtures.display())); + vuln_api_stub::spawn_on_port(fixtures, args.port) + }; + if args.print_url { + println!("{}", stub.base_url); + } + eprintln!("vuln-api stub listening on {}", stub.base_url); + stub.block(); +} diff --git a/src/config.rs b/src/config.rs index 01db7bd..d3c2125 100644 --- a/src/config.rs +++ b/src/config.rs @@ -108,8 +108,6 @@ impl Config { /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, /// then the config file's `vuln_api_url`, then the public default. - /// Consumed by the install-gate vuln check (chunk 3); no caller yet. - #[allow(dead_code)] pub fn get_vuln_api_url(&self) -> String { let url = crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") .or_else(|| self.vuln_api_url.clone()) diff --git a/src/lib.rs b/src/lib.rs index bf66ab7..399b15f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,3 +6,4 @@ pub mod verify_deps; // compiles cleanly in both crates. 
mod log; pub mod vuln_api; +pub mod vuln_api_stub; diff --git a/src/main.rs b/src/main.rs index 8d26534..e127f85 100644 --- a/src/main.rs +++ b/src/main.rs @@ -249,6 +249,12 @@ struct InstallWrapArgs { )] no_fail: bool, + #[arg( + long, + help = "Proceed with the install despite vulnerable, unverifiable, or recent findings. Findings are still printed." + )] + force: bool, + #[arg( long, help = "Output the result as JSON instead of human-readable text." @@ -260,18 +266,38 @@ struct InstallWrapArgs { cmd: Vec, } -fn install_wrap_options(args: &InstallWrapArgs) -> corgea::precheck::PrecheckOptions { +fn install_wrap_options( + args: &InstallWrapArgs, + config: &Config, +) -> corgea::precheck::PrecheckOptions { + let token = config.get_token(); + let token = token.trim(); + let verdict = if token.is_empty() { + None + } else { + Some(corgea::precheck::VerdictConfig { + base_url: config.get_vuln_api_url(), + token: token.to_string(), + }) + }; corgea::precheck::PrecheckOptions { threshold: args.threshold, no_fail: args.no_fail, + force: args.force, json: args.json, + verdict, npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), } } -fn run_install_wrap_command(manager: corgea::precheck::PackageManager, args: &InstallWrapArgs) { - let code = corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args)); +fn run_install_wrap_command( + manager: corgea::precheck::PackageManager, + args: &InstallWrapArgs, + config: &Config, +) { + let code = + corgea::precheck::run_install(manager, &args.cmd, install_wrap_options(args, config)); std::process::exit(code); } @@ -558,21 +584,23 @@ fn main() { // Offline: no token / network. Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } - // Install wrappers: no auth gate — mirror `Deps` (offline-only in Phase 1). 
+ // Install wrappers: no hard auth gate — the recency check needs no token, + // and a token (when present) additionally enables the vuln-api verdict. + // Tokenless degrades to recency-only with a login prompt. Some(Commands::Npm(args)) => { - run_install_wrap_command(corgea::precheck::PackageManager::Npm, args) + run_install_wrap_command(corgea::precheck::PackageManager::Npm, args, &corgea_config) } Some(Commands::Yarn(args)) => { - run_install_wrap_command(corgea::precheck::PackageManager::Yarn, args) + run_install_wrap_command(corgea::precheck::PackageManager::Yarn, args, &corgea_config) } Some(Commands::Pnpm(args)) => { - run_install_wrap_command(corgea::precheck::PackageManager::Pnpm, args) + run_install_wrap_command(corgea::precheck::PackageManager::Pnpm, args, &corgea_config) } Some(Commands::Pip(args)) => { - run_install_wrap_command(corgea::precheck::PackageManager::Pip, args) + run_install_wrap_command(corgea::precheck::PackageManager::Pip, args, &corgea_config) } Some(Commands::Uv(args)) => { - run_install_wrap_command(corgea::precheck::PackageManager::Uv, args) + run_install_wrap_command(corgea::precheck::PackageManager::Uv, args, &corgea_config) } None => { utils::terminal::show_welcome_message(); diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 5bbd94f..6897168 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -56,14 +56,54 @@ impl PackageManager { PackageManager::Uv => false, } } + + /// vuln-api ecosystem path segment for this manager's registry. + pub fn ecosystem(self) -> &'static str { + match self { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => "npm", + PackageManager::Pip | PackageManager::Uv => "pypi", + } + } +} + +/// Connection details for the vuln-api verdict pass. +/// `None` in `PrecheckOptions.verdict` ⇒ tokenless mode: verdicts are +/// skipped and the gate degrades to recency-only cover. 
+#[derive(Debug, Clone)] +pub struct VerdictConfig { + pub base_url: String, + pub token: String, +} + +/// Threat verdict for one resolved target. +#[derive(Debug, Clone)] +pub enum VerdictStatus { + /// vuln-api answered: no known advisories for this exact version. + Clean, + /// vuln-api answered: known vulnerable or malicious — blocks. + Vulnerable(Vec<crate::vuln_api::VulnMatch>), + /// The verdict could not be obtained (network/5xx/auth/integrity). + /// Blocks fail-closed. + Unverifiable(String), + /// Verdict never attempted (no token). Recency-only cover. + NotChecked(String), } +/// Reason recorded on resolved targets when no token is configured. +const NO_TOKEN_REASON: &str = "no Corgea token; vulnerability verdict skipped"; + #[derive(Debug, Clone)] pub struct PrecheckOptions { pub threshold: Duration, /// If true, demote a recent finding from "block" to "warn-and-run". pub no_fail: bool, + /// If true, never block: print findings (recent, vulnerable, + /// unverifiable) and run the install anyway. + pub force: bool, pub json: bool, + /// `Some` ⇒ run the vuln-api verdict pass against this endpoint; + /// `None` ⇒ tokenless recency-only mode. + pub verdict: Option<VerdictConfig>, /// Optional registry overrides, used by tests. pub npm_registry: Option<String>, pub pypi_registry: Option<String>, @@ -100,6 +140,7 @@ pub enum TargetOutcome { resolved: crate::verify_deps::registry::ResolvedPackage, age: Duration, recent: bool, + verdict: VerdictStatus, }, /// We deliberately couldn't verify this target (URL / git / etc.). Skipped { @@ -132,6 +173,28 @@ impl PrecheckReport { pub fn recent_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Resolved { recent: true, .. })) } + pub fn vulnerable_count(&self) -> usize { + self.count(|o| { + matches!( + o, + TargetOutcome::Resolved { + verdict: VerdictStatus::Vulnerable(_), + .. 
+ } + ) + }) + } + pub fn unverifiable_count(&self) -> usize { + self.count(|o| { + matches!( + o, + TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(_), + .. 
+ } + ) + }) + } pub fn skipped_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) } @@ -241,12 +304,19 @@ fn run_parsed_install( let threshold = chrono::Duration::from_std(opts.threshold).expect("threshold validated before run_install"); - let outcomes: Vec<_> = parsed + let mut outcomes: Vec<_> = parsed .targets .iter() .map(|target| verify_one(target, &opts, &now, threshold)) .collect(); + run_verdict_pass(manager, &mut outcomes, &opts); + if opts.verdict.is_none() { + eprintln!( + "note: no Corgea token — vulnerability verdicts skipped (recency-only). Run `corgea login` for the full gate." + ); + } + let report = PrecheckReport { manager, subcommand: subcommand_label.to_string(), @@ -256,14 +326,18 @@ fn run_parsed_install( }; if opts.json { - print_json(&report); + print_json(&report, &opts); } else { print_text(&report); } if should_block_install(&report, &opts) { if !opts.json { - eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); + if report.vulnerable_count() > 0 || report.unverifiable_count() > 0 { + eprintln!("Refusing to run install. Pass --force to proceed despite findings."); + } else { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); + } } return 1; } @@ -271,8 +345,59 @@ fn run_parsed_install( exec() } +/// Sequential vuln-api verdict pass over resolved targets. No-op without +/// a `VerdictConfig` (tokenless mode — `verify_one` already marked every +/// resolved target `NotChecked`). Any client/call failure is fail-closed: +/// the target becomes `Unverifiable`, which blocks unless `--force`. +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { verdict, .. 
} = o { + *verdict = VerdictStatus::Unverifiable(e.clone()); + } + } + return; + } + }; + + let ecosystem = manager.ecosystem(); + for o in outcomes.iter_mut() { + let TargetOutcome::Resolved { + resolved, verdict, .. + } = o + else { + continue; + }; + *verdict = match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + &cfg.token, + ecosystem, + &resolved.name, + &resolved.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }; + } +} + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { - !opts.no_fail && report.recent_count() > 0 + if opts.force { + return false; + } + report.vulnerable_count() > 0 + || report.unverifiable_count() > 0 + || (!opts.no_fail && report.recent_count() > 0) } fn verify_one( @@ -309,6 +434,7 @@ fn verify_one( resolved, age, recent: age_chrono < threshold, + verdict: VerdictStatus::NotChecked(NO_TOKEN_REASON.to_string()), } } Err(e) => TargetOutcome::Error { @@ -367,9 +493,11 @@ fn print_text(report: &PrecheckReport) { verify_deps::format_duration(report.threshold) ); println!( - " {} ok, {} recent, {} skipped, {} errors", + " {} ok, {} recent, {} vulnerable, {} unverifiable, {} skipped, {} errors", report.ok_count(), report.recent_count(), + report.vulnerable_count(), + report.unverifiable_count(), report.skipped_count(), report.error_count(), ); @@ -381,26 +509,44 @@ fn print_text(report: &PrecheckReport) { resolved, age, recent, - } => { - if *recent { + verdict, + } => match verdict { + VerdictStatus::Vulnerable(matches) => { println!( - " ⚠ {} → {}@{} published {} ago at {} (within threshold)", - target.display, - resolved.name, - resolved.version, - verify_deps::format_duration(*age), - resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + " ✗ {} → {}@{} known vulnerable:", + target.display, resolved.name, resolved.version, ); - } else { + for m in 
matches { + println!(" {} ({})", m.advisory_id, m.severity_level); + } + } + VerdictStatus::Unverifiable(error) => { println!( - " ✓ {} → {}@{} published {} ago", - target.display, - resolved.name, - resolved.version, - verify_deps::format_duration(*age), + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, ); } - } + VerdictStatus::Clean | VerdictStatus::NotChecked(_) => { + if *recent { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); + } + } + }, TargetOutcome::Skipped { target, reason } => { println!(" ? {}: {}", target.display, reason); } @@ -411,7 +557,7 @@ fn print_text(report: &PrecheckReport) { } } -fn print_json(report: &PrecheckReport) { +fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { use serde_json::json; let outcomes: Vec<_> = report .outcomes @@ -422,14 +568,30 @@ fn print_json(report: &PrecheckReport) { resolved, age, recent, - } => json!({ - "status": if *recent { "recent" } else { "ok" }, - "spec": target.display, - "name": resolved.name, - "resolved_version": resolved.version, - "published_at": resolved.published_at.to_rfc3339(), - "age_seconds": age.as_secs(), - }), + verdict, + } => { + let verdict_json = match verdict { + VerdictStatus::Clean => json!({ "status": "clean" }), + VerdictStatus::Vulnerable(matches) => { + json!({ "status": "vulnerable", "matches": matches }) + } + VerdictStatus::Unverifiable(error) => { + json!({ "status": "unverifiable", "error": error }) + } + VerdictStatus::NotChecked(reason) => { + json!({ "status": "not_checked", "reason": reason }) + } + }; + json!({ + "status": if *recent { "recent" } else { "ok" }, + "spec": 
+ target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + "verdict": verdict_json, + }) + } TargetOutcome::Skipped { target, reason } => json!({ "status": "skipped", "spec": target.display, @@ -453,9 +615,12 @@ fn print_json(report: &PrecheckReport) { "summary": { "ok": report.ok_count(), "recent": report.recent_count(), + "vulnerable": report.vulnerable_count(), + "unverifiable": report.unverifiable_count(), "skipped": report.skipped_count(), "errors": report.error_count(), }, + "verdict_mode": if opts.verdict.is_some() { "full" } else { "recency-only" }, "results": outcomes, }); @@ -488,7 +653,9 @@ mod tests { PrecheckOptions { threshold: Duration::from_secs(2 * 86400), no_fail, + force: false, json: false, + verdict: None, npm_registry: None, pypi_registry: Some(pypi_registry), } @@ -541,4 +708,158 @@ mod tests { assert_eq!(code, 42); assert!(exec_ran); } + + fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + TargetOutcome::Resolved { + target: InstallTarget { + name: name.to_string(), + display: format!("{name}=={version}"), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + resolved: crate::verify_deps::registry::ResolvedPackage { + name: name.to_string(), + version: version.to_string(), + published_at: Utc::now() - chrono::Duration::days(365), + }, + age: Duration::from_secs(365 * 86400), + recent, + verdict: VerdictStatus::NotChecked(NO_TOKEN_REASON.to_string()), + } + } + + fn report_with(outcomes: Vec<TargetOutcome>) -> PrecheckReport { + PrecheckReport { + manager: PackageManager::Pip, + subcommand: "install".to_string(), + original_args: vec![], + outcomes, + threshold: Duration::from_secs(2 * 86400), + } + } + + fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { + if let TargetOutcome::Resolved { verdict, .. 
} = outcome { + *verdict = v; + } + } + + #[test] + fn ecosystem_mapping() { + assert_eq!(PackageManager::Pip.ecosystem(), "pypi"); + assert_eq!(PackageManager::Uv.ecosystem(), "pypi"); + assert_eq!(PackageManager::Npm.ecosystem(), "npm"); + assert_eq!(PackageManager::Yarn.ecosystem(), "npm"); + assert_eq!(PackageManager::Pnpm.ecosystem(), "npm"); + } + + /// Full predicate matrix: force ⇒ never block; vulnerable and + /// unverifiable block regardless of --no-fail; recency keeps its + /// task-2 --no-fail demotion. + #[test] + fn block_predicate_matrix() { + let opts = |no_fail: bool, force: bool| PrecheckOptions { + no_fail, + force, + ..stub_opts("http://127.0.0.1:9".to_string(), false) + }; + + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + + assert!(!should_block_install(&clean, &opts(false, false))); + assert!(should_block_install(&recent, &opts(false, false))); + assert!(!should_block_install(&recent, &opts(true, false))); + assert!(should_block_install(&vulnerable, &opts(false, false))); + assert!( + should_block_install(&vulnerable, &opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + should_block_install(&unverifiable, &opts(true, false)), + "--no-fail must not waive an unverifiable block" + ); + for report in [&clean, &recent, &vulnerable, &unverifiable] { + assert!( + !should_block_install(report, &opts(false, true)), + "--force must never block" + ); + assert!(!should_block_install(report, &opts(true, true))); + } + } 
+ + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. + #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); + opts.verdict = Some(VerdictConfig { + base_url: stub.base_url.clone(), + token: "test-token".to_string(), + }); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. 
+ let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts("http://127.0.0.1:9".to_string(), false); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked(_), + .. + } + )); + } } diff --git a/src/vuln_api_stub/fixtures.rs b/src/vuln_api_stub/fixtures.rs new file mode 100644 index 0000000..626bfea --- /dev/null +++ b/src/vuln_api_stub/fixtures.rs @@ -0,0 +1,70 @@ +use super::StubFixtures; +use serde::Deserialize; +use serde_json::Value; +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +#[derive(Debug, Deserialize)] +struct FixtureFile { + #[serde(default)] + package_checks: HashMap, + #[serde(default)] + advisories: HashMap, +} + +/// Load stub fixtures from JSON. Keys in `package_checks` use `{ecosystem}/{name}/{version}`. +pub fn load_from_file(path: &Path) -> Result> { + let raw = fs::read_to_string(path)?; + let file: FixtureFile = serde_json::from_str(&raw)?; + + let mut package_checks = HashMap::new(); + for (key, value) in file.package_checks { + let (eco, name, ver) = parse_package_key(&key)?; + let body = serde_json::to_string(&value)?; + package_checks.insert((eco, name, ver), body); + } + + let mut advisories = HashMap::new(); + for (id, value) in file.advisories { + advisories.insert(id, serde_json::to_string(&value)?); + } + + Ok(StubFixtures { + package_checks, + advisories, + status_overrides: HashMap::new(), + }) +} + +fn parse_package_key(key: &str) -> Result<(String, String, String), Box> { + let parts: Vec<&str> = key.split('/').collect(); + if parts.len() != 3 { + return Err( + format!("package_checks key must be ecosystem/name/version, got {key:?}").into(), + ); + } + Ok(( + parts[0].to_string(), + parts[1].to_string(), + parts[2].to_string(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_package_key_validates_format() { + assert_eq!( + 
parse_package_key("npm/lodash/4.17.20").unwrap(), + ( + "npm".to_string(), + "lodash".to_string(), + "4.17.20".to_string() + ) + ); + assert!(parse_package_key("npm/lodash").is_err()); + } +} diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs new file mode 100644 index 0000000..0535c88 --- /dev/null +++ b/src/vuln_api_stub/mod.rs @@ -0,0 +1,264 @@ +mod fixtures; + +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::Path; +use std::sync::Arc; +use std::thread; +use std::time::Duration; + +pub use fixtures::load_from_file; + +pub type PackageKey = (String, String, String); + +const NOT_FOUND_BODY: &str = r#"{"error":"not found"}"#; + +/// Loaded fixture data for the vuln-api stub server. +#[derive(Debug, Clone, Default)] +pub struct StubFixtures { + pub package_checks: HashMap, + pub advisories: HashMap, + pub status_overrides: HashMap, +} + +pub struct VulnApiStub { + pub base_url: String, + _handle: thread::JoinHandle<()>, +} + +impl VulnApiStub { + /// Block until the stub server thread exits (normally never, unless the listener fails). + pub fn block(self) { + let _ = self._handle.join(); + } +} + +/// Minimal TCP vuln-api stub for CLI integration tests and e2e dogfood. +pub fn spawn(fixtures: HashMap) -> VulnApiStub { + spawn_with_statuses(fixtures, HashMap::new()) +} + +pub fn spawn_with_statuses( + fixtures: HashMap, + status_overrides: HashMap, +) -> VulnApiStub { + spawn_on_port( + StubFixtures { + package_checks: fixtures, + advisories: HashMap::new(), + status_overrides, + }, + 0, + ) +} + +/// Bind stub on `port` (`0` = ephemeral). Returns base URL `http://127.0.0.1:{port}`. 
+pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { + let addr = if port == 0 { + "127.0.0.1:0".to_string() + } else { + format!("127.0.0.1:{port}") + }; + let listener = TcpListener::bind(&addr).unwrap_or_else(|e| panic!("bind stub on {addr}: {e}")); + let bound_port = listener.local_addr().expect("stub local_addr").port(); + let base_url = format!("http://127.0.0.1:{bound_port}"); + + let package_checks = Arc::new(fixtures.package_checks); + let advisories = Arc::new(fixtures.advisories); + let status_overrides = Arc::new(fixtures.status_overrides); + + let handle = thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { + continue; + }; + handle_connection(&mut stream, &package_checks, &advisories, &status_overrides); + } + }); + + thread::sleep(Duration::from_millis(50)); + + VulnApiStub { + base_url, + _handle: handle, + } +} + +pub fn spawn_from_file(path: &Path) -> VulnApiStub { + let fixtures = + load_from_file(path).unwrap_or_else(|e| panic!("load stub fixtures {path:?}: {e}")); + spawn_on_port(fixtures, 0) +} + +fn handle_connection( + stream: &mut std::net::TcpStream, + package_checks: &Arc>, + advisories: &Arc>, + status_overrides: &Arc>, +) { + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + + let path = req.lines().next().and_then(|l| l.split_whitespace().nth(1)); + + let (status_code, response_body) = match path { + Some(path) => { + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + if parts.len() >= 7 + && parts[0] == "v1" + && parts[1] == "packages" + && parts[4] == "versions" + && parts[6] == "check" + { + let key = ( + parts[2].to_string(), + urlencoding::decode(parts[3]) + .unwrap_or_default() + .into_owned(), + 
urlencoding::decode(parts[5]) + .unwrap_or_default() + .into_owned(), + ); + let body = package_checks + .get(&key) + .cloned() + .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); + let status = status_overrides.get(&key).copied().unwrap_or(200); + (status, body) + } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { + let id = urlencoding::decode(parts[2]) + .unwrap_or_default() + .into_owned(); + match advisories.get(&id) { + Some(body) => (200, body.clone()), + None => (404, NOT_FOUND_BODY.to_string()), + } + } else { + (404, NOT_FOUND_BODY.to_string()) + } + } + None => (400, r#"{"error":"bad request"}"#.to_string()), + }; + + let status_text = status_text(status_code); + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + status_code, + status_text, + response_body.len(), + response_body + ); + let _ = stream.write_all(response.as_bytes()); +} + +fn status_text(status_code: u16) -> &'static str { + match status_code { + 404 => "Not Found", + 401 => "Unauthorized", + 403 => "Forbidden", + 429 => "Too Many Requests", + 500..=599 => "Internal Server Error", + _ if status_code >= 400 => "Error", + _ => "OK", + } +} + +fn default_clean_response(eco: &str, name: &str, ver: &str) -> String { + format!( + r#"{{"ecosystem":"{eco}","package_name":"{name}","version":"{ver}","is_vulnerable":false,"matches":[]}}"# + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Read, Write}; + use std::net::TcpStream; + + fn get(base_url: &str, path: &str) -> String { + let addr = base_url.trim_start_matches("http://"); + let mut stream = TcpStream::connect(addr).expect("connect stub"); + let req = format!("GET {path} HTTP/1.1\r\nHost: localhost\r\n\r\n"); + stream.write_all(req.as_bytes()).unwrap(); + let mut resp = String::new(); + stream.read_to_string(&mut resp).unwrap(); + resp + } + + fn key(eco: &str, name: &str, ver: &str) -> super::PackageKey { + 
(eco.to_string(), name.to_string(), ver.to_string()) + } + + #[test] + fn scripted_package_check_and_status_override() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evil", "1.0.0"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true,"matches":[]}"#.to_string(), + ); + checks.insert(key("pypi", "flaky", "1.0.0"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "flaky", "1.0.0"), 503u16); + let stub = spawn_with_statuses(checks, statuses); + + let resp = get( + &stub.base_url, + "/v1/packages/pypi/evil/versions/1.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); + assert!(resp.contains(r#""is_vulnerable":true"#), "resp: {resp}"); + + let resp = get( + &stub.base_url, + "/v1/packages/pypi/flaky/versions/1.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 503"), "resp: {resp}"); + + // Unknown package → synthesized clean 200. + let resp = get( + &stub.base_url, + "/v1/packages/pypi/unknown/versions/2.0.0/check", + ); + assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); + assert!(resp.contains(r#""is_vulnerable":false"#), "resp: {resp}"); + } + + #[test] + fn advisory_route_and_fixture_file_loading() { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join("fixtures.json"); + std::fs::write( + &path, + r#"{ + "package_checks": { + "npm/left-pad/1.0.0": {"ecosystem":"npm","package_name":"left-pad","version":"1.0.0","is_vulnerable":true,"matches":[]} + }, + "advisories": {"MAL-2024-0001": {"id":"MAL-2024-0001"}} + }"#, + ) + .unwrap(); + let stub = spawn_from_file(&path); + + let resp = get( + &stub.base_url, + "/v1/packages/npm/left-pad/versions/1.0.0/check", + ); + assert!(resp.contains(r#""is_vulnerable":true"#), "resp: {resp}"); + + let resp = get(&stub.base_url, "/v1/advisories/MAL-2024-0001"); + assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); + let resp = get(&stub.base_url, 
"/v1/advisories/NOPE"); + assert!(resp.starts_with("HTTP/1.1 404"), "resp: {resp}"); + } +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs new file mode 100644 index 0000000..e6db018 --- /dev/null +++ b/tests/cli_verdict.rs @@ -0,0 +1,253 @@ +//! Hermetic e2e tests for the install-gate vuln-api verdict +//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_install.rs` harness pattern (fake package manager on a +//! private PATH + local pypi registry stub) with the in-crate vuln-api stub. +//! `oldpkg==1.0.0` is published in 2020, so recency never blocks here — +//! every block in this file is the verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +fn vulnerable_oldpkg_body() -> String { + r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"2.0.0"}]}"# + .to_string() +} + +/// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never +/// recent). Everything else 404s. 
+fn spawn_pypi_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or("") + .to_string(); + + let (status, body) = match path.as_str() { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write an executable fake `pip` into `dir`. It records its argv to `marker` +/// and exits with `exit_code` — proving both whether the install ran and that +/// the exit code propagates. +fn write_fake_pip(dir: &Path, marker: &Path, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + let path = dir.join("pip"); + std::fs::write(&path, script).expect("write fake pip"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake pip"); +} + +/// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. 
+struct VerdictHarness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl VerdictHarness { + /// `token: None` exercises tokenless mode (no CORGEA_TOKEN set). + fn new( + checks: HashMap<PackageKey, String>, + statuses: HashMap<PackageKey, u16>, + token: Option<&str>, + pip_exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pip(bin.path(), &marker, pip_exit_code); + let registry = spawn_pypi_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); + if let Some(t) = token { + cmd.env("CORGEA_TOKEN", t); + } + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn vulnerable_pin_blocks_without_running_install() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}"); + assert!(stdout.contains("critical"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("--force"), + "block message must name --force" + ); +} + +#[test] +fn force_overrides_vulnerable_block_and_propagates_exit_code() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 7); + 
let out = h + .cmd + .args(["pip", "--force", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "manager exit code must propagate under --force" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("MAL-2024-0001"), + "findings must still print under --force: {stdout}" + ); +} + +#[test] +fn verdict_503_fails_closed() { + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); + let mut h = VerdictHarness::new(HashMap::new(), statuses, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "unverifiable must block (fail-closed)" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("could not be verified"), "stdout: {stdout}"); +} + +#[test] +fn tokenless_degrades_to_recency_only_with_login_prompt() { + // Stub would flag oldpkg, but with no token it must never be consulted. 
+ let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = VerdictHarness::new(checks, HashMap::new(), None, 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "old + unchecked package must install" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("corgea login"), + "tokenless mode must prompt for login" + ); +} + +#[test] +fn json_carries_verdict_object_and_mode() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["verdict_mode"], "full"); + assert_eq!(parsed["results"][0]["verdict"]["status"], "vulnerable"); + assert_eq!( + parsed["results"][0]["verdict"]["matches"][0]["advisory_id"], + "MAL-2024-0001" + ); + assert_eq!( + parsed["results"][0]["verdict"]["matches"][0]["fixed_version"], + "2.0.0" + ); + assert_eq!(parsed["summary"]["vulnerable"], 1); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index dfb643c..1f23471 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -18,6 +18,7 @@ pub fn corgea_isolated() -> (Command, TempDir) { .env_remove("CORGEA_URL") .env_remove("CORGEA_NPM_REGISTRY") .env_remove("CORGEA_PYPI_REGISTRY") + .env_remove("CORGEA_VULN_API_URL") .env_remove("AI_AGENT") .env_remove("CODEX_SANDBOX") .env_remove("CLAUDECODE") From 37deb2d4468baa6811be78e2654166dd1339700e Mon Sep 17 00:00:00 2001 From: juangaitanv 
Date: Wed, 10 Jun 2026 15:29:22 +0200 Subject: [PATCH 04/59] Gate the full would-install set via dry-run tree resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expand corgea pip|npm install from named-target verification to every package the install would pull in: pip resolves via 'pip install --dry-run --quiet --report - --only-binary :all:' (the --only-binary guard keeps sdist build backends from running before the verdict, pypa/pip#13091); npm resolves in a throwaway temp dir via 'npm install --package-lock-only --ignore-scripts' and parses the generated lockfile (upstream --dry-run --json emits only counts, npm/cli#6558). Verdicts for the deduped tree + named union run through a bounded std::thread::scope pool (--concurrency, default 8); fail-closed and --force semantics extend to transitive findings unchanged. When resolution is unavailable (yarn/pnpm/uv have no safe dry-run) or fails, the gate verifies named targets only and prints a mandatory "transitive dependencies not checked" warning; --json carries the same fact in a new additive `tree` object (null in recency-only mode). PyPI name matching now shares deps' PEP 503 normalizer via PackageManager::normalize_name. The vuln-api test stub now sends Connection: close — it serves one response per connection, and without the header reqwest's pooled sockets raced the close once the gate began making several requests per run. 
--- skills/corgea/SKILL.md | 17 +- src/deps/ecosystems/pypi.rs | 5 +- src/main.rs | 8 + src/precheck/mod.rs | 491 +++++++++++++++++++++++++++++++----- src/precheck/tree.rs | 293 +++++++++++++++++++++ src/vuln_api_stub/mod.rs | 5 +- tests/cli_tree.rs | 356 ++++++++++++++++++++++++++ tests/cli_verdict.rs | 5 +- 8 files changed, 1117 insertions(+), 63 deletions(-) create mode 100644 src/precheck/tree.rs create mode 100644 tests/cli_tree.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 7b8dc5e..961bad9 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -121,6 +121,16 @@ is skipped (recency-only) and stderr suggests `corgea login`. Everything else pa through with the package manager's own exit code. Offline-only inputs (git/URL/path specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. +With a token, the vuln check covers the **full would-install set**, not just the +named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a +safe dry-run (`pip install --dry-run …`; an isolated `npm install --package-lock-only` +in a temp dir, never touching your lockfile) and verdict every package, so a flagged +**transitive** dependency blocks the install too. `yarn`, `pnpm`, and `uv` have no safe +dry-run, so they verify the named targets only and print +`warning: transitive dependencies not checked (…); only named packages were verified.` +The same warning is emitted (and the gate falls back to named-only) whenever a pip/npm +dry-run fails. Verdict requests run in a bounded pool (`--concurrency`, default 8). + ```bash corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip corgea npm install axios@^1.0.0 # same gate for npm ranges @@ -135,7 +145,12 @@ corgea pip list # non-install subcommands pass straight th | `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. 
| | `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable/unverifiable blocks. | | `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | -| `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode`. | +| `--concurrency` | | Max parallel vuln-verdict requests during the gate (1-32, default 8). | +| `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. | + +`--json` adds a `tree` object: `null` in recency-only mode; otherwise `mode` is `"full"` +(transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a +`transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. Recency gating needs no token; the vuln verdict uses the configured Corgea token when present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, diff --git a/src/deps/ecosystems/pypi.rs b/src/deps/ecosystems/pypi.rs index 062f13c..0f5fa77 100644 --- a/src/deps/ecosystems/pypi.rs +++ b/src/deps/ecosystems/pypi.rs @@ -367,7 +367,10 @@ fn exact_version_from_declared(name: &str, declared: &str) -> Option { Some(declared.trim_start_matches('=').trim().to_string()) } -fn normalize_pypi_name(name: &str) -> String { +/// PEP 503 name normalization: lowercase, runs of `-`/`_`/`.` collapse to `-`. +/// Also used by the install gate (`precheck`) so both features share one +/// canonical pypi name form. +pub(crate) fn normalize_pypi_name(name: &str) -> String { let mut out = String::new(); let mut last_was_separator = false; for c in name.trim().chars() { diff --git a/src/main.rs b/src/main.rs index e127f85..7cc67ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -261,6 +261,13 @@ struct InstallWrapArgs { )] json: bool, + #[arg( + long, + default_value_t = 8, + help = "Max parallel vulnerability-verdict requests during the gate (1-32)." 
+ )] + concurrency: usize, + /// Arguments forwarded to the package manager (subcommand and package specs). #[arg(trailing_var_arg = true, allow_hyphen_values = true)] cmd: Vec, @@ -288,6 +295,7 @@ fn install_wrap_options( verdict, npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), + concurrency: args.concurrency, } } diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 6897168..958b555 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -14,6 +14,7 @@ //! warning (the install runs anyway). pub mod parse; +pub mod tree; use std::ffi::OsString; use std::process::Command; @@ -64,6 +65,17 @@ impl PackageManager { PackageManager::Pip | PackageManager::Uv => "pypi", } } + + /// Canonical package name for dedup/matching across spec spellings: + /// PEP 503 for pypi (shared with `deps`), verbatim for npm. + pub fn normalize_name(self, name: &str) -> String { + match self { + PackageManager::Pip | PackageManager::Uv => { + crate::deps::ecosystems::pypi::normalize_pypi_name(name) + } + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => name.to_string(), + } + } } /// Connection details for the vuln-api verdict pass. @@ -107,6 +119,8 @@ pub struct PrecheckOptions { /// Optional registry overrides, used by tests. pub npm_registry: Option, pub pypi_registry: Option, + /// Max parallel vuln-api verdict requests; `verdict_pool` clamps to 1..=32. + pub concurrency: usize, } /// Each item the user (or a `-r` requirements file) asked us to install. @@ -154,6 +168,29 @@ pub enum TargetOutcome { }, } +/// Verdict for one package the tree pass resolved beyond the named targets. +#[derive(Debug)] +pub struct TreeOutcome { + pub name: String, + pub version: String, + pub verdict: VerdictStatus, +} + +/// Result of the tree pass. `PrecheckReport.tree` is `None` when the pass +/// never ran (recency-only / tokenless mode). 
+#[derive(Debug)] +pub enum TreeReport { + /// The full would-install set was resolved and verdicted. + Full { + /// Distinct packages the dry-run resolved (named + transitive). + resolved_count: usize, + /// Verdicts for resolved packages beyond the named targets. + transitive: Vec, + }, + /// Resolution unavailable or failed — only named targets were verified. + NamedOnly { reason: String }, +} + #[derive(Debug)] pub struct PrecheckReport { pub manager: PackageManager, @@ -161,6 +198,8 @@ pub struct PrecheckReport { pub original_args: Vec, pub outcomes: Vec, pub threshold: Duration, + /// `None` ⇒ recency-only mode, the tree pass never ran. + pub tree: Option, } impl PrecheckReport { @@ -182,7 +221,7 @@ impl PrecheckReport { .. } ) - }) + }) + self.tree_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) } pub fn unverifiable_count(&self) -> usize { self.count(|o| { @@ -193,7 +232,16 @@ impl PrecheckReport { .. } ) - }) + }) + self.tree_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + } + /// Count transitive tree findings whose verdict matches `pred`. + fn tree_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => { + transitive.iter().filter(|o| pred(&o.verdict)).count() + } + Some(TreeReport::NamedOnly { .. }) | None => 0, + } } pub fn skipped_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) @@ -284,19 +332,12 @@ fn run_parsed_install( exec: impl FnOnce() -> i32, opts: PrecheckOptions, ) -> i32 { - if !parsed.requirements_files.is_empty() { - let files: Vec = parsed - .requirements_files - .iter() - .map(|p| p.display().to_string()) - .collect(); - eprintln!( - "note: requirements files ({}) are not recency-checked by the baseline gate", - files.join(", ") - ); - } + // With a verdict config, the tree pass resolves the full would-install + // set; `tree::covers_input` owns what each manager's resolver can chew on. 
+ let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); - if parsed.targets.is_empty() { + if parsed.targets.is_empty() && !tree_eligible { + requirements_note(&parsed); return exec(); } @@ -310,7 +351,24 @@ fn run_parsed_install( .map(|target| verify_one(target, &opts, &now, threshold)) .collect(); - run_verdict_pass(manager, &mut outcomes, &opts); + let tree = if tree_eligible { + Some(run_tree_pass(manager, rest, &mut outcomes, &opts)) + } else { + run_verdict_pass(manager, &mut outcomes, &opts); // no-op tokenless + None + }; + + // The mandatory loud warning when the tree pass fell back to named-only. + if let Some(TreeReport::NamedOnly { reason }) = &tree { + eprintln!( + "warning: transitive dependencies not checked ({reason}); only named packages were verified." + ); + } + // The requirements note only matters when the tree pass did *not* cover + // those files (fallback to named-only, or recency-only mode). + if !matches!(&tree, Some(TreeReport::Full { .. })) { + requirements_note(&parsed); + } if opts.verdict.is_none() { eprintln!( "note: no Corgea token — vulnerability verdicts skipped (recency-only). Run `corgea login` for the full gate." @@ -323,6 +381,7 @@ fn run_parsed_install( original_args: rest.to_vec(), outcomes, threshold: opts.threshold, + tree, }; if opts.json { @@ -345,50 +404,199 @@ fn run_parsed_install( exec() } -/// Sequential vuln-api verdict pass over resolved targets. No-op without -/// a `VerdictConfig` (tokenless mode — `verify_one` already marked every -/// resolved target `NotChecked`). Any client/call failure is fail-closed: -/// the target becomes `Unverifiable`, which blocks unless `--force`. -fn run_verdict_pass( +/// Print the "requirements files are not recency-checked" note when the +/// install carried any `-r` files. No-op otherwise. 
+fn requirements_note(parsed: &parse::ParsedInstall) { + if parsed.requirements_files.is_empty() { + return; + } + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); +} + +/// Resolve the full would-install set and verdict it. On any resolution +/// failure, fall back to the named-only verdict pass; the caller renders the +/// loud warning from the returned `NamedOnly` reason. Only called when +/// `opts.verdict.is_some()`. +fn run_tree_pass( manager: PackageManager, + rest: &[String], outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, -) { - let Some(cfg) = &opts.verdict else { return }; +) -> TreeReport { + let set = match tree::resolve_tree(manager, rest) { + Ok(Some(set)) => set, + Ok(None) => { + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { + reason: format!("{} has no safe dry-run", manager.binary_name()), + }; + } + Err(reason) => { + run_verdict_pass(manager, outcomes, opts); + return TreeReport::NamedOnly { reason }; + } + }; + + // Dedup the dry-run set (npm lockfiles repeat the same name@version at + // multiple nested paths), then union in the named-resolved targets — a + // named target already installed is absent from the dry-run delta but + // must still be verdicted. + let norm = |n: &str| manager.normalize_name(n); + let mut seen = std::collections::HashSet::new(); + let mut jobs: Vec = Vec::with_capacity(set.len()); + for p in set { + if seen.insert((norm(&p.name), p.version.clone())) { + jobs.push(p); + } + } + let resolved_count = jobs.len(); + for o in outcomes.iter() { + if let TargetOutcome::Resolved { resolved, .. 
} = o { + if seen.insert((norm(&resolved.name), resolved.version.clone())) { + jobs.push(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + }); + } + } + } + + let cfg = opts + .verdict + .as_ref() + .expect("tree pass requires verdict config"); + let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + let transitive = apply_verdicts(manager, results, outcomes); + TreeReport::Full { + resolved_count, + transitive, + } +} + +/// Bounded worker pool over the verdict jobs — owns client creation and the +/// fail-closed policy: on client failure every job comes back `Unverifiable`. +/// Plain work queue, no new crates; `reqwest::blocking::Client` is +/// `Send + Sync`. Result order is not preserved; callers match results back +/// by `(name, version)`. +fn verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, + concurrency: usize, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + use std::collections::VecDeque; + use std::sync::Mutex; let client = match crate::vuln_api::http_client() { Ok(c) => c, Err(e) => { - for o in outcomes.iter_mut() { - if let TargetOutcome::Resolved { verdict, .. } = o { - *verdict = VerdictStatus::Unverifiable(e.clone()); - } - } - return; + return jobs + .into_iter() + .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) + .collect(); } }; let ecosystem = manager.ecosystem(); - for o in outcomes.iter_mut() { - let TargetOutcome::Resolved { - resolved, verdict, .. 
- } = o - else { - continue; - }; - *verdict = match crate::vuln_api::check_package_version( - &client, - &cfg.base_url, - &cfg.token, - ecosystem, - &resolved.name, - &resolved.version, - ) { - Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), - Ok(_) => VerdictStatus::Clean, - Err(e) => VerdictStatus::Unverifiable(e.to_string()), - }; + let workers = concurrency.clamp(1, 32).min(jobs.len().max(1)); + let queue = Mutex::new(VecDeque::from(jobs)); + let results = Mutex::new(Vec::new()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let Some(job) = queue.lock().unwrap().pop_front() else { + break; + }; + let verdict = match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + &cfg.token, + ecosystem, + &job.name, + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }; + results.lock().unwrap().push((job, verdict)); + }); + } + }); + results.into_inner().unwrap() +} + +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the transitive set. +fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + let key = (norm(&pkg.name), pkg.version.clone()); + let mut matched = false; + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { + resolved, + verdict: v, + .. 
+ } = o + { + if (norm(&resolved.name), resolved.version.clone()) == key { + *v = verdict.clone(); + matched = true; + } + } + } + if !matched { + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + verdict, + }); + } } + transitive +} + +/// Vuln-api verdict pass over resolved targets, run through the bounded +/// worker pool. No-op without a `VerdictConfig` (tokenless mode — `verify_one` +/// already marked every resolved target `NotChecked`). Any client/call failure +/// is fail-closed: the target becomes `Unverifiable`, which blocks unless +/// `--force`. +fn run_verdict_pass( + manager: PackageManager, + outcomes: &mut [TargetOutcome], + opts: &PrecheckOptions, +) { + let Some(cfg) = &opts.verdict else { return }; + + // One job per resolved target; jobs are 1:1 with outcomes, so + // `apply_verdicts` matches everything and returns no leftovers. + let jobs: Vec = outcomes + .iter() + .filter_map(|o| match o { + TargetOutcome::Resolved { resolved, .. } => Some(tree::TreePackage { + name: resolved.name.clone(), + version: resolved.version.clone(), + }), + _ => None, + }) + .collect(); + + let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + apply_verdicts(manager, results, outcomes); } fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { @@ -502,6 +710,44 @@ fn print_text(report: &PrecheckReport) { report.error_count(), ); + match &report.tree { + Some(TreeReport::Full { + resolved_count, + transitive, + }) => { + println!( + " tree: {} packages resolved, {} transitive checked", + resolved_count, + transitive.len() + ); + for t in transitive { + match &t.verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {}@{} (transitive) known vulnerable:", + t.name, t.version + ); + for m in matches { + println!(" {} ({})", m.advisory_id, m.severity_level); + } + } + VerdictStatus::Unverifiable(error) => { + println!( + " ⚠ {}@{} (transitive) could not be verified: {}", + t.name, t.version, 
error + ); + } + // Clean / not-checked transitive entries stay quiet in text mode. + VerdictStatus::Clean | VerdictStatus::NotChecked(_) => {} + } + } + } + Some(TreeReport::NamedOnly { reason }) => { + println!(" tree: transitive dependencies NOT checked ({reason})"); + } + None => {} + } + for o in &report.outcomes { match o { TargetOutcome::Resolved { @@ -557,6 +803,24 @@ fn print_text(report: &PrecheckReport) { } } +/// JSON shape for a single verdict. Shared by named outcomes and tree +/// (transitive) outcomes so both render verdicts identically. +fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { + use serde_json::json; + match verdict { + VerdictStatus::Clean => json!({ "status": "clean" }), + VerdictStatus::Vulnerable(matches) => { + json!({ "status": "vulnerable", "matches": matches }) + } + VerdictStatus::Unverifiable(error) => { + json!({ "status": "unverifiable", "error": error }) + } + VerdictStatus::NotChecked(reason) => { + json!({ "status": "not_checked", "reason": reason }) + } + } +} + fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { use serde_json::json; let outcomes: Vec<_> = report @@ -570,18 +834,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { recent, verdict, } => { - let verdict_json = match verdict { - VerdictStatus::Clean => json!({ "status": "clean" }), - VerdictStatus::Vulnerable(matches) => { - json!({ "status": "vulnerable", "matches": matches }) - } - VerdictStatus::Unverifiable(error) => { - json!({ "status": "unverifiable", "error": error }) - } - VerdictStatus::NotChecked(reason) => { - json!({ "status": "not_checked", "reason": reason }) - } - }; + let verdict_json = verdict_json(verdict); json!({ "status": if *recent { "recent" } else { "ok" }, "spec": target.display, @@ -622,6 +875,24 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { }, "verdict_mode": if opts.verdict.is_some() { "full" } else { "recency-only" }, "results": outcomes, + "tree": 
report.tree.as_ref().map(|t| match t { + TreeReport::Full { resolved_count, transitive } => json!({ + "mode": "full", + "reason": serde_json::Value::Null, + "resolved_count": resolved_count, + "transitive": transitive.iter().map(|o| json!({ + "name": o.name, + "version": o.version, + "verdict": verdict_json(&o.verdict), + })).collect::>(), + }), + TreeReport::NamedOnly { reason } => json!({ + "mode": "named-only", + "reason": reason, + "resolved_count": 0, + "transitive": [], + }), + }), }); println!("{}", serde_json::to_string_pretty(&body).unwrap()); @@ -658,6 +929,7 @@ mod tests { verdict: None, npm_registry: None, pypi_registry: Some(pypi_registry), + concurrency: 4, } } @@ -736,6 +1008,7 @@ mod tests { original_args: vec![], outcomes, threshold: Duration::from_secs(2 * 86400), + tree: None, } } @@ -754,6 +1027,22 @@ mod tests { assert_eq!(PackageManager::Pnpm.ecosystem(), "npm"); } + #[test] + fn normalize_name_per_manager() { + // pypi: PEP 503 — lowercase, separator runs collapse to one `-`. + assert_eq!( + PackageManager::Pip.normalize_name("Flask_Cors"), + "flask-cors" + ); + assert_eq!( + PackageManager::Uv.normalize_name("zope.interface"), + "zope-interface" + ); + assert_eq!(PackageManager::Pip.normalize_name("a__b"), "a-b"); + // npm names are case-sensitive and pass through verbatim. + assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); + } + /// Full predicate matrix: force ⇒ never block; vulnerable and /// unverifiable block regardless of --no-fail; recency keeps its /// task-2 --no-fail demotion. @@ -803,6 +1092,32 @@ mod tests { } } + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with it. 
+ #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts("http://127.0.0.1:9".to_string(), false) + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes /// keep NotChecked. @@ -862,4 +1177,62 @@ mod tests { } )); } + + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`, regardless of `concurrency` + /// (1 = serial, 8 > job count = all workers spawn but some drain empty). 
+ #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let cfg = VerdictConfig { + base_url: stub.base_url.clone(), + token: "test-token".to_string(), + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + }) + .collect(); + + for concurrency in [1usize, 8] { + let results = verdict_pool(jobs.clone(), &cfg, PackageManager::Pip, concurrency); + assert_eq!( + results.len(), + 6, + "concurrency {concurrency}: all jobs verdicted" + ); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "concurrency {concurrency}: only evil flagged"); + assert_eq!(clean, 5, "concurrency {concurrency}: rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + .expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs new file mode 100644 index 0000000..5f4db68 --- /dev/null +++ b/src/precheck/tree.rs @@ -0,0 +1,293 @@ +//! Full would-install-set resolution (the "tree pass"). +//! +//! Safety invariant: resolution must never execute package code. +//! pip: `--only-binary :all:` prevents sdist builds (pypa/pip#13091). +//! 
npm: `--ignore-scripts` guards npm/cli#2787. + +use std::process::Command; + +use super::PackageManager; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePackage { + pub name: String, + pub version: String, +} + +/// Whether this manager's resolver has anything to resolve for the parsed +/// install. pip's dry-run also reads `-r` requirements files, so those make +/// a pip install eligible even with no named targets. +pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + || (manager == PackageManager::Pip && !parsed.requirements_files.is_empty()) +} + +/// `Ok(None)`: manager has no safe dry-run — named-only with warning. +/// `Err(reason)`: dry-run attempted and failed — named-only, warning carries reason. +pub fn resolve_tree( + manager: PackageManager, + install_args: &[String], +) -> Result>, String> { + match manager { + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), + // yarn/pnpm/uv have no safe dry-run for installs. + _ => Ok(None), + } +} + +/// Last stderr line of a failed subprocess, for one-line error messages. 
+fn stderr_tail(output: &std::process::Output) -> String { + String::from_utf8_lossy(&output.stderr) + .trim() + .lines() + .last() + .unwrap_or("unknown error") + .to_string() +} + +fn resolve_pip_tree(binary: &str, install_args: &[String]) -> Result, String> { + let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; + let output = Command::new(resolved) + .arg("install") + .args([ + "--dry-run", + "--quiet", + "--report", + "-", + "--only-binary", + ":all:", + ]) + .args(install_args) + .output() + .map_err(|e| format!("run pip dry-run: {e}"))?; + if !output.status.success() { + return Err(format!("pip dry-run failed: {}", stderr_tail(&output))); + } + parse_pip_report(&String::from_utf8_lossy(&output.stdout)) +} + +fn parse_pip_report(json: &str) -> Result, String> { + let report: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse pip report: {e}"))?; + let install = report + .get("install") + .and_then(|v| v.as_array()) + .ok_or("pip report has no install[] array")?; + install + .iter() + .map(|item| { + let metadata = item.get("metadata").ok_or("report item missing metadata")?; + let field = |k: &str| { + metadata + .get(k) + .and_then(|v| v.as_str()) + .map(str::to_string) + .ok_or_else(|| format!("report item missing metadata.{k}")) + }; + Ok(TreePackage { + name: field("name")?, + version: field("version")?, + }) + }) + .collect() +} + +/// Resolve npm's full would-install set by generating a lockfile in a +/// throwaway dir so the user's own lockfile is never touched. npm's +/// `--dry-run --json` only emits counts (npm/cli#6558), so we read the +/// generated `package-lock.json` instead. +/// +/// `--ignore-scripts` because npm has run lifecycle scripts under +/// `--package-lock-only` before (npm/cli#2787). 
+fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { + let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + for manifest in [ + "package.json", + "package-lock.json", + "npm-shrinkwrap.json", + ".npmrc", + ] { + if std::path::Path::new(manifest).exists() { + std::fs::copy(manifest, work.path().join(manifest)) + .map_err(|e| format!("copy {manifest}: {e}"))?; + } + } + let output = Command::new(resolved) + .arg("install") + .args(install_args) + .args([ + "--package-lock-only", + "--ignore-scripts", + "--no-audit", + "--no-fund", + ]) + .current_dir(work.path()) + .output() + .map_err(|e| format!("run npm lockfile resolution: {e}"))?; + if !output.status.success() { + return Err(format!( + "npm lockfile resolution failed: {}", + stderr_tail(&output) + )); + } + let lock = std::fs::read_to_string(work.path().join("package-lock.json")) + .map_err(|e| format!("read generated package-lock.json: {e}"))?; + parse_npm_lockfile(&lock) +} + +fn parse_npm_lockfile(json: &str) -> Result, String> { + let lock: serde_json::Value = + serde_json::from_str(json).map_err(|e| format!("parse package-lock.json: {e}"))?; + let packages = lock + .get("packages") + .and_then(|v| v.as_object()) + .ok_or("package-lock.json has no packages map (npm < 7?)")?; + let mut out = Vec::new(); + for (path, entry) in packages { + if path.is_empty() { + continue; // root project entry + } + if entry.get("link").and_then(|v| v.as_bool()) == Some(true) { + continue; + } + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path)); + let (Some(name), Some(version)) = (name, entry.get("version").and_then(|v| v.as_str())) + else { + continue; + }; + out.push(TreePackage { + name, + version: version.to_string(), + }); + } + Ok(out) +} + +/// Derive a package name from a lockfile path key like 
+/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. +fn name_from_lock_path(path: &str) -> Option { + let idx = path.rfind("node_modules/")?; + let name = &path[idx + "node_modules/".len()..]; + (!name.is_empty()).then(|| name.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + const OK_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + + #[test] + fn parse_pip_report_ok() { + let pkgs = parse_pip_report(OK_REPORT).expect("parse ok report"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string() + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string() + }, + ] + ); + } + + #[test] + fn parse_pip_report_missing_install() { + let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); + assert!(err.contains("no install[]"), "got: {err}"); + } + + #[test] + fn parse_pip_report_missing_version() { + let json = r#"{"install":[{"metadata":{"name":"x"}}]}"#; + let err = parse_pip_report(json).expect_err("missing version"); + assert!(err.contains("metadata.version"), "got: {err}"); + } + + #[test] + fn parse_pip_report_non_json() { + let err = parse_pip_report("not json").expect_err("non-json"); + assert!(err.contains("parse pip report"), "got: {err}"); + } + + // lockfile-v3 with: root entry (skipped), a plain dep, a nested dep, + // a scoped dep, and a workspace `link: true` entry (skipped). 
+ const NPM_LOCK: &str = r#"{ + "name": "proj", "lockfileVersion": 3, + "packages": { + "": {"name": "proj", "version": "1.0.0"}, + "node_modules/oldpkg": {"version": "1.0.0"}, + "node_modules/evildep": {"version": "0.4.2"}, + "node_modules/a/node_modules/b": {"version": "2.3.4"}, + "node_modules/@scope/pkg": {"version": "9.0.1"}, + "node_modules/localdep": {"resolved": "../local", "link": true}, + "packages/localdep": {"name": "localdep", "version": "0.0.1"} + } + }"#; + + #[test] + fn parse_npm_lockfile_ok() { + let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); + pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "@scope/pkg".to_string(), + version: "9.0.1".to_string() + }, + TreePackage { + name: "b".to_string(), + version: "2.3.4".to_string() + }, + TreePackage { + name: "evildep".to_string(), + version: "0.4.2".to_string() + }, + TreePackage { + name: "localdep".to_string(), + version: "0.0.1".to_string() + }, + TreePackage { + name: "oldpkg".to_string(), + version: "1.0.0".to_string() + }, + ] + ); + } + + #[test] + fn parse_npm_lockfile_missing_packages() { + let err = parse_npm_lockfile(r#"{"lockfileVersion":1}"#).expect_err("no packages map"); + assert!(err.contains("no packages map"), "got: {err}"); + } + + #[test] + fn name_from_lock_path_handles_nested_and_scoped() { + assert_eq!( + name_from_lock_path("node_modules/oldpkg").as_deref(), + Some("oldpkg") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/b").as_deref(), + Some("b") + ); + assert_eq!( + name_from_lock_path("node_modules/a/node_modules/@scope/pkg").as_deref(), + Some("@scope/pkg") + ); + assert_eq!(name_from_lock_path("packages/foo"), None); + } +} diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index 0535c88..cef3670 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -152,8 +152,11 @@ fn handle_connection( }; let status_text = status_text(status_code); + // 
`Connection: close` is load-bearing: the stub serves one response per + // connection, so without it reqwest pools the socket and a second request + // (the gate's tree pass makes several per run) races the close and fails. let response = format!( - "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", status_code, status_text, response_body.len(), diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs new file mode 100644 index 0000000..9ba5e91 --- /dev/null +++ b/tests/cli_tree.rs @@ -0,0 +1,356 @@ +//! Hermetic e2e tests for the full-tree resolution pass +//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! +//! Composes the `cli_verdict.rs` harness pattern (fake pip on a private PATH + +//! local pypi registry stub + in-crate vuln-api stub) with a dry-run-aware +//! fake pip: a `--dry-run` invocation answers with a canned pip report on +//! stdout, every other invocation records its argv to a marker and exits. +//! `oldpkg==1.0.0` is published in 2020 so recency never blocks here — every +//! block is the verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// Pip `--report -` payload: `oldpkg` (named) + `evildep` (transitive). 
+const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +fn vulnerable_evildep_body(ecosystem: &str) -> String { + format!( + r#"{{"ecosystem":"{ecosystem}","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}}]}}"# + ) +} + +/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm +/// packument), both published 2020 → never recent. Everything else 404s. +fn spawn_pypi_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or("") + .to_string(); + + let (status, body) = match path.as_str() { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + ), + "/oldpkg" => ( + "200 OK", + r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body 
+ ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Sentinel payload that makes the fake manager exit non-zero on its tree +/// (resolution) invocation, forcing the named-only fallback. +const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Write an executable fake package manager into `dir`. On an invocation +/// whose argv contains `tree_flag` it emits `payload` (to stdout for pip's +/// `--dry-run --report -`, into `./package-lock.json` for npm's +/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) and +/// exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits +/// non-zero instead, emitting nothing. Any other invocation records its argv +/// to `marker` and exits `exit_code`. +/// +/// The payload is read from a sibling file via shell builtins so it works +/// under the test's locked-down `PATH` (which has no `cat`); the +/// `|| [ -n "$line" ]` guard keeps the final line when the payload file has +/// no trailing newline. 
+fn write_fake_pm(dir: &Path, marker: &Path, binary: &str, payload: &str, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + let (tree_flag, redirect, fail_exit) = match binary { + "pip" => ("--dry-run", "", 2), + "npm" => ("--package-lock-only", " > package-lock.json", 1), + other => panic!("unsupported fake manager {other}"), + }; + let tree_branch = if payload == RESOLUTION_FAILS { + format!("exit {fail_exit}") + } else { + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'{redirect}; exit 0", + payload_path.display() + ) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake pm"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod fake pm"); +} + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + +/// `corgea` wired to the registry stub, a tree-aware fake pip, and a vuln-api +/// stub. +struct TreeHarness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl TreeHarness { + /// Wires the registry + vuln-api stubs, token, and a fake `binary` + /// (`"pip"` or `"npm"`) into a private PATH dir. `payload` is the canned + /// tree-resolution output (pip report / npm lockfile), or + /// `RESOLUTION_FAILS` to simulate a failed resolution. 
+ fn new( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, + exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pm(bin.path(), &marker, binary, payload, exit_code); + let registry = spawn_pypi_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", ®istry) + .env("CORGEA_NPM_REGISTRY", ®istry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token"); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn pip_transitive_vulnerable_blocks_install() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT, 0); + let out = h + .cmd + .args(["pip", "--concurrency", "2", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn pip_dry_run_failure_falls_back_with_loud_warning() { + // Fake pip exits 2 on `--dry-run` (simulates old pip with no `--report`). + // Stub is all-clean, so the named-only fallback proceeds. 
+ let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), RESOLUTION_FAILS, 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named-only must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn pip_json_carries_tree_object() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT, 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + assert_eq!(parsed["tree"]["transitive"][0]["name"], "evildep"); + assert_eq!( + parsed["tree"]["transitive"][0]["verdict"]["status"], + "vulnerable" + ); + assert_eq!(parsed["summary"]["vulnerable"], 1); +} + +#[test] +fn pip_clean_tree_proceeds() { + // Stub default-clean (no overrides), so every resolved package is clean. 
+ let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), TREE_REPORT, 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_transitive_vulnerable_blocks_install() { + // The generated lockfile carries a transitive `evildep` 0.4.2 that the + // vuln stub flags; the named `oldpkg` is clean. + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_evildep_body("npm"), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK, 0); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn npm_resolution_failure_falls_back_with_warning() { + // Fake npm exits 1 on `--package-lock-only`. Stub is all-clean, so the + // named-only fallback proceeds with a loud warning. 
+ let mut h = TreeHarness::new("npm", HashMap::new(), HashMap::new(), RESOLUTION_FAILS, 0); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named-only must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn npm_does_not_touch_project_lockfile() { + // Run from a project dir holding sentinel manifests; the resolver works in + // a throwaway copy, so after a gated run both files are byte-identical. + let project = TempDir::new().expect("project dir"); + let pkg_json = project.path().join("package.json"); + let lock_json = project.path().join("package-lock.json"); + let pkg_sentinel = r#"{"name":"sentinel","version":"0.0.0"}"#; + let lock_sentinel = r#"{"name":"sentinel","lockfileVersion":3,"packages":{}}"#; + std::fs::write(&pkg_json, pkg_sentinel).expect("write package.json"); + std::fs::write(&lock_json, lock_sentinel).expect("write package-lock.json"); + + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_evildep_body("npm"), + ); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK, 0); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + + assert_eq!( + std::fs::read_to_string(&pkg_json).unwrap(), + pkg_sentinel, + "package.json must be untouched" + ); + assert_eq!( + std::fs::read_to_string(&lock_json).unwrap(), + lock_sentinel, + "package-lock.json must be untouched" + ); +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index e6db018..4b661f0 100644 --- a/tests/cli_verdict.rs +++ 
b/tests/cli_verdict.rs @@ -82,8 +82,11 @@ fn spawn_pypi_stub() -> String { /// the exit code propagates. fn write_fake_pip(dir: &Path, marker: &Path, exit_code: i32) { use std::os::unix::fs::PermissionsExt; + // Simulate an old pip with no `--report`: exit 2 on the tree dry-run + // *without* touching the marker, so these tests exercise the named-only + // fallback path and keep their pre-tree semantics. let script = format!( - "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", marker.display(), exit_code ); From 55544f41fb0c5030a528fd656eecbcbb0d384cbf Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Wed, 10 Jun 2026 16:33:49 +0200 Subject: [PATCH 05/59] Steer blocked installs to a safe version from verdict fix data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a flagged package, annotate each advisory line with its fix (— fixed in X / — no fixed version known) and print a '→ safe version: name@X' steer when every advisory has a fix — single distinct value as-is, several distinct picked by lenient semver, and no certification when any advisory lacks a fix or a candidate doesn't parse. --json vulnerable verdicts carry a 'remediation' field (safe version or null), shared by named and transitive entries via verdict_json. Render-only (design D7, vuln-verdict spec 'Remediation steering'): no client, flag, or blocking changes. normalize_for_semver widened to pub(crate) for the cross-ecosystem version ordering. Covered by 7 safe_version unit tests + 4 hermetic e2e tests (cli_remediation). 
--- skills/corgea/SKILL.md | 6 + src/precheck/mod.rs | 133 ++++++++++++++++++-- src/verify_deps/registry.rs | 5 +- tests/cli_remediation.rs | 234 ++++++++++++++++++++++++++++++++++++ 4 files changed, 370 insertions(+), 8 deletions(-) create mode 100644 tests/cli_remediation.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 961bad9..b034506 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -121,6 +121,10 @@ is skipped (recency-only) and stderr suggests `corgea login`. Everything else pa through with the package manager's own exit code. Offline-only inputs (git/URL/path specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. +Blocked findings steer to the fix: each advisory line shows `fixed in ` (or +`no fixed version known`), and when every advisory on a package has a fix, a +`→ safe version: @` line names the version to install instead. + With a token, the vuln check covers the **full would-install set**, not just the named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a safe dry-run (`pip install --dry-run …`; an isolated `npm install --package-lock-only` @@ -151,6 +155,8 @@ corgea pip list # non-install subcommands pass straight th `--json` adds a `tree` object: `null` in recency-only mode; otherwise `mode` is `"full"` (transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a `transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. +Vulnerable `verdict` objects carry a `remediation` field: the certified safe version, +or `null` when any advisory has no known fix. Recency gating needs no token; the vuln verdict uses the configured Corgea token when present. 
Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 958b555..22d13ff 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -692,6 +692,60 @@ fn exec_command(binary: &str, args: &[String]) -> i32 { } } +/// Suffix for a vulnerable match line: the advisory's fix, if known. +fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { + match &m.fixed_version { + Some(v) => format!(" — fixed in {v}"), + None => " — no fixed version known".to_string(), + } +} + +/// The one version certified to clear every match. Requires every match to +/// carry a `fixed_version`: a single distinct value is returned as-is; +/// several distinct values pick the highest by lenient semver. Any match +/// without a fix — or an unparsable candidate among several — means no +/// version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let mut fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut best: Option<(semver::Version, &str)> = None; + for raw in many { + let v = semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) + .ok()?; + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw)), + } + } + best.map(|(_, raw)| (*raw).to_string()) + } + } +} + +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. 
+fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { + for m in matches { + println!( + " {} ({}){}", + m.advisory_id, + m.severity_level, + fix_note(m) + ); + } + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); + } +} + fn print_text(report: &PrecheckReport) { println!( "Pre-checking `{} {} {}` (threshold {})", @@ -727,9 +781,7 @@ fn print_text(report: &PrecheckReport) { " ✗ {}@{} (transitive) known vulnerable:", t.name, t.version ); - for m in matches { - println!(" {} ({})", m.advisory_id, m.severity_level); - } + print_vulnerable_matches(&t.name, matches); } VerdictStatus::Unverifiable(error) => { println!( @@ -762,9 +814,7 @@ fn print_text(report: &PrecheckReport) { " ✗ {} → {}@{} known vulnerable:", target.display, resolved.name, resolved.version, ); - for m in matches { - println!(" {} ({})", m.advisory_id, m.severity_level); - } + print_vulnerable_matches(&resolved.name, matches); } VerdictStatus::Unverifiable(error) => { println!( @@ -810,7 +860,11 @@ fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { match verdict { VerdictStatus::Clean => json!({ "status": "clean" }), VerdictStatus::Vulnerable(matches) => { - json!({ "status": "vulnerable", "matches": matches }) + json!({ + "status": "vulnerable", + "matches": matches, + "remediation": safe_version(matches), + }) } VerdictStatus::Unverifiable(error) => { json!({ "status": "unverifiable", "error": error }) @@ -1235,4 +1289,69 @@ mod tests { ); } } + + fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { + crate::vuln_api::VulnMatch { + advisory_id: advisory.to_string(), + severity_level: "high".to_string(), + tier: 1, + vulnerable_version_range: None, + fixed_version: fixed.map(str::to_string), + } + } + + #[test] + fn safe_version_single_fix() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0"))]), + Some("2.0.0".to_string()) + ); + } + + #[test] + fn 
safe_version_duplicate_fixes_collapse_without_parsing() { + // "1.0rc1" is unparsable, but a single distinct value needs no parse. + assert_eq!( + safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), + Some("1.0rc1".to_string()) + ); + } + + #[test] + fn safe_version_picks_highest_of_distinct_fixes() { + // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). + assert_eq!( + safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + #[test] + fn safe_version_two_component_versions_normalize() { + assert_eq!( + safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), + Some("4.0".to_string()) + ); + } + + #[test] + fn safe_version_mixed_fix_and_none_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + None + ); + } + + #[test] + fn safe_version_unparsable_among_distinct_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), + None + ); + } + + #[test] + fn safe_version_empty_matches_is_none() { + assert_eq!(safe_version(&[]), None); + } } diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index 10bc343..b351b79 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -421,7 +421,10 @@ fn pick_latest_stable(candidates: &[(String, DateTime)]) -> Option<&(String /// `X.Y` or `X.Y.Z.postN` — the dotted-number form usually parses /// straight as semver if we pad to 3 components. Anything more exotic /// (`1.0a1`, `2!1.0`, etc.) is left alone and rejected by semver. -pub(super) fn normalize_for_semver(v: &str) -> String { +/// +/// Also used outside the registry (`precheck::safe_version`) as a lenient +/// cross-ecosystem pad for ordering fixed versions; keep it ecosystem-agnostic. 
+pub(crate) fn normalize_for_semver(v: &str) -> String { if v.contains('!') || v.contains('a') || v.contains('b') diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs new file mode 100644 index 0000000..e583240 --- /dev/null +++ b/tests/cli_remediation.rs @@ -0,0 +1,234 @@ +//! Hermetic e2e tests for remediation steering: a blocked install names the +//! safe version from the verdict's `fixed_version` data. +//! +//! Mirrors the `cli_verdict.rs` harness (inline PyPI stub published 2020 so +//! recency never blocks, a fake pip recording its argv, the in-crate vuln-api +//! stub, and a set token) — every block here is the verdict's doing. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +fn fixed_body() -> String { + r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"2.0.0"}]}"# + .to_string() +} + +fn no_fix_body() -> String { + r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string() +} + +/// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never +/// recent). Everything else 404s. 
+fn spawn_pypi_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or("") + .to_string(); + + let (status, body) = match path.as_str() { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write an executable fake `pip` into `dir`. It records its argv to `marker` +/// and exits with `exit_code` — proving both whether the install ran and that +/// the exit code propagates. +fn write_fake_pip(dir: &Path, marker: &Path, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + // Simulate an old pip with no `--report`: exit 2 on the tree dry-run + // *without* touching the marker, so these tests exercise the named-only + // fallback path and keep their pre-tree semantics. 
+ let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + let path = dir.join("pip"); + std::fs::write(&path, script).expect("write fake pip"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake pip"); +} + +/// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. +struct RemediationHarness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl RemediationHarness { + fn new(checks: HashMap, token: Option<&str>, pip_exit_code: i32) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pip(bin.path(), &marker, pip_exit_code); + let registry = spawn_pypi_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); + if let Some(t) = token { + cmd.env("CORGEA_TOKEN", t); + } + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn fixed_match_blocks_and_names_safe_version() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("fixed in 2.0.0"), "stdout: {stdout}"); + assert!( + stdout.contains("safe version: oldpkg@2.0.0"), + "stdout: {stdout}" + ); +} + +#[test] +fn 
no_fix_match_reports_no_fixed_version_known() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!( + h.recorded_argv(), + None, + "pip must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("no fixed version known"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("safe version:"), + "no steer line when the fix is unknown: {stdout}" + ); +} + +#[test] +fn json_remediation_carries_safe_version() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!( + parsed["results"][0]["verdict"]["remediation"], "2.0.0", + "parsed: {parsed}" + ); +} + +#[test] +fn json_remediation_null_when_no_fix() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + let v = &parsed["results"][0]["verdict"]; + assert!( + v.as_object().unwrap().contains_key("remediation"), + "verdict must carry the remediation 
key: {parsed}" + ); + assert!( + v["remediation"].is_null(), + "remediation must be null when no fix is known: {parsed}" + ); +} From 10693db2ea636ec2117cb49f9c48d2ee9284ebed Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:15:51 +0200 Subject: [PATCH 06/59] Scrub git hook env in harness pre-commit before running tests git exports an absolute GIT_DIR to hooks in linked worktrees; tests that spawn 'git init' in tempdirs inherited it and reinitialized the shared gitdir (setting core.bare=true, breaking every checkout). Unset GIT_* in cmd_pre_commit so the test suite is hermetic. --- harness | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/harness b/harness index 84b5076..8b430d9 100755 --- a/harness +++ b/harness @@ -260,6 +260,12 @@ $output" } cmd_pre_commit() { + # git exports GIT_DIR/GIT_INDEX_FILE/… to hooks. From a linked + # worktree GIT_DIR is absolute, so any `git init`/`git add` a test + # spawns in a tempdir would resolve to the shared gitdir and + # corrupt the real repo. Scrub the hook env before running tests. + unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE GIT_OBJECT_DIRECTORY \ + GIT_COMMON_DIR GIT_PREFIX local staged; staged="$(staged_rs_files)" if [ -z "$staged" ]; then printf "No staged Rust files — skipping checks\n" From a1212cd641d380bdcf1a504f627b73971e174706 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:16:00 +0200 Subject: [PATCH 07/59] Remove unused advisory-detail client path and stub route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The install gate never consults GET /v1/advisories/:id — verdicts come entirely from the package-check route. Drop AdvisoryResponse, get_advisory, the stub's advisories map/route, and the fixture. 
--- src/bin/vuln-api-stub.rs | 2 +- src/vuln_api/mod.rs | 214 +------------------ src/vuln_api_stub/fixtures.rs | 8 - src/vuln_api_stub/mod.rs | 28 +-- tests/fixtures/vuln_api/advisory_detail.json | 15 -- 5 files changed, 6 insertions(+), 261 deletions(-) delete mode 100644 tests/fixtures/vuln_api/advisory_detail.json diff --git a/src/bin/vuln-api-stub.rs b/src/bin/vuln-api-stub.rs index cf47d08..4edc509 100644 --- a/src/bin/vuln-api-stub.rs +++ b/src/bin/vuln-api-stub.rs @@ -10,7 +10,7 @@ use std::path::PathBuf; about = "Minimal TCP stub for vuln-api package-check routes" )] struct Args { - /// JSON fixture file (`package_checks` + optional `advisories`). + /// JSON fixture file (`package_checks`). #[arg(long)] fixtures: PathBuf, diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index 0cb24f1..c10c8ab 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -42,41 +42,6 @@ pub struct VulnMatch { pub fixed_version: Option, } -/// Subset of `GET /v1/advisories/:id` we consume. -/// -/// Field-name notes (kept stable for callers, but mapped to the real -/// server shape via `#[serde(rename = …)]`): -/// -/// * `advisory_id` ← server's `id` -/// * `title` ← server's `summary` -/// * `severity_level` ← server's `severity` -/// * `url` ← server's `source_url` -/// * `tier` is `Option` because the server may emit `null` -/// (see `VULNERABILITY_SERVICE.md` §5). -/// -/// The server also returns many fields we don't currently use -/// (`alias`, `severity_badge`, `tier_score`, `details`, `llm_summary`, -/// `packages`, `cwes`, `raw`, …). `serde` ignores unknown fields by -/// default; we add them here only when a caller needs them. No -/// top-level `remediation` field exists on the server — do not add one -/// (server's `llm_summary` is a 1-2 sentence developer summary, not -/// remediation guidance, and the semantics differ). 
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct AdvisoryResponse { - #[serde(rename = "id")] - pub advisory_id: String, - #[serde(default)] - pub aliases: Vec, - #[serde(default, rename = "summary")] - pub title: Option, - #[serde(default, rename = "severity")] - pub severity_level: Option, - #[serde(default)] - pub tier: Option, - #[serde(default, rename = "source_url")] - pub url: Option, -} - fn user_agent() -> String { format!("corgea-cli/{} (vuln-api)", env!("CARGO_PKG_VERSION")) } @@ -305,66 +270,6 @@ pub fn check_package_version( Ok(parsed) } -pub fn get_advisory( - client: &reqwest::blocking::Client, - base_url: &str, - token: &str, - advisory_id: &str, -) -> Result> { - let base = validated_base(token, base_url)?; - let encoded_id = urlencoding::encode(advisory_id); - let url = format!("{}/v1/advisories/{}", base, encoded_id); - - debug(&format!( - "Sending vuln-api advisory request to URL: {}", - url - )); - - let response = build_authed_get(client, &url, token) - .send() - .map_err(|e| format!("Failed to send vuln-api advisory request: {}", e))?; - - let status = response.status(); - if !status.is_success() { - let suffix = error_body_suffix(response); - return Err(format!( - "vuln-api advisory lookup failed: HTTP {}{}", - status.as_u16(), - suffix - ) - .into()); - } - - let response_text = response.text()?; - let parsed: AdvisoryResponse = serde_json::from_str(&response_text).map_err(|e| { - debug(&format!( - "Failed to parse vuln-api advisory response: {}. Body: {}", - e, response_text - )); - format!("Failed to parse vuln-api advisory response: {}", e) - })?; - - // Identity guard: refuse a response that names a different advisory - // than we asked about. The server is allowed to be silent on - // identity (empty advisory_id), but if it answers it must match - // either the canonical id or one of the aliases. 
- if !parsed.advisory_id.is_empty() - && !parsed.advisory_id.eq_ignore_ascii_case(advisory_id) - && !parsed - .aliases - .iter() - .any(|a| a.eq_ignore_ascii_case(advisory_id)) - { - return Err(format!( - "vuln-api response advisory_id '{}' does not match request '{}'", - parsed.advisory_id, advisory_id - ) - .into()); - } - - Ok(parsed) -} - #[cfg(test)] mod tests { use super::*; @@ -399,19 +304,15 @@ mod tests { /// Keys in `retry_after_keys`: first hit → 429 + Retry-After: 1, second hit → /// response from `responses` (or clean 200 fallback). - /// `advisory_responses` keys advisory id → (status, body) for the - /// `/v1/advisories/:id` route. Empty map = route returns 404. fn spawn_package_check_stub_with_retry_keys( responses: KeyedResponses, retry_after_keys: KeyedResponses, - advisory_responses: HashMap, ) -> PackageCheckStub { let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); let port = listener.local_addr().unwrap().port(); let base_url = format!("http://127.0.0.1:{}", port); let responses = Arc::new(Mutex::new(responses)); let retry_after_keys = Arc::new(Mutex::new(retry_after_keys)); - let advisory_responses = Arc::new(Mutex::new(advisory_responses)); let hit_counts: Arc>> = Arc::new(Mutex::new(HashMap::new())); let handle = thread::spawn(move || { @@ -472,17 +373,6 @@ mod tests { .unwrap_or((200, r#"{"is_vulnerable":false,"matches":[]}"#.into())); (code, status_text(code), body, String::new()) } - } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { - let id = urlencoding::decode(parts[2]) - .unwrap_or_default() - .into_owned(); - let (code, body) = advisory_responses - .lock() - .unwrap() - .get(&id) - .cloned() - .unwrap_or((404, r#"{"error":"not found"}"#.into())); - (code, status_text(code), body, String::new()) } else { ( 404, @@ -525,8 +415,7 @@ mod tests { ("npm".into(), "lodash".into(), "4.17.20".into()), (status_code, body.to_string()), ); - let stub = - 
spawn_package_check_stub_with_retry_keys(responses, HashMap::new(), HashMap::new()); + let stub = spawn_package_check_stub_with_retry_keys(responses, HashMap::new()); check_package_version( &client, &stub.base_url, @@ -589,11 +478,7 @@ mod tests { ("npm".into(), "lodash".into(), "4.17.20".into()), (200, vulnerable_body.to_string()), ); - let stub = spawn_package_check_stub_with_retry_keys( - HashMap::new(), - retry_after_keys, - HashMap::new(), - ); + let stub = spawn_package_check_stub_with_retry_keys(HashMap::new(), retry_after_keys); let resp = check_package_version( &client, &stub.base_url, @@ -639,31 +524,6 @@ mod tests { ); } - #[test] - fn get_advisory_non_success_includes_body_snippet() { - let client = http_client().expect("test client"); - let mut advisories = HashMap::new(); - advisories.insert( - "GHSA-deploy-gap".to_string(), - (400, r#"{"error":"Invalid url"}"#.to_string()), - ); - let stub = - spawn_package_check_stub_with_retry_keys(HashMap::new(), HashMap::new(), advisories); - let err = get_advisory(&client, &stub.base_url, "test-token", "GHSA-deploy-gap") - .expect_err("400 should fail"); - let msg = err.to_string(); - assert!( - msg.contains("advisory lookup failed: HTTP 400"), - "got: {}", - msg - ); - assert!( - msg.contains("Invalid url"), - "expected body snippet in advisory error; got: {}", - msg - ); - } - #[test] fn body_snippet_truncates_at_char_boundary() { // Multi-byte char ("é" is 2 bytes UTF-8). Naïve byte-slicing would @@ -741,59 +601,6 @@ mod tests { assert!(!is_jwt("a..c")); } - #[test] - fn deserialize_advisory_response_real_server_shape() { - // Mirrors the worker's emitted payload (cve_worker/src/worker.js): - // server emits `id` (not `advisory_id`) and `source_url` (not `url`), - // plus many fields we ignore. No top-level `remediation` exists. 
- let body = r#"{ - "id": "GHSA-xxxx-yyyy-zzzz", - "source": "ghsa", - "source_url": "https://github.com/advisories/GHSA-xxxx-yyyy-zzzz", - "alias": "CVE-2026-12345", - "aliases": ["CVE-2026-12345"], - "ecosystem": "npm", - "summary": "Prototype pollution in lodash", - "severity": "HIGH", - "severity_badge": "HIGH", - "tier": 1, - "tier_score": 74.5, - "llm_summary": "Short developer-facing summary.", - "packages": [], - "cwes": [] - }"#; - let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); - assert_eq!(parsed.advisory_id, "GHSA-xxxx-yyyy-zzzz"); - assert_eq!(parsed.aliases, vec!["CVE-2026-12345".to_string()]); - assert_eq!(parsed.tier, Some(1)); - assert_eq!(parsed.severity_level.as_deref(), Some("HIGH")); - assert_eq!( - parsed.title.as_deref(), - Some("Prototype pollution in lodash") - ); - assert_eq!( - parsed.url.as_deref(), - Some("https://github.com/advisories/GHSA-xxxx-yyyy-zzzz") - ); - } - - #[test] - fn deserialize_advisory_response_tier_null_and_missing_source_url() { - // Server emits `tier: null` for unscored advisories - // (VULNERABILITY_SERVICE.md §5). `source_url` may also be absent. - let body = r#"{ - "id": "GHSA-only-id", - "tier": null - }"#; - let parsed: AdvisoryResponse = serde_json::from_str(body).unwrap(); - assert_eq!(parsed.advisory_id, "GHSA-only-id"); - assert!(parsed.tier.is_none()); - assert!(parsed.aliases.is_empty()); - assert!(parsed.title.is_none()); - assert!(parsed.severity_level.is_none()); - assert!(parsed.url.is_none()); - } - // Fixture-based deserialization tests — committed JSON under tests/fixtures/vuln_api/, // built to the authoritative server serialization (vuln-api/cve_worker/src/worker.js). macro_rules! 
fixture { @@ -853,21 +660,4 @@ mod tests { assert!(m.vulnerable_version_range.is_none()); assert!(m.fixed_version.is_none()); } - - #[test] - fn fixture_advisory_detail_reconciles_server_fields() { - // AdvisoryResponse reconciliation: server `severity`/`summary` map to - // severity_level/title via #[serde(rename)]. - let parsed: AdvisoryResponse = - serde_json::from_str(fixture!("advisory_detail.json")).unwrap(); - assert_eq!(parsed.advisory_id, "GHSA-xxxx-yyyy-zzzz"); - assert_eq!(parsed.aliases, vec!["CVE-2026-12345".to_string()]); - assert_eq!(parsed.tier, Some(1)); - assert_eq!(parsed.severity_level.as_deref(), Some("HIGH")); - assert_eq!(parsed.title.as_deref(), Some("SQL injection in django")); - assert_eq!( - parsed.url.as_deref(), - Some("https://github.com/advisories/GHSA-xxxx-yyyy-zzzz") - ); - } } diff --git a/src/vuln_api_stub/fixtures.rs b/src/vuln_api_stub/fixtures.rs index 626bfea..a37eea5 100644 --- a/src/vuln_api_stub/fixtures.rs +++ b/src/vuln_api_stub/fixtures.rs @@ -9,8 +9,6 @@ use std::path::Path; struct FixtureFile { #[serde(default)] package_checks: HashMap, - #[serde(default)] - advisories: HashMap, } /// Load stub fixtures from JSON. Keys in `package_checks` use `{ecosystem}/{name}/{version}`. @@ -25,14 +23,8 @@ pub fn load_from_file(path: &Path) -> Result, - pub advisories: HashMap, pub status_overrides: HashMap, } @@ -35,10 +34,6 @@ impl VulnApiStub { } /// Minimal TCP vuln-api stub for CLI integration tests and e2e dogfood. 
-pub fn spawn(fixtures: HashMap) -> VulnApiStub { - spawn_with_statuses(fixtures, HashMap::new()) -} - pub fn spawn_with_statuses( fixtures: HashMap, status_overrides: HashMap, @@ -46,7 +41,6 @@ pub fn spawn_with_statuses( spawn_on_port( StubFixtures { package_checks: fixtures, - advisories: HashMap::new(), status_overrides, }, 0, @@ -65,7 +59,6 @@ pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { let base_url = format!("http://127.0.0.1:{bound_port}"); let package_checks = Arc::new(fixtures.package_checks); - let advisories = Arc::new(fixtures.advisories); let status_overrides = Arc::new(fixtures.status_overrides); let handle = thread::spawn(move || { @@ -73,7 +66,7 @@ pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { let Ok(mut stream) = stream else { continue; }; - handle_connection(&mut stream, &package_checks, &advisories, &status_overrides); + handle_connection(&mut stream, &package_checks, &status_overrides); } }); @@ -94,7 +87,6 @@ pub fn spawn_from_file(path: &Path) -> VulnApiStub { fn handle_connection( stream: &mut std::net::TcpStream, package_checks: &Arc>, - advisories: &Arc>, status_overrides: &Arc>, ) { let mut buf = Vec::with_capacity(4096); @@ -136,14 +128,6 @@ fn handle_connection( .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); let status = status_overrides.get(&key).copied().unwrap_or(200); (status, body) - } else if parts.len() >= 3 && parts[0] == "v1" && parts[1] == "advisories" { - let id = urlencoding::decode(parts[2]) - .unwrap_or_default() - .into_owned(); - match advisories.get(&id) { - Some(body) => (200, body.clone()), - None => (404, NOT_FOUND_BODY.to_string()), - } } else { (404, NOT_FOUND_BODY.to_string()) } @@ -238,7 +222,7 @@ mod tests { } #[test] - fn advisory_route_and_fixture_file_loading() { + fn fixture_file_loading() { let dir = tempfile::tempdir().expect("tempdir"); let path = dir.path().join("fixtures.json"); std::fs::write( @@ -246,8 +230,7 @@ mod tests { 
r#"{ "package_checks": { "npm/left-pad/1.0.0": {"ecosystem":"npm","package_name":"left-pad","version":"1.0.0","is_vulnerable":true,"matches":[]} - }, - "advisories": {"MAL-2024-0001": {"id":"MAL-2024-0001"}} + } }"#, ) .unwrap(); @@ -258,10 +241,5 @@ mod tests { "/v1/packages/npm/left-pad/versions/1.0.0/check", ); assert!(resp.contains(r#""is_vulnerable":true"#), "resp: {resp}"); - - let resp = get(&stub.base_url, "/v1/advisories/MAL-2024-0001"); - assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); - let resp = get(&stub.base_url, "/v1/advisories/NOPE"); - assert!(resp.starts_with("HTTP/1.1 404"), "resp: {resp}"); } } diff --git a/tests/fixtures/vuln_api/advisory_detail.json b/tests/fixtures/vuln_api/advisory_detail.json deleted file mode 100644 index 1db6245..0000000 --- a/tests/fixtures/vuln_api/advisory_detail.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "id": "GHSA-xxxx-yyyy-zzzz", - "source": "ghsa", - "source_url": "https://github.com/advisories/GHSA-xxxx-yyyy-zzzz", - "alias": "CVE-2026-12345", - "aliases": ["CVE-2026-12345"], - "ecosystem": "pypi", - "summary": "SQL injection in django", - "details": "A detailed description of the vulnerability.", - "severity": "HIGH", - "severity_badge": "HIGH", - "tier": 1, - "tier_score": 74.5, - "llm_summary": "Short developer-facing summary." -} From 6d90ca3357704874e9041e847dae1b33691b85a8 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:30:52 +0200 Subject: [PATCH 08/59] Fall back to pip3 when pip is missing; clearer missing-binary error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `which::which("pip")` fails, exec resolution retries `pip3` — pip is the one manager with a conventional alias. The missing-binary error now names the binary and the fallback tried: error: 'pip' not found on PATH (also tried 'pip3') Exit 127 unchanged. 
The post-resolution exec-failure message names the resolved path instead of the requested binary so it stays accurate when the fallback was taken. New hermetic e2e suite (tests/cli_exec_fallback.rs): a controlled PATH with only a fake pip3 runs the install through it; a PATH with neither exits 127 with the message; npm's error names the binary without a fallback hint. --- src/precheck/mod.rs | 29 ++++-- tests/cli_exec_fallback.rs | 175 +++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 8 deletions(-) create mode 100644 tests/cli_exec_fallback.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..90277ed 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -659,15 +659,27 @@ fn exec_install_with_args(manager: PackageManager, subcommand: &str, rest: &[Str exec_command(manager.binary_name(), &full) } +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. +fn resolve_binary(binary: &str) -> Result<std::path::PathBuf, String> { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + fn exec_command(binary: &str, args: &[String]) -> i32 { - // Resolve the binary on PATH. On Windows this finds `.cmd` shims. - let resolved = match which::which(binary) { + let resolved = match resolve_binary(binary) { Ok(p) => p, - Err(e) => { - eprintln!( - "could not find '{}' on PATH ({}). 
Make sure the package manager is installed.", - binary, e - ); + Err(msg) => { + eprintln!("{msg}"); return 127; } }; @@ -686,7 +698,8 @@ fn exec_command(binary: &str, args: &[String]) -> i32 { 1 }), Err(e) => { - eprintln!("failed to exec {}: {}", binary, e); + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); 1 } } diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs new file mode 100644 index 0000000..a1e1ba8 --- /dev/null +++ b/tests/cli_exec_fallback.rs @@ -0,0 +1,175 @@ +//! Hermetic e2e tests for package-manager binary resolution: the pip→pip3 +//! fallback and the missing-binary error (exit 127). +//! +//! Same harness shape as `cli_install.rs`: the real `corgea` binary, a local +//! TcpListener stub standing in for PyPI, and a controlled `PATH` dir that +//! either holds a fake `pip3` (recording its argv to a marker file) or +//! nothing at all. Unix-only — the fake manager is a shell script. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +/// Spawn a PyPI stub serving `/pypi/oldpkg/json` (published 2020-01-01, +/// safely past the recency threshold). Anything else 404s. 
+fn spawn_pypi_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let (status, body) = if path == "/pypi/oldpkg/json" { + ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#, + ) + } else { + ("404 Not Found", r#"{"message":"not found"}"#) + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write an executable fake package manager named `binary` into `dir`. +/// It records its argv to `marker` and exits 0. +fn write_fake_package_manager(dir: &Path, binary: &str, marker: &Path) { + use std::os::unix::fs::PermissionsExt; + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit 0\n", + marker.display() + ); + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake package manager"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake package manager"); +} + +/// Isolated `corgea` wired to the PyPI stub, with `PATH` set to a private +/// temp dir containing only the named fake binaries. 
+struct FallbackHarness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl FallbackHarness { + fn new(binaries: &[&str]) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + for binary in binaries { + write_fake_package_manager(bin.path(), binary, &marker); + } + let registry = spawn_pypi_stub(); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + /// The argv the fake package manager was invoked with, if it ran. + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn pip_install_falls_back_to_pip3_when_pip_missing() { + let mut h = FallbackHarness::new(&["pip3"]); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!( + h.recorded_argv().as_deref(), + Some("install oldpkg==1.0.0"), + "the install must run via pip3 with forwarded args" + ); +} + +#[test] +fn pip_passthrough_falls_back_to_pip3() { + let mut h = FallbackHarness::new(&["pip3"]); + let out = h.cmd.args(["pip", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("list")); +} + +#[test] +fn pip_missing_both_pip_and_pip3_exits_127_with_message() { + let mut h = FallbackHarness::new(&[]); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + assert_eq!(h.recorded_argv(), None, "nothing must have run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'pip' not found on PATH (also tried 'pip3')"), + "stderr: {stderr}" + 
); +} + +#[test] +fn npm_missing_binary_error_names_binary_without_fallback() { + let mut h = FallbackHarness::new(&[]); + let out = h.cmd.args(["npm", "list"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(127)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: 'npm' not found on PATH"), + "stderr: {stderr}" + ); + assert!( + !stderr.contains("also tried"), + "npm has no fallback alias; stderr: {stderr}" + ); +} From ce74ca53cad44d5cfd9787c28c5714a36891c47b Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:32:46 +0200 Subject: [PATCH 09/59] Document deterministic staging targets for testing the install gate Add a 'Testing the gate' subsection to the corgea skill: staging vuln-api URL, known-vulnerable npm/PyPI targets with their fixes, a verified copy-paste command with real observed output, and the recent-CVEs-only caveat for the staging PyPI seed. --- skills/corgea/SKILL.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index b034506..f9497c5 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -162,6 +162,44 @@ Recency gating needs no token; the vuln verdict uses the configured Corgea token present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, `CORGEA_VULN_API_URL`. +#### Testing the gate + +Staging vuln-api (`CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev`) +serves deterministic verdicts for dogfooding. It ignores auth — any non-empty +`CORGEA_TOKEN` value enables full-gate mode. 
Known-vulnerable targets: + +| Ecosystem | Target | Verdict | +|-----------|--------|---------| +| npm | `axios@0.21.0` | vulnerable — fixed in 0.21.2 | +| npm | `minimist@0.0.8` | vulnerable — fixed in 1.2.2 | +| npm | `node-fetch@2.6.0` | vulnerable — fixed in 2.6.7 | +| PyPI | `mezzanine==6.0.0` | vulnerable — no fixed version known | + +Verify the gate end-to-end: + +```bash +CORGEA_TOKEN=dogfood-dummy \ +CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev \ +corgea npm install axios@0.21.0 +``` + +Expected output (exit code 1; nothing is installed): + +``` +Pre-checking `npm install axios@0.21.0` (threshold 2d) + 1 ok, 0 recent, 1 vulnerable, 0 unverifiable, 0 skipped, 0 errors + tree: 2 packages resolved, 1 transitive checked + ✗ axios@0.21.0 → axios@0.21.0 known vulnerable: + CVE-2021-3749 (high) — fixed in 0.21.2 + CVE-2020-28168 (medium) — fixed in 0.21.1 + → safe version: axios@0.21.2 +Refusing to run install. Pass --force to proceed despite findings. +``` + +Caveat: the staging PyPI seed covers recent CVEs only. Decade-old classics +(`pyyaml==5.1`, `django==2.2`) return clean **by design** — a clean verdict on +those does not mean the gate is broken. + ### Deps — `corgea deps ` From 3cc9d08a418cd61805a58aafb815182508f22ec1 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:35:45 +0200 Subject: [PATCH 10/59] Gate bare npm installs; honest ungated note for yarn/pnpm/uv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-spec `corgea npm install` with a package.json and a token now runs the existing tree pass: the lockfile-resolved set (0 named, N transitive) is verdicted, a vulnerable lockfile blocks fail-closed, --force escapes, and a resolution failure degrades to the named-only warning + exec as before. Bare yarn/pnpm/uv install-shaped commands print one stderr line (`note: bare ' ' is not gated …`) instead of silently running unchecked. 
The gated report header no longer renders a trailing space when the arg list is empty. SKILL.md offline-inputs sentence rewritten to match. --- skills/corgea/SKILL.md | 8 +- src/precheck/mod.rs | 31 ++- src/precheck/tree.rs | 5 +- tests/cli_bare_install.rs | 417 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 454 insertions(+), 7 deletions(-) create mode 100644 tests/cli_bare_install.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index b034506..bce3ee8 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -118,8 +118,12 @@ token is configured — each resolved version is checked against Corgea's vuln-a known-vulnerable or malicious versions block, and a verdict that cannot be obtained (network/5xx/auth errors) also blocks (fail-closed). Without a token the vuln check is skipped (recency-only) and stderr suggests `corgea login`. Everything else passes -through with the package manager's own exit code. Offline-only inputs (git/URL/path -specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. +through with the package manager's own exit code. Git/URL/path specs are noted, never +blocked. With a token, bare `npm install` (zero specs, `package.json` present) is gated +too: the full lockfile-resolved tree is verdicted, so a vulnerable lockfile blocks. Bare +`yarn`/`pnpm`/`uv` installs have no safe dry-run; they run unchecked after a stderr note +(`note: bare ' ' is not gated …`). `-r requirements.txt` files get a printed +note when the tree pass doesn't cover them. 
Blocked findings steer to the fix: each advisory line shows `fixed in ` (or `no fixed version known`), and when every advisory on a package has a fix, a diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..c71812d 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -337,6 +337,7 @@ fn run_parsed_install( let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); if parsed.targets.is_empty() && !tree_eligible { + bare_install_note(manager, subcommand_label); requirements_note(&parsed); return exec(); } @@ -404,6 +405,23 @@ fn run_parsed_install( exec() } +/// One honest stderr line when a zero-spec install can't be gated: +/// yarn/pnpm/uv have no safe dry-run, so a bare install pulls its whole +/// dependency set unchecked. No-op for other managers (bare npm is gated +/// via the tree pass; bare pip installs nothing). +fn bare_install_note(manager: PackageManager, subcommand_label: &str) { + if matches!( + manager, + PackageManager::Yarn | PackageManager::Pnpm | PackageManager::Uv + ) { + eprintln!( + "note: bare '{} {}' is not gated (no safe dry-run) — dependencies install unchecked", + manager.binary_name(), + subcommand_label + ); + } +} + /// Print the "requirements files are not recency-checked" note when the /// install carried any `-r` files. No-op otherwise. fn requirements_note(parsed: &parse::ParsedInstall) { @@ -747,11 +765,16 @@ fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) } fn print_text(report: &PrecheckReport) { + // Build the echoed command from non-empty parts: a bare gated install + // (e.g. `npm install` with zero specs) has no args to append. 
+ let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); + if !report.original_args.is_empty() { + command.push(' '); + command.push_str(&report.original_args.join(" ")); + } println!( - "Pre-checking `{} {} {}` (threshold {})", - report.manager.binary_name(), - report.subcommand, - report.original_args.join(" "), + "Pre-checking `{}` (threshold {})", + command, verify_deps::format_duration(report.threshold) ); println!( diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 5f4db68..1a55ab8 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -16,10 +16,13 @@ pub struct TreePackage { /// Whether this manager's resolver has anything to resolve for the parsed /// install. pip's dry-run also reads `-r` requirements files, so those make -/// a pip install eligible even with no named targets. +/// a pip install eligible even with no named targets. npm's lockfile +/// resolution reads `package.json`, so a bare `npm install` is eligible +/// whenever the working directory has one. pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { !parsed.targets.is_empty() || (manager == PackageManager::Pip && !parsed.requirements_files.is_empty()) + || (manager == PackageManager::Npm && std::path::Path::new("package.json").exists()) } /// `Ok(None)`: manager has no safe dry-run — named-only with warning. diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs new file mode 100644 index 0000000..7f96a7c --- /dev/null +++ b/tests/cli_bare_install.rs @@ -0,0 +1,417 @@ +//! Hermetic e2e tests for zero-spec ("bare") installs. +//! +//! With a token and a `package.json`, bare `npm install` is gated like any +//! other install: the tree pass resolves the full lockfile set and verdicts +//! every package, so a vulnerable lockfile blocks (exit 1, `--force` escape). +//! Bare yarn/pnpm/uv installs have no safe dry-run — they exec unchecked +//! behind one honest stderr note. +//! +//! 
Harness mirrors `cli_tree.rs`: fake package manager on a private PATH +//! (tree-aware for npm, plain argv recorder for yarn/pnpm/uv) + local +//! registry stub + in-crate vuln-api stub. `oldpkg` is published in 2020 so +//! recency never blocks here. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// npm lockfile-v3 fixture the fake npm "resolves" from `package.json`: +/// `oldpkg` 1.0.0 + `evildep` 0.4.2 — with zero specs, both are transitive. +const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + +const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; + +fn vulnerable_evildep_body() -> String { + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string() +} + +/// Registry stub serving the `/oldpkg` npm packument, published 2020 → never +/// recent. Everything else 404s. 
+fn spawn_registry_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let (status, body) = if path == "/oldpkg" { + ( + "200 OK", + r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#, + ) + } else { + ("404 Not Found", r#"{"message":"not found"}"#) + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Sentinel payload: the fake npm exits non-zero on its tree (resolution) +/// invocation, forcing the named-only fallback. +const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Tree-aware fake npm (same scheme as `cli_tree.rs`): an invocation carrying +/// `--package-lock-only` writes `payload` to `./package-lock.json` (the +/// resolver's throwaway temp dir) and exits 0, or exits 1 when `payload` is +/// `RESOLUTION_FAILS`. Any other invocation records its argv to `marker` and +/// exits `exit_code`. Payload is emitted via shell builtins — the locked-down +/// PATH has no `cat`. 
+fn write_fake_npm(dir: &Path, marker: &Path, payload: &str, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + let tree_branch = if payload == RESOLUTION_FAILS { + "exit 1".to_string() + } else { + let payload_path = dir.join("npm-tree-payload.json"); + std::fs::write(&payload_path, payload).expect("write fake npm payload"); + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}' > package-lock.json; exit 0", + payload_path.display() + ) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --package-lock-only \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + let path = dir.join("npm"); + std::fs::write(&path, script).expect("write fake npm"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); +} + +/// Plain recorder for managers with no tree pass (yarn/pnpm/uv): record argv +/// to `marker`, exit `exit_code`. +fn write_fake_recorder(dir: &Path, marker: &Path, binary: &str, exit_code: i32) { + use std::os::unix::fs::PermissionsExt; + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake pm"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); +} + +/// `corgea` wired to a fake package manager, the registry + vuln-api stubs, +/// a token, and a throwaway project dir as cwd. +struct BareHarness { + cmd: Command, + marker: PathBuf, + project: TempDir, + _home: TempDir, + _bin: TempDir, +} + +impl BareHarness { + /// `npm_payload`: `Some` wires a tree-aware fake npm with that canned + /// lockfile (or `RESOLUTION_FAILS`); `None` wires a plain recorder for + /// `binary`. `exit_code` is what the fake exits with on the exec'd + /// (non-tree) invocation. 
+ fn new( + binary: &str, + checks: HashMap<PackageKey, String>, + npm_payload: Option<&str>, + exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let project = TempDir::new().expect("project dir"); + let marker = bin.path().join("pm-argv.txt"); + match npm_payload { + Some(payload) => write_fake_npm(bin.path(), &marker, payload, exit_code), + None => write_fake_recorder(bin.path(), &marker, binary, exit_code), + } + let registry = spawn_registry_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_NPM_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token") + .current_dir(project.path()); + Self { + cmd, + marker, + project, + _home: home, + _bin: bin, + } + } + + fn with_package_json(self) -> Self { + std::fs::write(self.project.path().join("package.json"), PACKAGE_JSON) + .expect("write package.json"); + self + } + + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn bare_npm_install_vulnerable_lockfile_blocks() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); + assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); + assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("not gated"), + "gated bare npm must not print the ungated note: 
{stderr}" + ); +} + +#[test] +fn bare_npm_install_clean_lockfile_proceeds() { + let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); +} + +#[test] +fn bare_npm_install_force_overrides_block() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h + .cmd + .args(["npm", "--force", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the install"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn bare_npm_install_json_carries_tree_object() { + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + let out = h + .cmd + .args(["npm", "--json", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + assert_eq!(parsed["tree"]["resolved_count"], 2); + assert_eq!(parsed["summary"]["vulnerable"], 1); + assert_eq!( + parsed["results"].as_array().map(Vec::len), + Some(0), + "zero named targets" + ); +} + +#[test] +fn bare_npm_resolution_failure_falls_back_with_warning() { + // Fake npm exits 1 on `--package-lock-only`. 
Nothing named remains to + // verify, so the install proceeds behind the loud fallback warning. + let mut h = + BareHarness::new("npm", HashMap::new(), Some(RESOLUTION_FAILS), 0).with_package_json(); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "fallback must proceed"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_npm_without_package_json_passes_through() { + // No package.json in cwd → nothing to resolve → straight exec, no gate. + let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 3); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(3), "npm's own exit code propagates"); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(!stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + !String::from_utf8_lossy(&out.stderr).contains("not gated"), + "npm never gets the yarn/pnpm/uv note" + ); +} + +#[test] +fn bare_npm_tokenless_passes_through() { + // package.json present but no token → recency-only mode has no tree pass; + // bare install execs untouched. 
+ let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); +} + +#[test] +fn bare_yarn_install_prints_note_and_execs() { + let mut h = BareHarness::new("yarn", HashMap::new(), None, 7); + let out = h + .cmd + .args(["yarn", "install"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(7), + "yarn's own exit code propagates" + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains( + "note: bare 'yarn install' is not gated (no safe dry-run) — dependencies install unchecked" + ), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_yarn_note_prints_without_token_too() { + let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h + .cmd + .args(["yarn", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("bare 'yarn install' is not gated"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_pnpm_install_prints_note() { + let mut h = BareHarness::new("pnpm", HashMap::new(), None, 0); + let out = h + .cmd + .args(["pnpm", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("bare 'pnpm install' is not gated"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn bare_uv_add_and_pip_install_print_note() { + let mut h = BareHarness::new("uv", HashMap::new(), None, 0); + 
let out = h.cmd.args(["uv", "add"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("add")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("bare 'uv add' is not gated"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let mut h = BareHarness::new("uv", HashMap::new(), None, 0); + let out = h + .cmd + .args(["uv", "pip", "install"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("pip install")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("bare 'uv pip install' is not gated"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn yarn_named_target_does_not_print_bare_note() { + // A named target takes the gated path: named-only warning, no bare note. + let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + let out = h + .cmd + .args(["yarn", "add", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean named target proceeds"); + assert_eq!(h.recorded_argv().as_deref(), Some("add oldpkg@1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("not gated"), + "named install must not print the bare note: {stderr}" + ); + assert!( + stderr.contains("transitive dependencies not checked"), + "named-only warning still applies to yarn: {stderr}" + ); +} From 4f30eb1136c665ad2206f2218068fd6586654e84 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:36:57 +0200 Subject: [PATCH 11/59] Label tree findings by provenance instead of blanket (transitive) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tree-pass findings were all labeled (transitive), which misled when the flagged package came from the user's own requirements file or was already a direct dep of the project. 
- TreePackage carries pip's per-item "requested" report flag (npm: false). - New TreeOrigin on TreeOutcome: Requested / PreExisting / Transitive. Requested = pip-requested leftovers (-r files); PreExisting = npm leftovers named in the project package.json's direct deps (all four dep groups), read from cwd; Transitive otherwise. - Text labels: (from requirements), (already in package.json), (transitive). JSON tree entries gain an "origin" field. - PreExisting findings with an advertised fix get a hint: fix with: corgea npm install @ (advertised fix). advertised_fix() takes the max parseable fixed_version across matches, ignoring fixless matches — deliberately weaker than safe_version's certification and independent of it. should_block_install semantics unchanged. New hermetic e2e coverage in tests/cli_provenance.rs (fake PM + registry stub + vuln-api stub). --- src/precheck/mod.rs | 173 ++++++++++++++++++++- src/precheck/tree.rs | 102 ++++++++++--- tests/cli_provenance.rs | 322 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 567 insertions(+), 30 deletions(-) create mode 100644 tests/cli_provenance.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..ada7917 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -168,11 +168,45 @@ pub enum TargetOutcome { }, } +/// Why a tree-pass finding is in the would-install set. Drives the +/// provenance label so a package the user asked for (or already depends on) +/// is never mislabeled "(transitive)". +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TreeOrigin { + /// Pulled in as a dependency of something else. + Transitive, + /// Explicitly requested (pip report `"requested"` — CLI arg or + /// requirements file; leftovers here come from `-r` files since named + /// CLI targets match a named outcome instead). + Requested, + /// Already a direct dependency in the project's `package.json`. 
+ PreExisting, +} + +impl TreeOrigin { + fn label(self) -> &'static str { + match self { + TreeOrigin::Transitive => "(transitive)", + TreeOrigin::Requested => "(from requirements)", + TreeOrigin::PreExisting => "(already in package.json)", + } + } + + fn json_name(self) -> &'static str { + match self { + TreeOrigin::Transitive => "transitive", + TreeOrigin::Requested => "requested", + TreeOrigin::PreExisting => "pre-existing", + } + } +} + /// Verdict for one package the tree pass resolved beyond the named targets. #[derive(Debug)] pub struct TreeOutcome { pub name: String, pub version: String, + pub origin: TreeOrigin, pub verdict: VerdictStatus, } @@ -464,17 +498,26 @@ fn run_tree_pass( jobs.push(tree::TreePackage { name: resolved.name.clone(), version: resolved.version.clone(), + requested: true, }); } } } + // npm leftovers that are direct deps of the project manifest are + // pre-existing, not transitive. pip carries `requested` instead. + let direct_deps = if manager == PackageManager::Npm { + tree::project_direct_deps() + } else { + Default::default() + }; + let cfg = opts .verdict .as_ref() .expect("tree pass requires verdict config"); let results = verdict_pool(jobs, cfg, manager, opts.concurrency); - let transitive = apply_verdicts(manager, results, outcomes); + let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, transitive, @@ -535,11 +578,14 @@ fn verdict_pool( } /// Assign pooled verdicts onto matching named outcomes (by normalized -/// name + version) and return the unmatched leftovers — the transitive set. +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. 
fn apply_verdicts( manager: PackageManager, results: Vec<(tree::TreePackage, VerdictStatus)>, outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, ) -> Vec { let norm = |n: &str| manager.normalize_name(n); let mut transitive = Vec::new(); @@ -560,9 +606,17 @@ fn apply_verdicts( } } if !matched { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; transitive.push(TreeOutcome { name: pkg.name, version: pkg.version, + origin, verdict, }); } @@ -590,13 +644,14 @@ fn run_verdict_pass( TargetOutcome::Resolved { resolved, .. } => Some(tree::TreePackage { name: resolved.name.clone(), version: resolved.version.clone(), + requested: true, }), _ => None, }) .collect(); let results = verdict_pool(jobs, cfg, manager, opts.concurrency); - apply_verdicts(manager, results, outcomes); + apply_verdicts(manager, results, outcomes, &Default::default()); } fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { @@ -730,6 +785,38 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { } } +/// Highest `fixed_version` the advisories advertise, by lenient semver. +/// Unlike `safe_version` this is *not* a certification: matches without a +/// fix are ignored, so the result may still be vulnerable to them. `None` +/// only when no match advertises a fix (or no candidate parses). 
+fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let mut fixes: Vec<&str> = matches + .iter() + .filter_map(|m| m.fixed_version.as_deref()) + .collect(); + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut best: Option<(semver::Version, &str)> = None; + for raw in many { + let Ok(v) = + semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) + else { + continue; + }; + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw)), + } + } + best.map(|(_, raw)| (*raw).to_string()) + } + } +} + /// Per-match advisory lines plus the safe-version steer, shared by the /// named-target and transitive vulnerable render arms. fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { @@ -778,18 +865,35 @@ fn print_text(report: &PrecheckReport) { match &t.verdict { VerdictStatus::Vulnerable(matches) => { println!( - " ✗ {}@{} (transitive) known vulnerable:", - t.name, t.version + " ✗ {}@{} {} known vulnerable:", + t.name, + t.version, + t.origin.label() ); print_vulnerable_matches(&t.name, matches); + // A vulnerable dep the project already declares can be + // bumped directly — point at the advertised fix. + if t.origin == TreeOrigin::PreExisting { + if let Some(fix) = advertised_fix(matches) { + println!( + " fix with: corgea {} install {}@{} (advertised fix)", + report.manager.binary_name(), + t.name, + fix + ); + } + } } VerdictStatus::Unverifiable(error) => { println!( - " ⚠ {}@{} (transitive) could not be verified: {}", - t.name, t.version, error + " ⚠ {}@{} {} could not be verified: {}", + t.name, + t.version, + t.origin.label(), + error ); } - // Clean / not-checked transitive entries stay quiet in text mode. + // Clean / not-checked tree entries stay quiet in text mode. 
VerdictStatus::Clean | VerdictStatus::NotChecked(_) => {} } } @@ -937,6 +1041,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "transitive": transitive.iter().map(|o| json!({ "name": o.name, "version": o.version, + "origin": o.origin.json_name(), "verdict": verdict_json(&o.verdict), })).collect::>(), }), @@ -1159,6 +1264,7 @@ mod tests { transitive: vec![TreeOutcome { name: "evildep".to_string(), version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, verdict: VerdictStatus::Vulnerable(vec![]), }], }); @@ -1260,6 +1366,7 @@ mod tests { .map(|n| tree::TreePackage { name: n.to_string(), version: "1.0.0".to_string(), + requested: false, }) .collect(); @@ -1354,4 +1461,54 @@ mod tests { fn safe_version_empty_matches_is_none() { assert_eq!(safe_version(&[]), None); } + + #[test] + fn advertised_fix_ignores_matches_without_fix() { + // safe_version returns None here; the advertised fix still surfaces. + assert_eq!( + advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + Some("2.0.0".to_string()) + ); + assert_eq!(advertised_fix(&[vm("A-1", None)]), None); + assert_eq!(advertised_fix(&[]), None); + } + + #[test] + fn advertised_fix_picks_highest_by_semver() { + assert_eq!( + advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. 
+ #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 5f4db68..f21a62c 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -12,6 +12,10 @@ use super::PackageManager; pub struct TreePackage { pub name: String, pub version: String, + /// pip report `"requested"`: the user named this package (CLI arg or + /// requirements file). Always false for npm — its lockfile has no + /// equivalent flag. + pub requested: bool, } /// Whether this manager's resolver has anything to resolve for the parsed @@ -88,11 +92,42 @@ fn parse_pip_report(json: &str) -> Result, String> { Ok(TreePackage { name: field("name")?, version: field("version")?, + requested: item + .get("requested") + .and_then(|v| v.as_bool()) + .unwrap_or(false), }) }) .collect() } +/// Direct dependency names declared by the project's `package.json` in the +/// current directory (the manifest `resolve_npm_tree` copies). Empty when +/// the manifest is absent or unparsable — origin labeling then degrades to +/// `(transitive)`. 
+pub fn project_direct_deps() -> std::collections::HashSet { + std::fs::read_to_string("package.json") + .map(|s| direct_deps_from_manifest(&s)) + .unwrap_or_default() +} + +fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { + let Ok(manifest) = serde_json::from_str::(json) else { + return Default::default(); + }; + let groups = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ]; + groups + .iter() + .filter_map(|g| manifest.get(g)?.as_object()) + .flat_map(|deps| deps.keys().cloned()) + .collect() +} + /// Resolve npm's full would-install set by generating a lockfile in a /// throwaway dir so the user's own lockfile is never touched. npm's /// `--dry-run --json` only emits counts (npm/cli#6558), so we read the @@ -164,6 +199,7 @@ fn parse_npm_lockfile(json: &str) -> Result, String> { out.push(TreePackage { name, version: version.to_string(), + requested: false, }); } Ok(out) @@ -193,16 +229,25 @@ mod tests { vec![ TreePackage { name: "oldpkg".to_string(), - version: "1.0.0".to_string() + version: "1.0.0".to_string(), + requested: true, }, TreePackage { name: "evildep".to_string(), - version: "0.4.2".to_string() + version: "0.4.2".to_string(), + requested: false, }, ] ); } + #[test] + fn parse_pip_report_missing_requested_defaults_false() { + let json = r#"{"install":[{"metadata":{"name":"x","version":"1.0.0"}}]}"#; + let pkgs = parse_pip_report(json).expect("parse report without requested"); + assert!(!pkgs[0].requested); + } + #[test] fn parse_pip_report_missing_install() { let err = parse_pip_report(r#"{"version":"1"}"#).expect_err("no install[]"); @@ -241,29 +286,19 @@ mod tests { fn parse_npm_lockfile_ok() { let mut pkgs = parse_npm_lockfile(NPM_LOCK).expect("parse npm lock"); pkgs.sort_by(|a, b| a.name.cmp(&b.name)); + let pkg = |name: &str, version: &str| TreePackage { + name: name.to_string(), + version: version.to_string(), + requested: false, + }; assert_eq!( pkgs, vec![ - TreePackage { - 
name: "@scope/pkg".to_string(), - version: "9.0.1".to_string() - }, - TreePackage { - name: "b".to_string(), - version: "2.3.4".to_string() - }, - TreePackage { - name: "evildep".to_string(), - version: "0.4.2".to_string() - }, - TreePackage { - name: "localdep".to_string(), - version: "0.0.1".to_string() - }, - TreePackage { - name: "oldpkg".to_string(), - version: "1.0.0".to_string() - }, + pkg("@scope/pkg", "9.0.1"), + pkg("b", "2.3.4"), + pkg("evildep", "0.4.2"), + pkg("localdep", "0.0.1"), + pkg("oldpkg", "1.0.0"), ] ); } @@ -290,4 +325,27 @@ mod tests { ); assert_eq!(name_from_lock_path("packages/foo"), None); } + + #[test] + fn direct_deps_from_manifest_unions_all_groups() { + let manifest = r#"{ + "name": "proj", + "dependencies": {"a": "^1.0.0", "@scope/b": "2.x"}, + "devDependencies": {"c": "*"}, + "optionalDependencies": {"d": "1.2.3"}, + "peerDependencies": {"e": ">=1"} + }"#; + let deps = direct_deps_from_manifest(manifest); + for name in ["a", "@scope/b", "c", "d", "e"] { + assert!(deps.contains(name), "missing {name}"); + } + assert_eq!(deps.len(), 5); + } + + #[test] + fn direct_deps_from_manifest_degrades_to_empty() { + assert!(direct_deps_from_manifest("not json").is_empty()); + assert!(direct_deps_from_manifest(r#"{"name":"proj"}"#).is_empty()); + assert!(direct_deps_from_manifest(r#"{"dependencies":[]}"#).is_empty()); + } } diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs new file mode 100644 index 0000000..9349e1e --- /dev/null +++ b/tests/cli_provenance.rs @@ -0,0 +1,322 @@ +//! Hermetic e2e tests for provenance labels on tree-pass findings: +//! `(from requirements)` for pip-requested packages, `(already in +//! package.json)` for npm direct deps the project already declares (plus the +//! `fix with:` advertised-fix hint), `(transitive)` otherwise, and the +//! `"origin"` field in `--json` output. +//! +//! Same harness pattern as `cli_tree.rs`: fake package manager on a private +//! 
PATH (answers the tree-resolution invocation with a canned payload), +//! a local registry stub, and the in-crate vuln-api stub. `oldpkg` is +//! published in 2020 so recency never blocks — every block is the verdict's. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// Vulnerable verdict body; `fixed_version` is spliced in as given +/// (`"1.2.2"` or `null`). +fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: &str) -> String { + format!( + r#"{{"ecosystem":"{ecosystem}","package_name":"{name}","version":"{version}","is_vulnerable":true, + "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":{fixed}}}]}}"# + ) +} + +/// Pip report: only `reqpkg`, requested (as if it came from a `-r` file). +const PIP_REQ_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}]}"#; + +/// Pip report mixing all three origins: `oldpkg` (named on the CLI, matches +/// the named outcome), `reqpkg` (requested via `-r`), `evildep` (transitive). +const PIP_MIXED_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// npm lockfile-v3: named `oldpkg` 1.0.0 + `evildep` 0.4.2 (resolved from the +/// project's pre-existing direct dep). 
+const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + +/// Project manifest that already declares `evildep` as a direct dep. +const PROJECT_MANIFEST: &str = + r#"{"name":"proj","version":"1.0.0","dependencies":{"evildep":"^0.4.0"}}"#; + +/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm +/// packument), both published 2020 → never recent. Everything else 404s. +fn spawn_registry_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or("") + .to_string(); + + let (status, body) = match path.as_str() { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + ), + "/oldpkg" => ( + "200 OK", + r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write an executable fake package manager into `dir`. 
The tree-resolution +/// invocation (pip `--dry-run` / npm `--package-lock-only`) emits `payload` +/// (stdout for pip, `./package-lock.json` for npm) and exits 0; any other +/// invocation records its argv to `marker` and exits 0. The payload is read +/// via shell builtins because the locked-down test `PATH` has no `cat`. +fn write_fake_pm(dir: &Path, marker: &Path, binary: &str, payload: &str) { + use std::os::unix::fs::PermissionsExt; + let (tree_flag, redirect) = match binary { + "pip" => ("--dry-run", ""), + "npm" => ("--package-lock-only", " > package-lock.json"), + other => panic!("unsupported fake manager {other}"), + }; + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{payload}'{redirect}; exit 0;; esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", + payload = payload_path.display(), + marker = marker.display(), + ); + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake pm"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod fake pm"); +} + +/// `corgea` wired to the registry stub, a tree-aware fake manager, and a +/// vuln-api stub. 
+struct Harness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl Harness { + fn new(binary: &str, checks: HashMap, payload: &str) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pm(bin.path(), &marker, binary, payload); + let registry = spawn_registry_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", ®istry) + .env("CORGEA_NPM_REGISTRY", ®istry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token"); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +/// Project dir holding a `package.json` that already declares `evildep`. +fn npm_project() -> TempDir { + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("package.json"), PROJECT_MANIFEST) + .expect("write package.json"); + project +} + +#[test] +fn pip_requirements_finding_labeled_from_requirements() { + // The flagged package comes from a `-r` file (pip marks it `requested`), + // so it must not be mislabeled "(transitive)". 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "reqpkg", "6.0.0"), + vulnerable_body("pypi", "reqpkg", "6.0.0", "null"), + ); + let mut h = Harness::new("pip", checks, PIP_REQ_REPORT); + let out = h + .cmd + .args(["pip", "install", "-r", "reqs.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "requested vuln must block"); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("reqpkg@6.0.0 (from requirements)"), + "stdout: {stdout}" + ); + assert!(!stdout.contains("(transitive)"), "stdout: {stdout}"); +} + +#[test] +fn npm_preexisting_direct_dep_labeled_with_fix_hint() { + // `evildep` is already a direct dep in the project's package.json; the + // finding gets the pre-existing label plus the advertised-fix hint. + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + ); + let mut h = Harness::new("npm", checks, NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), + "stdout: {stdout}" + ); +} + +#[test] +fn npm_preexisting_without_fix_has_no_hint() { + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "null"), + ); + let mut h = Harness::new("npm", checks, NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", 
"install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("evildep@0.4.2 (already in package.json)"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("fix with:"), + "no advertised fix → no hint; stdout: {stdout}" + ); +} + +#[test] +fn pip_json_carries_origin_per_tree_entry() { + // All-clean run mixing origins: the named `oldpkg` matches its outcome, + // `reqpkg` (requested) and `evildep` (transitive) land in `tree.transitive` + // with their origins. + let mut h = Harness::new("pip", HashMap::new(), PIP_MIXED_REPORT); + let out = h + .cmd + .args([ + "pip", + "--json", + "install", + "oldpkg==1.0.0", + "-r", + "reqs.txt", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + let entries = parsed["tree"]["transitive"] + .as_array() + .expect("transitive array"); + let origin_of = |name: &str| { + entries + .iter() + .find(|e| e["name"] == name) + .unwrap_or_else(|| panic!("{name} missing from tree entries"))["origin"] + .clone() + }; + assert_eq!(origin_of("reqpkg"), "requested"); + assert_eq!(origin_of("evildep"), "transitive"); + assert_eq!(entries.len(), 2, "named oldpkg must not be a tree entry"); +} + +#[test] +fn npm_json_carries_preexisting_origin() { + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + ); + let mut h = Harness::new("npm", checks, NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "--json", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let parsed: serde_json::Value = + 
serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["transitive"][0]["name"], "evildep"); + assert_eq!(parsed["tree"]["transitive"][0]["origin"], "pre-existing"); + assert_eq!( + parsed["tree"]["transitive"][0]["verdict"]["status"], + "vulnerable" + ); +} From fdfa472bd54f7c1a07db047b43b33d2366fad932 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:37:33 +0200 Subject: [PATCH 12/59] Fix u16 overflow in find_available_port range arithmetic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit start_port + 50 panics in debug builds when the start port is above 65485 — the port-search test binds an ephemeral port and trips this whenever the OS hands out one near u16::MAX. Saturate instead. --- src/authorize.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/authorize.rs b/src/authorize.rs index 7271cf9..854415c 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -94,7 +94,8 @@ pub fn run(scope: Option, url: Option) -> Result<(), Box Result> { // Try a more reliable approach - start from a higher range that's less likely to be used let search_ranges = vec![ - (start_port, start_port + 50), + // Saturate: a start port near u16::MAX must clamp, not overflow. + (start_port, start_port.saturating_add(50)), (9000, 9100), (8000, 8100), (7000, 7100), From f32235481cd9d7a15a67a197b21e5cfb9774465f Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:37:50 +0200 Subject: [PATCH 13/59] Verify the safe-version steer before printing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-verdict each proposed '→ safe version' against vuln-api through the existing verdict_pool before any output. 
A clean re-check prints the steer; a flagged one prints '→ advertised fix {v} is also flagged — no safe version to suggest'; a failed re-check suppresses the steer quietly and never moves counts or exit codes. JSON 'remediation' now emits only on a Verified steer. Proposals dedup by normalized (name, version) and requests fire only when a vulnerable verdict exists with a token configured. --- skills/corgea/SKILL.md | 7 +- src/precheck/mod.rs | 257 +++++++++++++++++++++++++++++++++++++-- tests/cli_remediation.rs | 119 +++++++++++++++++- 3 files changed, 367 insertions(+), 16 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index b034506..f7f5b0d 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -122,8 +122,11 @@ through with the package manager's own exit code. Offline-only inputs (git/URL/p specs, `-r requirements.txt`, bare `install`) are not checked and run with a printed note. Blocked findings steer to the fix: each advisory line shows `fixed in ` (or -`no fixed version known`), and when every advisory on a package has a fix, a -`→ safe version: @` line names the version to install instead. +`no fixed version known`). When every advisory on a package has a fix, the gate +re-checks that version against vuln-api before suggesting it: a clean re-check prints +`→ safe version: @`; a flagged one prints `→ advertised fix +is also flagged — no safe version to suggest`; a failed re-check suppresses the +suggestion quietly (it never changes counts or exit codes). 
With a token, the vuln check covers the **full would-install set**, not just the named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..52b44ea 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -16,6 +16,7 @@ pub mod parse; pub mod tree; +use std::collections::HashMap; use std::ffi::OsString; use std::process::Command; use std::time::Duration; @@ -101,6 +102,19 @@ pub enum VerdictStatus { NotChecked(String), } +/// Result of re-verdicting a proposed `→ safe version` steer against +/// vuln-api before it prints. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SteerCheck { + /// vuln-api confirmed the proposed version is clean — print the steer. + Verified, + /// vuln-api flagged the proposed version too — print the rejection note. + Rejected, + /// The re-check failed (network/5xx/auth) — suppress the steer quietly. + /// Never feeds counts or the block decision. + Unverified, +} + /// Reason recorded on resolved targets when no token is configured. const NO_TOKEN_REASON: &str = "no Corgea token; vulnerability verdict skipped"; @@ -200,6 +214,10 @@ pub struct PrecheckReport { pub threshold: Duration, /// `None` ⇒ recency-only mode, the tree pass never ran. pub tree: Option, + /// Verification results for proposed safe-version steers, keyed by + /// (normalized name, proposed version). Populated by `verify_steers`; + /// consulted only at render time, never by the block predicate. 
+ pub steers: HashMap<(String, String), SteerCheck>, } impl PrecheckReport { @@ -375,14 +393,16 @@ fn run_parsed_install( ); } - let report = PrecheckReport { + let mut report = PrecheckReport { manager, subcommand: subcommand_label.to_string(), original_args: rest.to_vec(), outcomes, threshold: opts.threshold, tree, + steers: HashMap::new(), }; + verify_steers(&mut report, &opts); if opts.json { print_json(&report, &opts); @@ -599,6 +619,68 @@ fn run_verdict_pass( apply_verdicts(manager, results, outcomes); } +/// Re-verdict every proposed `→ safe version` steer before anything prints. +/// Populates `report.steers` keyed by (normalized name, proposed version): +/// `Clean` ⇒ `Verified`, flagged ⇒ `Rejected`, request failure ⇒ `Unverified` +/// (suppressed quietly — never feeds counts or exit codes). Sends requests +/// only when a token is configured and at least one vulnerable verdict +/// proposed a steer; proposals dedup by normalized (name, version). +fn verify_steers(report: &mut PrecheckReport, opts: &PrecheckOptions) { + let Some(cfg) = &opts.verdict else { return }; + let manager = report.manager; + + let mut proposals: Vec<(&str, &[crate::vuln_api::VulnMatch])> = Vec::new(); + for o in &report.outcomes { + if let TargetOutcome::Resolved { + resolved, + verdict: VerdictStatus::Vulnerable(matches), + .. + } = o + { + proposals.push((&resolved.name, matches)); + } + } + if let Some(TreeReport::Full { transitive, .. 
}) = &report.tree { + for t in transitive { + if let VerdictStatus::Vulnerable(matches) = &t.verdict { + proposals.push((&t.name, matches)); + } + } + } + + let mut seen = std::collections::HashSet::new(); + let mut jobs: Vec<tree::TreePackage> = Vec::new(); + for (name, matches) in proposals { + let Some(safe) = safe_version(matches) else { + continue; + }; + if seen.insert((manager.normalize_name(name), safe.clone())) { + jobs.push(tree::TreePackage { + name: name.to_string(), + version: safe, + }); + } + } + if jobs.is_empty() { + return; + } + + let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + report.steers = results + .into_iter() + .map(|(pkg, verdict)| { + let check = match verdict { + VerdictStatus::Clean => SteerCheck::Verified, + VerdictStatus::Vulnerable(_) => SteerCheck::Rejected, + VerdictStatus::Unverifiable(_) | VerdictStatus::NotChecked(_) => { + SteerCheck::Unverified + } + }; + ((manager.normalize_name(&pkg.name), pkg.version), check) + }) + .collect(); +} + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { if opts.force { return false; @@ -730,9 +812,31 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option<String> { } } -/// Per-match advisory lines plus the safe-version steer, shared by the -/// named-target and transitive vulnerable render arms. -fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { +/// The safe-version proposal for a vulnerable package, paired with its +/// `verify_steers` re-check. `None` when no version can be proposed at all; +/// a proposal absent from the steer map counts as `Unverified` so callers +/// suppress it. 
+fn steer_for( + report: &PrecheckReport, + name: &str, + matches: &[crate::vuln_api::VulnMatch], +) -> Option<(String, SteerCheck)> { + let safe = safe_version(matches)?; + let check = report + .steers + .get(&(report.manager.normalize_name(name), safe.clone())) + .copied() + .unwrap_or(SteerCheck::Unverified); + Some((safe, check)) +} + +/// Per-match advisory lines plus the verified safe-version steer, shared by +/// the named-target and transitive vulnerable render arms. +fn print_vulnerable_matches( + report: &PrecheckReport, + name: &str, + matches: &[crate::vuln_api::VulnMatch], +) { for m in matches { println!( " {} ({}){}", @@ -741,8 +845,14 @@ fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) fix_note(m) ); } - if let Some(safe) = safe_version(matches) { - println!(" → safe version: {name}@{safe}"); + match steer_for(report, name, matches) { + Some((safe, SteerCheck::Verified)) => { + println!(" → safe version: {name}@{safe}"); + } + Some((safe, SteerCheck::Rejected)) => { + println!(" → advertised fix {safe} is also flagged — no safe version to suggest"); + } + Some((_, SteerCheck::Unverified)) | None => {} } } @@ -781,7 +891,7 @@ fn print_text(report: &PrecheckReport) { " ✗ {}@{} (transitive) known vulnerable:", t.name, t.version ); - print_vulnerable_matches(&t.name, matches); + print_vulnerable_matches(report, &t.name, matches); } VerdictStatus::Unverifiable(error) => { println!( @@ -814,7 +924,7 @@ fn print_text(report: &PrecheckReport) { " ✗ {} → {}@{} known vulnerable:", target.display, resolved.name, resolved.version, ); - print_vulnerable_matches(&resolved.name, matches); + print_vulnerable_matches(report, &resolved.name, matches); } VerdictStatus::Unverifiable(error) => { println!( @@ -855,15 +965,21 @@ fn print_text(report: &PrecheckReport) { /// JSON shape for a single verdict. Shared by named outcomes and tree /// (transitive) outcomes so both render verdicts identically. 
-fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { +/// `remediation` carries the safe version only when its steer re-check +/// came back `Verified`; rejected/unverified steers emit `null`. +fn verdict_json(report: &PrecheckReport, name: &str, verdict: &VerdictStatus) -> serde_json::Value { use serde_json::json; match verdict { VerdictStatus::Clean => json!({ "status": "clean" }), VerdictStatus::Vulnerable(matches) => { + let remediation = match steer_for(report, name, matches) { + Some((safe, SteerCheck::Verified)) => Some(safe), + _ => None, + }; json!({ "status": "vulnerable", "matches": matches, - "remediation": safe_version(matches), + "remediation": remediation, }) } VerdictStatus::Unverifiable(error) => { @@ -888,7 +1004,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { recent, verdict, } => { - let verdict_json = verdict_json(verdict); + let verdict_json = verdict_json(report, &resolved.name, verdict); json!({ "status": if *recent { "recent" } else { "ok" }, "spec": target.display, @@ -937,7 +1053,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "transitive": transitive.iter().map(|o| json!({ "name": o.name, "version": o.version, - "verdict": verdict_json(&o.verdict), + "verdict": verdict_json(report, &o.name, &o.verdict), })).collect::<Vec<_>>(), }), TreeReport::NamedOnly { reason } => json!({ @@ -1063,6 +1179,7 @@ mod tests { outcomes, threshold: Duration::from_secs(2 * 86400), tree: None, + steers: HashMap::new(), } } @@ -1354,4 +1471,120 @@ mod tests { fn safe_version_empty_matches_is_none() { assert_eq!(safe_version(&[]), None); } + + fn vulnerable_outcome(name: &str, version: &str, fixed: Option<&str>) -> TargetOutcome { + let mut o = resolved_outcome(name, version, false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![vm("A-1", fixed)])); + o + } + + /// `verify_steers` re-verdicts each proposed fix, from named and + /// transitive findings alike: clean → Verified, flagged → Rejected, + /// 5xx 
→ Unverified. Counts and the block predicate never move. + #[test] + fn verify_steers_maps_reverdicts() { + let key = |name: &str, ver: &str| ("pypi".to_string(), name.to_string(), ver.to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("badfix", "3.0.0"), + r#"{"ecosystem":"pypi","package_name":"badfix","version":"3.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0009","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + checks.insert(key("flaky", "4.0.0"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky", "4.0.0"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); + opts.verdict = Some(VerdictConfig { + base_url: stub.base_url.clone(), + token: "test-token".to_string(), + }); + + // oldpkg's fix is unknown to the stub → default clean; badfix's fix is + // flagged; flaky's fix 503s. badfix arrives via the transitive arm. + let mut report = report_with(vec![ + vulnerable_outcome("oldpkg", "1.0.0", Some("2.0.0")), + vulnerable_outcome("flaky", "1.0.0", Some("4.0.0")), + ]); + report.tree = Some(TreeReport::Full { + resolved_count: 3, + transitive: vec![TreeOutcome { + name: "badfix".to_string(), + version: "0.1.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-2", Some("3.0.0"))]), + }], + }); + verify_steers(&mut report, &opts); + + let steer = |name: &str, ver: &str| { + report + .steers + .get(&(name.to_string(), ver.to_string())) + .copied() + }; + assert_eq!(steer("oldpkg", "2.0.0"), Some(SteerCheck::Verified)); + assert_eq!(steer("badfix", "3.0.0"), Some(SteerCheck::Rejected)); + assert_eq!(steer("flaky", "4.0.0"), Some(SteerCheck::Unverified)); + + // Steer re-checks never feed counts or the block decision. 
+ assert_eq!(report.vulnerable_count(), 3); + assert_eq!(report.unverifiable_count(), 0); + } + + /// Tokenless mode never sends steer requests; `steer_for` treats a + /// missing map entry as Unverified. + #[test] + fn verify_steers_noop_without_token() { + let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", Some("2.0.0"))]); + verify_steers(&mut report, &opts); + assert!(report.steers.is_empty()); + assert_eq!( + steer_for(&report, "oldpkg", &[vm("A-1", Some("2.0.0"))]), + Some(("2.0.0".to_string(), SteerCheck::Unverified)) + ); + } + + /// No proposal (fix unknown) ⇒ no requests at all: with the vuln-api at a + /// dead address, an attempted request would land as Unverified. + #[test] + fn verify_steers_skips_requests_without_proposals() { + let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); + opts.verdict = Some(VerdictConfig { + base_url: "http://127.0.0.1:9".to_string(), + token: "test-token".to_string(), + }); + let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", None)]); + verify_steers(&mut report, &opts); + assert!(report.steers.is_empty()); + } + + /// Proposals dedup by normalized (name, version): two pypi spellings of + /// the same package produce one steer entry, and `steer_for` resolves it + /// for either spelling. 
+ #[test] + fn verify_steers_dedups_by_normalized_name() { + let stub = crate::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); + let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); + opts.verdict = Some(VerdictConfig { + base_url: stub.base_url.clone(), + token: "test-token".to_string(), + }); + let mut report = report_with(vec![ + vulnerable_outcome("Flask_Cors", "1.0.0", Some("2.0.0")), + vulnerable_outcome("flask-cors", "1.0.0", Some("2.0.0")), + ]); + verify_steers(&mut report, &opts); + assert_eq!(report.steers.len(), 1); + for spelling in ["Flask_Cors", "flask-cors"] { + assert_eq!( + steer_for(&report, spelling, &[vm("A-1", Some("2.0.0"))]), + Some(("2.0.0".to_string(), SteerCheck::Verified)), + "spelling {spelling}" + ); + } + } } diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs index e583240..058493c 100644 --- a/tests/cli_remediation.rs +++ b/tests/cli_remediation.rs @@ -1,5 +1,8 @@ //! Hermetic e2e tests for remediation steering: a blocked install names the -//! safe version from the verdict's `fixed_version` data. +//! safe version from the verdict's `fixed_version` data — but only after the +//! proposed version itself re-verdicts clean against vuln-api. A flagged +//! proposal prints the rejection note instead; a failed re-check suppresses +//! the steer quietly without moving counts or exit codes. //! //! Mirrors the `cli_verdict.rs` harness (inline PyPI stub published 2020 so //! recency never blocks, a fake pip recording its argv, the in-crate vuln-api @@ -37,6 +40,15 @@ fn no_fix_body() -> String { .to_string() } +/// The advertised fix `oldpkg@2.0.0` is itself flagged — the steer re-check +/// must reject it. 
+fn flagged_fix_body() -> String { + r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"2.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string() +} + /// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never /// recent). Everything else 404s. fn spawn_pypi_stub() -> String { @@ -112,12 +124,21 @@ struct RemediationHarness { impl RemediationHarness { fn new(checks: HashMap<PackageKey, String>, token: Option<&str>, pip_exit_code: i32) -> Self { + Self::with_statuses(checks, HashMap::new(), token, pip_exit_code) + } + + fn with_statuses( + checks: HashMap<PackageKey, String>, + statuses: HashMap<PackageKey, u16>, + token: Option<&str>, + pip_exit_code: i32, + ) -> Self { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); write_fake_pip(bin.path(), &marker, pip_exit_code); let registry = spawn_pypi_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) .env("CORGEA_PYPI_REGISTRY", &registry) .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); @@ -139,6 +160,8 @@ impl RemediationHarness { #[test] fn fixed_match_blocks_and_names_safe_version() { + // The stub answers default-clean for the unscripted `oldpkg@2.0.0` steer + // re-check, so the proposal verifies and the steer prints. 
let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); let mut h = RemediationHarness::new(checks, Some("test-token"), 0); @@ -232,3 +255,95 @@ fn json_remediation_null_when_no_fix() { "remediation must be null when no fix is known: {parsed}" ); } + +#[test] +fn rejected_fix_prints_rejection_instead_of_steer() { + // oldpkg@1.0.0 is vulnerable with an advertised fix of 2.0.0 — but the + // stub flags 2.0.0 too, so the steer must turn into the rejection note. + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("advertised fix 2.0.0 is also flagged — no safe version to suggest"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("safe version:"), + "a rejected fix must not print the steer: {stdout}" + ); + assert!( + stdout.contains("1 vulnerable, 0 unverifiable"), + "the steer re-check must not inflate counts: {stdout}" + ); +} + +#[test] +fn unverified_fix_suppresses_steer_quietly() { + // The steer re-check for oldpkg@2.0.0 503s. The steer disappears with no + // substitute line, and counts/exit stay exactly as without the re-check. 
+ let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "2.0.0"), 503u16); + let mut h = RemediationHarness::with_statuses(checks, statuses, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + !stdout.contains("safe version:"), + "an unverified fix must not print the steer: {stdout}" + ); + assert!( + !stdout.contains("also flagged"), + "an unverified fix must stay quiet, not claim rejection: {stdout}" + ); + assert!( + stdout.contains("1 vulnerable, 0 unverifiable"), + "a failed steer re-check must not change counts: {stdout}" + ); + assert!( + stdout.contains("fixed in 2.0.0"), + "advisory fix data still prints: {stdout}" + ); +} + +#[test] +fn json_remediation_null_when_fix_rejected() { + let mut checks = HashMap::new(); + checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); + checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); + let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "--json", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + let v = &parsed["results"][0]["verdict"]; + assert_eq!(v["status"], "vulnerable", "parsed: {parsed}"); + assert!( + v["remediation"].is_null(), + "remediation must be null when the fix re-verdicts vulnerable: {parsed}" + ); + assert_eq!( + parsed["summary"]["vulnerable"], 1, + "the steer re-check must not inflate counts: {parsed}" + ); +} From aae91130ca9ea2fc0741c120832c94212d38a051 Mon Sep 17 
00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:40:44 +0200 Subject: [PATCH 14/59] Distinguish existing-tree findings from named targets in install refusal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When every vulnerable finding sits in the resolved tree beyond the named targets (and no named target is unverifiable), the refusal now says the existing dependency tree is the problem instead of implying the package the user typed is at fault. The text summary line gains a "(N from existing tree)" parenthetical on the vulnerable/unverifiable counts when the tree contributed findings. Messaging only — should_block_install semantics are unchanged. --- src/precheck/mod.rs | 90 +++++++++---- tests/cli_refusal_context.rs | 238 +++++++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 26 deletions(-) create mode 100644 tests/cli_refusal_context.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..c258dff 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -213,26 +213,30 @@ impl PrecheckReport { self.count(|o| matches!(o, TargetOutcome::Resolved { recent: true, .. })) } pub fn vulnerable_count(&self) -> usize { - self.count(|o| { - matches!( - o, - TargetOutcome::Resolved { - verdict: VerdictStatus::Vulnerable(_), - .. - } - ) - }) + self.tree_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + self.named_vulnerable_count() + self.tree_vulnerable_count() } pub fn unverifiable_count(&self) -> usize { - self.count(|o| { - matches!( - o, - TargetOutcome::Resolved { - verdict: VerdictStatus::Unverifiable(_), - .. - } - ) - }) + self.tree_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + self.named_unverifiable_count() + self.tree_unverifiable_count() + } + /// Vulnerable findings among the named targets this command adds. 
+ pub fn named_vulnerable_count(&self) -> usize { + self.named_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + } + /// Unverifiable findings among the named targets this command adds. + pub fn named_unverifiable_count(&self) -> usize { + self.named_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + } + /// Count named (resolved) outcomes whose verdict matches `pred`. + fn named_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { + self.count(|o| matches!(o, TargetOutcome::Resolved { verdict, .. } if pred(verdict))) + } + /// Vulnerable findings beyond the named targets (the resolved tree). + pub fn tree_vulnerable_count(&self) -> usize { + self.tree_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + } + /// Unverifiable findings beyond the named targets (the resolved tree). + pub fn tree_unverifiable_count(&self) -> usize { + self.tree_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) } /// Count transitive tree findings whose verdict matches `pred`. fn tree_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { @@ -392,11 +396,7 @@ fn run_parsed_install( if should_block_install(&report, &opts) { if !opts.json { - if report.vulnerable_count() > 0 || report.unverifiable_count() > 0 { - eprintln!("Refusing to run install. Pass --force to proceed despite findings."); - } else { - eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); - } + print_refusal(&report); } return 1; } @@ -404,6 +404,24 @@ fn run_parsed_install( exec() } +/// The refusal line on stderr. When vulnerable findings exist but none sit on +/// a named target — and no named target is unverifiable either — the block is +/// entirely the existing tree's doing, so say that instead of implying the +/// package the user typed is at fault. Messaging only; the block decision +/// stays with `should_block_install`. 
+fn print_refusal(report: &PrecheckReport) { + let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); + if report.vulnerable_count() > 0 && named_findings == 0 { + eprintln!( + "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." + ); + } else if report.vulnerable_count() > 0 || report.unverifiable_count() > 0 { + eprintln!("Refusing to run install. Pass --force to proceed despite findings."); + } else { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); + } +} + /// Print the "requirements files are not recency-checked" note when the /// install carried any `-r` files. No-op otherwise. fn requirements_note(parsed: &parse::ParsedInstall) { @@ -746,6 +764,18 @@ fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) } } +/// One summary-line segment, e.g. `"2 vulnerable (2 from existing tree)"`. +/// The parenthetical separates findings the resolved tree carried in from +/// findings on the targets this command names; omitted when the tree +/// contributed none. 
+fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { + if from_tree > 0 { + format!("{total} {label} ({from_tree} from existing tree)") + } else { + format!("{total} {label}") + } +} + fn print_text(report: &PrecheckReport) { println!( "Pre-checking `{} {} {}` (threshold {})", @@ -755,11 +785,19 @@ fn print_text(report: &PrecheckReport) { verify_deps::format_duration(report.threshold) ); println!( - " {} ok, {} recent, {} vulnerable, {} unverifiable, {} skipped, {} errors", + " {} ok, {} recent, {}, {}, {} skipped, {} errors", report.ok_count(), report.recent_count(), - report.vulnerable_count(), - report.unverifiable_count(), + summary_segment( + report.vulnerable_count(), + report.tree_vulnerable_count(), + "vulnerable" + ), + summary_segment( + report.unverifiable_count(), + report.tree_unverifiable_count(), + "unverifiable" + ), report.skipped_count(), report.error_count(), ); diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs new file mode 100644 index 0000000..0ef3e72 --- /dev/null +++ b/tests/cli_refusal_context.rs @@ -0,0 +1,238 @@ +//! Hermetic e2e tests for refusal-message context: when every vulnerable +//! finding sits in the resolved tree beyond the named targets, the refusal +//! must say the existing tree is the problem; a finding on a named target +//! keeps the generic refusal. +//! +//! Same harness as `cli_tree.rs`, pip-only: a fake pip on a private PATH +//! answers the `--dry-run --report -` tree pass with a canned report, a local +//! pypi registry stub publishes `oldpkg` in 2020 (recency never blocks), and +//! the in-crate vuln-api stub supplies verdicts. Every block here is the +//! verdict's doing. 
+ +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +/// Refusal when the existing tree alone caused the block. +const TREE_REFUSAL: &str = "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force."; +/// Refusal when a named target carries a blocking verdict. +const GENERIC_REFUSAL: &str = "Refusing to run install. Pass --force to proceed despite findings."; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// Pip `--report -` payload: `oldpkg` (named) + `evildep` (transitive). +const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +fn vulnerable_body(name: &str, version: &str) -> String { + format!( + r#"{{"ecosystem":"pypi","package_name":"{name}","version":"{version}","is_vulnerable":true, + "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}}]}}"# + ) +} + +/// Registry stub serving `/pypi/oldpkg/json`, published 2020 → never recent. +/// Everything else 404s. 
+fn spawn_pypi_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + + let (status, body) = match path { + "/pypi/oldpkg/json" => ( + "200 OK", + r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#, + ), + _ => ("404 Not Found", r#"{"message":"not found"}"#), + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write an executable fake pip into `dir`. A `--dry-run` invocation emits +/// the canned tree report on stdout and exits 0; any other invocation records +/// its argv to `marker` and exits 0. The payload is read via shell builtins +/// because the test's locked-down `PATH` has no `cat`; the `|| [ -n "$line" ]` +/// guard keeps the final line when the payload file has no trailing newline. 
+fn write_fake_pip(dir: &Path, marker: &Path) { + use std::os::unix::fs::PermissionsExt; + let payload_path = dir.join("pip-tree-payload.json"); + std::fs::write(&payload_path, TREE_REPORT).expect("write fake pip payload"); + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{payload}'; exit 0;; esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", + payload = payload_path.display(), + marker = marker.display(), + ); + let path = dir.join("pip"); + std::fs::write(&path, script).expect("write fake pip"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake pip"); +} + +/// `corgea` wired to the registry stub, a tree-aware fake pip, and a vuln-api +/// stub. +struct Harness { + cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl Harness { + fn new(checks: HashMap<PackageKey, String>, statuses: HashMap<PackageKey, u16>) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pip(bin.path(), &marker); + let registry = spawn_pypi_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token"); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + fn run_install(&mut self) -> std::process::Output { + self.cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea") + } + + fn pip_ran(&self) -> bool { + self.marker.exists() + } +} + +#[test] +fn transitive_only_vulnerable_gets_existing_tree_refusal() { + // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut h = Harness::new(checks, HashMap::new()); + let out = h.run_install(); + + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert!(!h.pip_ran(), "pip must not run on a blocked install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(TREE_REFUSAL), + "stderr must carry the existing-tree refusal: {stderr}" + ); + assert!( + !stderr.contains(GENERIC_REFUSAL), + "generic refusal must be replaced, not appended: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("1 vulnerable (1 from existing tree)"), + "summary must attribute the finding to the tree: {stdout}" + ); +} + +#[test] +fn named_vulnerable_keeps_generic_refusal() { + // The named `oldpkg` itself is flagged; `evildep` is clean. + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("oldpkg", "1.0.0"), + ); + let mut h = Harness::new(checks, HashMap::new()); + let out = h.run_install(); + + assert_eq!(out.status.code(), Some(1), "named vuln must block"); + assert!(!h.pip_ran(), "pip must not run on a blocked install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "named finding keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire on a named finding: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + !stdout.contains("from existing tree"), + "summary must not attribute a named finding to the tree: {stdout}" + ); +} + +#[test] +fn named_unverifiable_with_transitive_vulnerable_keeps_generic_refusal() { + // The named `oldpkg` verdict 503s (unverifiable, fail-closed) while the + // transitive `evildep` is vulnerable. 
The command's own target is part of + // the block, so the existing-tree refusal would mislead. + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut statuses = HashMap::new(); + statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); + let mut h = Harness::new(checks, statuses); + let out = h.run_install(); + + assert_eq!(out.status.code(), Some(1), "must block"); + assert!(!h.pip_ran(), "pip must not run on a blocked install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "named unverifiable keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire while a named target blocks: {stderr}" + ); +} From fa14fa03f1addb7d64ed68d2bedc7a72850dc8a5 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:41:59 +0200 Subject: [PATCH 15/59] Polish gate output: honest tokenless warning, progress line, collapsed outage noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Tokenless note becomes a warning that states the consequence: known-vulnerable packages will NOT be blocked (recency-only). - verdict_pool prints 'checking N packages against Corgea vuln-api…' to stderr when more than 8 jobs run, so big tree passes don't look hung. - print_text collapses >3 unverifiable findings sharing an error-prefix (the vuln-api outage case) into one line; counts and exit codes unchanged. 
--- src/precheck/mod.rs | 151 ++++++++++++++++++++++++++++++++++++++++--- tests/cli_verdict.rs | 118 ++++++++++++++++++++++++++++++--- 2 files changed, 252 insertions(+), 17 deletions(-) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..3b8ba41 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -371,7 +371,7 @@ fn run_parsed_install( } if opts.verdict.is_none() { eprintln!( - "note: no Corgea token — vulnerability verdicts skipped (recency-only). Run `corgea login` for the full gate." + "warning: no Corgea token — known-vulnerable packages will NOT be blocked (recency-only). Run 'corgea login' for the full gate." ); } @@ -481,6 +481,10 @@ fn run_tree_pass( } } +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + /// Bounded worker pool over the verdict jobs — owns client creation and the /// fail-closed policy: on client failure every job comes back `Unverifiable`. /// Plain work queue, no new crates; `reqwest::blocking::Client` is @@ -505,6 +509,10 @@ fn verdict_pool( } }; + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); + } + let ecosystem = manager.ecosystem(); let workers = concurrency.clamp(1, 32).min(jobs.len().max(1)); let queue = Mutex::new(VecDeque::from(jobs)); @@ -746,7 +754,68 @@ fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) } } +/// More than this many unverifiable findings with the same error-prefix +/// render as one collapsed line instead of one line per package. +const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; + +/// Group key for collapsing repeated unverifiable errors: the text before +/// the first `(` — strips per-package detail (URLs, status codes) so one +/// outage groups under one key. 
+fn error_prefix(error: &str) -> &str { + match error.find('(') { + Some(i) => error[..i].trim_end(), + None => error, + } +} + +/// Unverifiable error strings across transitive tree findings and named +/// outcomes, in render order. +fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { + let mut errors = Vec::new(); + if let Some(TreeReport::Full { transitive, .. }) = &report.tree { + for t in transitive { + if let VerdictStatus::Unverifiable(e) = &t.verdict { + errors.push(e.as_str()); + } + } + } + for o in &report.outcomes { + if let TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(e), + .. + } = o + { + errors.push(e.as_str()); + } + } + errors +} + +/// `(prefix, count, first error)` groups of unverifiable findings large +/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — +/// the vuln-api outage case, where every package fails the same way. +/// Display-only: counts and exit codes never change. +fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { + let mut groups: Vec<(&str, usize, &str)> = Vec::new(); + for e in unverifiable_errors(report) { + let prefix = error_prefix(e); + match groups.iter_mut().find(|(p, _, _)| *p == prefix) { + Some((_, count, _)) => *count += 1, + None => groups.push((prefix, 1, e)), + } + } + groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); + groups +} + fn print_text(report: &PrecheckReport) { + let collapsed = collapsed_unverifiable_groups(report); + let is_collapsed = |error: &str| { + collapsed + .iter() + .any(|(prefix, _, _)| *prefix == error_prefix(error)) + }; + println!( "Pre-checking `{} {} {}` (threshold {})", report.manager.binary_name(), @@ -784,10 +853,12 @@ fn print_text(report: &PrecheckReport) { print_vulnerable_matches(&t.name, matches); } VerdictStatus::Unverifiable(error) => { - println!( - " ⚠ {}@{} (transitive) could not be verified: {}", - t.name, t.version, error - ); + if !is_collapsed(error) { + 
println!( + " ⚠ {}@{} (transitive) could not be verified: {}", + t.name, t.version, error + ); + } } // Clean / not-checked transitive entries stay quiet in text mode. VerdictStatus::Clean | VerdictStatus::NotChecked(_) => {} @@ -800,6 +871,13 @@ fn print_text(report: &PrecheckReport) { None => {} } + // One line per collapsed outage group instead of one per package. + for (_, count, first_error) in &collapsed { + println!( + " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" + ); + } + for o in &report.outcomes { match o { TargetOutcome::Resolved { @@ -817,10 +895,12 @@ fn print_text(report: &PrecheckReport) { print_vulnerable_matches(&resolved.name, matches); } VerdictStatus::Unverifiable(error) => { - println!( - " ⚠ {} → {}@{} could not be verified: {}", - target.display, resolved.name, resolved.version, error, - ); + if !is_collapsed(error) { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } } VerdictStatus::Clean | VerdictStatus::NotChecked(_) => { if *recent { @@ -1354,4 +1434,57 @@ mod tests { fn safe_version_empty_matches_is_none() { assert_eq!(safe_version(&[]), None); } + + #[test] + fn error_prefix_strips_parenthesized_detail() { + // The reqwest network-failure shape: per-package URL in parens. + assert_eq!( + error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), + "Failed to send vuln-api request: error sending request for url" + ); + assert_eq!( + error_prefix("vuln-api unavailable (HTTP 503)"), + "vuln-api unavailable" + ); + assert_eq!(error_prefix("no parens here"), "no parens here"); + } + + /// Four unverifiable findings sharing a prefix collapse into one group + /// (named + transitive both count); three do not. 
+ #[test] + fn collapsed_groups_require_more_than_threshold() { + let unverifiable = |name: &str| { + let mut o = resolved_outcome(name, "1.0.0", false); + set_verdict( + &mut o, + VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), + ); + o + }; + + let mut report = report_with(vec![ + unverifiable("a"), + unverifiable("b"), + unverifiable("c"), + ]); + assert!(collapsed_unverifiable_groups(&report).is_empty()); + + report.tree = Some(TreeReport::Full { + resolved_count: 4, + transitive: vec![TreeOutcome { + name: "d".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Unverifiable( + "vuln-api unavailable (HTTP 503: d)".to_string(), + ), + }], + }); + let groups = collapsed_unverifiable_groups(&report); + assert_eq!(groups.len(), 1); + let (prefix, count, first) = groups[0]; + assert_eq!(prefix, "vuln-api unavailable"); + assert_eq!(count, 4); + // Render order is transitive-first, so the tree finding leads. + assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); + } } diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 4b661f0..1aab577 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -31,8 +31,8 @@ fn vulnerable_oldpkg_body() -> String { .to_string() } -/// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never -/// recent). Everything else 404s. +/// Registry stub serving `/pypi/<name>/json` for any single-segment name, +/// always version 1.0.0 published 2020 → never recent. Everything else 404s.
fn spawn_pypi_stub() -> String { let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); @@ -58,12 +58,18 @@ fn spawn_pypi_stub() -> String { .unwrap_or("") .to_string(); - let (status, body) = match path.as_str() { - "/pypi/oldpkg/json" => ( + let name = path + .strip_prefix("/pypi/") + .and_then(|p| p.strip_suffix("/json")) + .filter(|n| !n.is_empty() && !n.contains('/')); + let (status, body) = match name { + Some(name) => ( "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), + format!( + r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# + ), ), - _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), + None => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), }; let response = format!( "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", @@ -222,9 +228,105 @@ fn tokenless_degrades_to_recency_only_with_login_prompt() { "old + unchecked package must install" ); assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); assert!( - String::from_utf8_lossy(&out.stderr).contains("corgea login"), - "tokenless mode must prompt for login" + stderr.contains("corgea login"), + "tokenless mode must prompt for login: {stderr}" + ); + assert!( + stderr.contains("warning: no Corgea token") && stderr.contains("will NOT be blocked"), + "tokenless warning must state the consequence: {stderr}" + ); +} + +#[test] +fn progress_line_prints_only_above_eight_verdict_jobs() { + // Nine resolvable named targets → 9 verdict jobs (> 8) → progress line. 
+ let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut args = vec!["pip".to_string(), "install".to_string()]; + args.extend((1..=9).map(|i| format!("pkg{i}==1.0.0"))); + let out = h.cmd.args(&args).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "all clean + old must install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("checking 9 packages against Corgea vuln-api"), + "stderr: {stderr}" + ); + + // Two jobs → quiet. + let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "pkg1==1.0.0", "pkg2==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("against Corgea vuln-api"), + "no progress line at or below 8 jobs: {stderr}" + ); +} + +#[test] +fn outage_noise_collapses_above_three_unverifiable() { + // vuln-api refuses connections: every check fails with the same + // error-prefix (only the per-package URL differs). Four findings → + // one collapsed line; counts and fail-closed exit code unchanged. 
+ let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args([ + "pip", + "install", + "pkg1==1.0.0", + "pkg2==1.0.0", + "pkg3==1.0.0", + "pkg4==1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unverifiable must still block"); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("4 packages could not be verified (vuln-api unreachable:"), + "stdout: {stdout}" + ); + assert!( + !stdout.contains("could not be verified:"), + "per-package lines must collapse: {stdout}" + ); + assert!( + stdout.contains("4 unverifiable"), + "summary counts unchanged: {stdout}" + ); + + // Three findings stay per-line — no collapse at the threshold. + let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args([ + "pip", + "install", + "pkg1==1.0.0", + "pkg2==1.0.0", + "pkg3==1.0.0", + ]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + let stdout = String::from_utf8_lossy(&out.stdout); + assert_eq!( + stdout.matches("could not be verified:").count(), + 3, + "three findings must keep per-package lines: {stdout}" + ); + assert!( + !stdout.contains("vuln-api unreachable:"), + "no collapsed line at exactly the threshold: {stdout}" ); } From 8a41c5ebcdc5d6660f672f74ac25e7748cfc425d Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 07:50:05 +0200 Subject: [PATCH 16/59] Add warn-only npm audit second opinion to the install gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve_tree now returns TreeResolution{packages, audit}: for npm, the dry-run temp dir moves into a detached thread that runs `npm audit --json --package-lock-only` (5s deadline, kill on timeout; 
stdout parsed regardless of exit code — audit exits 1 when it finds advisories). run_tree_pass collects the summary via recv_timeout(2s) after the verdict pool so the two overlap; any failure is a silent skip. The signal is supplementary only: should_block_install never consults it. When total>0 a note prints to stderr; --json carries an `npm_audit` object (or null) in the tree arm. CORGEA_NO_NPM_AUDIT=1 disables. --- skills/corgea/SKILL.md | 3 +- src/precheck/mod.rs | 52 +++++- src/precheck/tree.rs | 225 +++++++++++++++++++++++++- tests/cli_npm_audit.rs | 359 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 629 insertions(+), 10 deletions(-) create mode 100644 tests/cli_npm_audit.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index b034506..13df903 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -160,7 +160,8 @@ or `null` when any advisory has no known fix. Recency gating needs no token; the vuln verdict uses the configured Corgea token when present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, -`CORGEA_VULN_API_URL`. +`CORGEA_VULN_API_URL`; `CORGEA_NO_NPM_AUDIT=1` disables the warn-only `npm audit` +second opinion. ### Deps — `corgea deps ` diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 22d13ff..e7e25ce 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -186,6 +186,9 @@ pub enum TreeReport { resolved_count: usize, /// Verdicts for resolved packages beyond the named targets. transitive: Vec, + /// Warn-only `npm audit` second opinion (npm only; `None` when + /// unavailable, disabled, or failed). Never consulted for blocking. + audit: Option, }, /// Resolution unavailable or failed — only named targets were verified. NamedOnly { reason: String }, @@ -364,6 +367,20 @@ fn run_parsed_install( "warning: transitive dependencies not checked ({reason}); only named packages were verified." 
); } + // Warn-only npm audit second opinion: never blocks, never changes + // exit codes (`should_block_install` ignores it by design). + if let Some(TreeReport::Full { + audit: Some(audit), .. + }) = &tree + { + if audit.total > 0 { + eprintln!( + "note: npm audit reports {} advisories ({} high/critical) — supplementary signal, not blocking", + audit.total, + audit.high + audit.critical + ); + } + } // The requirements note only matters when the tree pass did *not* cover // those files (fallback to named-only, or recency-only mode). if !matches!(&tree, Some(TreeReport::Full { .. })) { @@ -431,8 +448,11 @@ fn run_tree_pass( outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let set = match tree::resolve_tree(manager, rest) { - Ok(Some(set)) => set, + let tree::TreeResolution { + packages: set, + audit: audit_rx, + } = match tree::resolve_tree(manager, rest) { + Ok(Some(resolution)) => resolution, Ok(None) => { run_verdict_pass(manager, outcomes, opts); return TreeReport::NamedOnly { @@ -474,10 +494,15 @@ fn run_tree_pass( .as_ref() .expect("tree pass requires verdict config"); let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + // Collect the warn-only npm audit second opinion only after the verdict + // pool so the two truly overlap; any failure (timeout, disconnected + // sender) is a silent skip. + let audit = audit_rx.and_then(|rx| rx.recv_timeout(Duration::from_secs(2)).ok()); let transitive = apply_verdicts(manager, results, outcomes); TreeReport::Full { resolved_count, transitive, + audit, } } @@ -768,6 +793,7 @@ fn print_text(report: &PrecheckReport) { Some(TreeReport::Full { resolved_count, transitive, + .. }) => { println!( " tree: {} packages resolved, {} transitive checked", @@ -875,6 +901,23 @@ fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { } } +/// JSON shape for the warn-only npm audit second opinion in the tree arm. 
+fn npm_audit_json(audit: &tree::AuditSummary) -> serde_json::Value { + use serde_json::json; + json!({ + "total": audit.total, + "critical": audit.critical, + "high": audit.high, + "moderate": audit.moderate, + "low": audit.low, + "info": audit.info, + "top": audit.top.iter().map(|(name, severity)| json!({ + "name": name, + "severity": severity, + })).collect::>(), + }) +} + fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { use serde_json::json; let outcomes: Vec<_> = report @@ -930,7 +973,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "verdict_mode": if opts.verdict.is_some() { "full" } else { "recency-only" }, "results": outcomes, "tree": report.tree.as_ref().map(|t| match t { - TreeReport::Full { resolved_count, transitive } => json!({ + TreeReport::Full { resolved_count, transitive, audit } => json!({ "mode": "full", "reason": serde_json::Value::Null, "resolved_count": resolved_count, @@ -939,12 +982,14 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "version": o.version, "verdict": verdict_json(&o.verdict), })).collect::>(), + "npm_audit": audit.as_ref().map(npm_audit_json), }), TreeReport::NamedOnly { reason } => json!({ "mode": "named-only", "reason": reason, "resolved_count": 0, "transitive": [], + "npm_audit": serde_json::Value::Null, }), }), }); @@ -1161,6 +1206,7 @@ mod tests { version: "0.4.2".to_string(), verdict: VerdictStatus::Vulnerable(vec![]), }], + audit: None, }); assert_eq!(report.vulnerable_count(), 1); diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 5f4db68..1c1f2c6 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -4,7 +4,10 @@ //! pip: `--only-binary :all:` prevents sdist builds (pypa/pip#13091). //! npm: `--ignore-scripts` guards npm/cli#2787. 
-use std::process::Command; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::sync::mpsc; +use std::time::{Duration, Instant}; use super::PackageManager; @@ -14,6 +17,28 @@ pub struct TreePackage { pub version: String, } +/// Warn-only `npm audit` second opinion: counts from +/// `metadata.vulnerabilities` plus the worst few advisories. Never blocks. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AuditSummary { + pub total: u64, + pub critical: u64, + pub high: u64, + pub moderate: u64, + pub low: u64, + pub info: u64, + /// Worst advisories as `(package name, severity)`, capped at + /// `AUDIT_TOP_LIMIT`, severest first. + pub top: Vec<(String, String)>, +} + +/// What `resolve_tree` hands back: the would-install set, plus (npm only) +/// a receiver for the concurrent `npm audit` second opinion. +pub struct TreeResolution { + pub packages: Vec, + pub audit: Option>, +} + /// Whether this manager's resolver has anything to resolve for the parsed /// install. pip's dry-run also reads `-r` requirements files, so those make /// a pip install eligible even with no named targets. @@ -27,9 +52,16 @@ pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstal pub fn resolve_tree( manager: PackageManager, install_args: &[String], -) -> Result>, String> { +) -> Result, String> { match manager { - PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Pip => { + resolve_pip_tree(manager.binary_name(), install_args).map(|packages| { + Some(TreeResolution { + packages, + audit: None, + }) + }) + } PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), // yarn/pnpm/uv have no safe dry-run for installs. _ => Ok(None), @@ -100,7 +132,7 @@ fn parse_pip_report(json: &str) -> Result, String> { /// /// `--ignore-scripts` because npm has run lifecycle scripts under /// `--package-lock-only` before (npm/cli#2787). 
-fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { +fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result { let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; for manifest in [ @@ -114,7 +146,7 @@ fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result Result bool { + std::env::var("CORGEA_NO_NPM_AUDIT").is_ok_and(|v| !v.trim().is_empty()) +} + +/// Kill the audit subprocess if it hasn't finished by then. +const AUDIT_DEADLINE: Duration = Duration::from_secs(5); + +/// Cap on `AuditSummary::top` advisory entries. +const AUDIT_TOP_LIMIT: usize = 5; + +/// Run `npm audit --json` in the dry-run temp dir, concurrent with the +/// verdict pool. The thread owns `work` so the dir outlives the resolver and +/// is cleaned up when the audit finishes. Any failure (spawn error, timeout, +/// unparsable output) drops the sender — the receiver sees a disconnect and +/// the gate silently skips the second opinion. +fn spawn_audit(work: tempfile::TempDir, npm: PathBuf) -> mpsc::Receiver { + let (tx, rx) = mpsc::channel(); + std::thread::spawn(move || { + if let Some(summary) = run_audit(work.path(), &npm) { + let _ = tx.send(summary); + } + drop(work); + }); + rx +} + +/// `npm audit` exits 1 when it finds advisories — that's the success case, +/// so stdout is parsed regardless of exit code. Stdout goes through a file +/// (not a pipe) so the deadline poll can't deadlock on a full pipe buffer. +/// `--package-lock-only` because the work dir holds only manifests and the +/// generated lockfile — never a `node_modules`. 
+fn run_audit(work: &std::path::Path, npm: &std::path::Path) -> Option { + let stdout_path = work.join("corgea-npm-audit.json"); + let stdout_file = std::fs::File::create(&stdout_path).ok()?; + let mut child = Command::new(npm) + .args(["audit", "--json", "--package-lock-only"]) + .current_dir(work) + .stdin(Stdio::null()) + .stdout(stdout_file) + .stderr(Stdio::null()) + .spawn() + .ok()?; + let deadline = Instant::now() + AUDIT_DEADLINE; + loop { + match child.try_wait() { + Ok(Some(_)) => break, + Ok(None) if Instant::now() < deadline => std::thread::sleep(Duration::from_millis(50)), + _ => { + let _ = child.kill(); + let _ = child.wait(); + return None; + } + } + } + parse_npm_audit(&std::fs::read_to_string(&stdout_path).ok()?) +} + +/// Parse npm audit report v2 (npm 7+): counts from `metadata.vulnerabilities`, +/// `top` from the `vulnerabilities` map, severest first. +fn parse_npm_audit(json: &str) -> Option { + let report: serde_json::Value = serde_json::from_str(json).ok()?; + let counts = report.get("metadata")?.get("vulnerabilities")?; + let count = |k: &str| counts.get(k).and_then(|v| v.as_u64()).unwrap_or(0); + let (critical, high, moderate, low, info) = ( + count("critical"), + count("high"), + count("moderate"), + count("low"), + count("info"), + ); + let total = counts + .get("total") + .and_then(|v| v.as_u64()) + .unwrap_or(critical + high + moderate + low + info); + let mut top: Vec<(String, String)> = report + .get("vulnerabilities") + .and_then(|v| v.as_object()) + .map(|vulns| { + vulns + .values() + .filter_map(|entry| { + Some(( + entry.get("name")?.as_str()?.to_string(), + entry.get("severity")?.as_str()?.to_string(), + )) + }) + .collect() + }) + .unwrap_or_default(); + top.sort_by(|a, b| (severity_rank(&a.1), &a.0).cmp(&(severity_rank(&b.1), &b.0))); + top.truncate(AUDIT_TOP_LIMIT); + Some(AuditSummary { + total, + critical, + high, + moderate, + low, + info, + top, + }) +} + +/// Sort key for npm audit severities, severest first. 
+fn severity_rank(severity: &str) -> u8 { + match severity { + "critical" => 0, + "high" => 1, + "moderate" => 2, + "low" => 3, + "info" => 4, + _ => 5, + } } fn parse_npm_lockfile(json: &str) -> Result, String> { @@ -274,6 +424,69 @@ mod tests { assert!(err.contains("no packages map"), "got: {err}"); } + // npm audit report v2 shape: per-package `vulnerabilities` map plus + // `metadata.vulnerabilities` counts. + const AUDIT_REPORT: &str = r#"{ + "auditReportVersion": 2, + "vulnerabilities": { + "minimist": {"name": "minimist", "severity": "critical", "via": []}, + "lodash": {"name": "lodash", "severity": "high", "via": []}, + "ms": {"name": "ms", "severity": "moderate", "via": []} + }, + "metadata": {"vulnerabilities": + {"info": 0, "low": 0, "moderate": 1, "high": 1, "critical": 1, "total": 3}} + }"#; + + #[test] + fn parse_npm_audit_counts_and_top() { + let summary = parse_npm_audit(AUDIT_REPORT).expect("parse audit report"); + assert_eq!(summary.total, 3); + assert_eq!(summary.critical, 1); + assert_eq!(summary.high, 1); + assert_eq!(summary.moderate, 1); + assert_eq!(summary.low, 0); + assert_eq!(summary.info, 0); + // Severest first: critical, high, moderate. 
+ assert_eq!( + summary.top, + vec![ + ("minimist".to_string(), "critical".to_string()), + ("lodash".to_string(), "high".to_string()), + ("ms".to_string(), "moderate".to_string()), + ] + ); + } + + #[test] + fn parse_npm_audit_caps_top_entries() { + let entries: Vec = (0..8) + .map(|i| format!(r#""p{i}": {{"name": "p{i}", "severity": "low"}}"#)) + .collect(); + let json = format!( + r#"{{"vulnerabilities": {{{}}}, + "metadata": {{"vulnerabilities": {{"low": 8, "total": 8}}}}}}"#, + entries.join(",") + ); + let summary = parse_npm_audit(&json).expect("parse audit report"); + assert_eq!(summary.total, 8); + assert_eq!(summary.top.len(), AUDIT_TOP_LIMIT); + } + + #[test] + fn parse_npm_audit_missing_total_sums_levels() { + let json = r#"{"vulnerabilities": {}, + "metadata": {"vulnerabilities": {"high": 2, "low": 1}}}"#; + let summary = parse_npm_audit(json).expect("parse audit report"); + assert_eq!(summary.total, 3); + } + + #[test] + fn parse_npm_audit_rejects_garbage() { + assert_eq!(parse_npm_audit("not json"), None); + assert_eq!(parse_npm_audit("{}"), None); + assert_eq!(parse_npm_audit(r#"{"metadata": {}}"#), None); + } + #[test] fn name_from_lock_path_handles_nested_and_scoped() { assert_eq!( diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs new file mode 100644 index 0000000..b721187 --- /dev/null +++ b/tests/cli_npm_audit.rs @@ -0,0 +1,359 @@ +//! Hermetic e2e tests for the warn-only `npm audit` second opinion +//! (`corgea npm install …` with a token + vuln-api stub). +//! +//! Extends the `cli_tree.rs` harness pattern with an audit-aware fake npm: +//! a `--package-lock-only` invocation writes a canned lockfile (the tree +//! pass), an `audit` invocation emits a canned audit report on stdout (real +//! `npm audit` exits 1 when it finds advisories — that's the success case), +//! and any other invocation records its argv to a marker. The audit is a +//! supplementary signal only: it must never block, never unblock, and never +//! 
change exit codes. + +#![cfg(unix)] + +mod common; + +use common::corgea_isolated; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::thread; +use tempfile::TempDir; + +fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + +/// npm audit report v2 with two advisories: 1 critical + 1 high. +const AUDIT_ADVISORIES: &str = r#"{"auditReportVersion":2, + "vulnerabilities":{ + "minimist":{"name":"minimist","severity":"critical","via":[]}, + "lodash":{"name":"lodash","severity":"high","via":[]}}, + "metadata":{"vulnerabilities": + {"info":0,"low":0,"moderate":0,"high":1,"critical":1,"total":2}}}"#; + +/// npm audit report v2 with no advisories. +const AUDIT_CLEAN: &str = r#"{"auditReportVersion":2,"vulnerabilities":{}, + "metadata":{"vulnerabilities": + {"info":0,"low":0,"moderate":0,"high":0,"critical":0,"total":0}}}"#; + +fn vulnerable_evildep_body() -> String { + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string() +} + +/// How the fake npm behaves on its `audit --json` invocation. +#[derive(Clone, Copy)] +enum AuditScenario { + /// Emits `AUDIT_ADVISORIES` and exits 1 — real npm audit's + /// advisories-found behaviour. + Advisories, + /// Emits `AUDIT_CLEAN` and exits 0. + Clean, + /// Emits nothing and exits 1 — unparsable output must be a silent skip. 
+ Broken, + /// Never answers — the gate's `recv_timeout` must move on without it. + Hang, +} + +/// Registry stub serving the `/oldpkg` npm packument, published 2020 → +/// never recent. Everything else 404s. +fn spawn_registry_stub() -> String { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + + let (status, body) = if path == "/oldpkg" { + ( + "200 OK", + r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#, + ) + } else { + ("404 Not Found", r#"{"message":"not found"}"#) + }; + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Shell loop that emits `path` line by line — works under the locked-down +/// test PATH (no `cat`); the `|| [ -n "$line" ]` guard keeps a final line +/// with no trailing newline. 
+fn emit(path: &Path) -> String { + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", + path.display() + ) +} + +/// Write an executable fake npm into `dir`: +/// * `audit` (checked first — the audit argv also carries +/// `--package-lock-only`) → records argv to `audit_marker`, then acts out +/// `scenario`; +/// * `--package-lock-only` → writes `NPM_LOCK` to `./package-lock.json` +/// (cwd is the resolver's throwaway temp dir), exits 0 — the tree pass; +/// * anything else → records argv to `marker`, exits 0 — the real install. +fn write_fake_npm(dir: &Path, marker: &Path, audit_marker: &Path, scenario: AuditScenario) { + use std::os::unix::fs::PermissionsExt; + let lock_payload = dir.join("npm-lock-payload.json"); + std::fs::write(&lock_payload, NPM_LOCK).expect("write lock payload"); + let audit_branch = match scenario { + AuditScenario::Advisories | AuditScenario::Clean => { + let (body, code) = match scenario { + AuditScenario::Advisories => (AUDIT_ADVISORIES, 1), + _ => (AUDIT_CLEAN, 0), + }; + let audit_payload = dir.join("npm-audit-payload.json"); + std::fs::write(&audit_payload, body).expect("write audit payload"); + format!("{}; exit {code}", emit(&audit_payload)) + } + AuditScenario::Broken => "exit 1".to_string(), + AuditScenario::Hang => "/bin/sleep 10; exit 0".to_string(), + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in\n\ + *\" audit \"*) printf '%s' \"$*\" > '{audit_marker}'; {audit_branch};;\n\ + *\" --package-lock-only \"*) {lock} > package-lock.json; exit 0;;\n\ + esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", + lock = emit(&lock_payload), + audit_marker = audit_marker.display(), + marker = marker.display(), + ); + let path = dir.join("npm"); + std::fs::write(&path, script).expect("write fake npm"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); +} + +/// `corgea` wired to the registry stub, an audit-aware fake npm, and a +/// vuln-api 
stub. +struct AuditHarness { + cmd: Command, + marker: PathBuf, + audit_marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +impl AuditHarness { + fn new(checks: HashMap<PackageKey, String>, scenario: AuditScenario) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + let audit_marker = bin.path().join("audit-argv.txt"); + write_fake_npm(bin.path(), &marker, &audit_marker, scenario); + let registry = spawn_registry_stub(); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_NPM_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token") + .env_remove("CORGEA_NO_NPM_AUDIT"); + Self { + cmd, + marker, + audit_marker, + _home: home, + _bin: bin, + } + } + + fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +#[test] +fn audit_advisories_warn_on_stderr_without_blocking() { + // Verdicts all clean; only npm audit complains → note on stderr, the + // install still runs, exit code stays 0.
+ let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "audit findings must not block"); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains( + "note: npm audit reports 2 advisories (2 high/critical) — supplementary signal, not blocking" + ), + "stderr: {stderr}" + ); + assert_eq!( + std::fs::read_to_string(&h.audit_marker).as_deref().ok(), + Some("audit --json --package-lock-only"), + "audit must run as `npm audit --json --package-lock-only`" + ); +} + +#[test] +fn audit_clean_report_prints_no_note() { + let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Clean); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("npm audit reports"), + "zero advisories must stay silent: {stderr}" + ); +} + +#[test] +fn audit_json_object_in_tree_arm() { + let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); + let out = h + .cmd + .args(["npm", "--json", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + let audit = &parsed["tree"]["npm_audit"]; + assert_eq!(audit["total"], 2); + assert_eq!(audit["critical"], 1); + assert_eq!(audit["high"], 1); + assert_eq!(audit["moderate"], 0); + // `top` is sorted severest first. 
+ assert_eq!(audit["top"][0]["name"], "minimist"); + assert_eq!(audit["top"][0]["severity"], "critical"); + assert_eq!(audit["top"][1]["name"], "lodash"); + assert_eq!(audit["top"][1]["severity"], "high"); +} + +#[test] +fn audit_disabled_by_env_var() { + let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); + let out = h + .cmd + .env("CORGEA_NO_NPM_AUDIT", "1") + .args(["npm", "--json", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(!stderr.contains("npm audit reports"), "stderr: {stderr}"); + assert!( + !h.audit_marker.exists(), + "CORGEA_NO_NPM_AUDIT=1 must skip the audit subprocess entirely" + ); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert_eq!(parsed["tree"]["mode"], "full"); + assert!(parsed["tree"]["npm_audit"].is_null()); +} + +#[test] +fn audit_failure_is_a_silent_skip() { + // Audit exits 1 with no output (unparsable) → no note, null in JSON, + // gate result untouched. + let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Broken); + let out = h + .cmd + .args(["npm", "--json", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + assert!( + !String::from_utf8_lossy(&out.stderr).contains("npm audit"), + "a failed audit must stay silent" + ); + let parsed: serde_json::Value = + serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); + assert!(parsed["tree"]["npm_audit"].is_null()); +} + +#[test] +fn audit_hang_is_skipped_within_the_collect_window() { + // The fake audit sleeps 10s; the gate's recv_timeout(2s) must move on. 
+ let started = std::time::Instant::now(); + let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Hang); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + assert!( + !String::from_utf8_lossy(&out.stderr).contains("npm audit"), + "a timed-out audit must stay silent" + ); + assert!( + started.elapsed() < std::time::Duration::from_secs(8), + "gate must not wait out the hung audit (took {:?})", + started.elapsed() + ); +} + +#[test] +fn audit_never_unblocks_a_vulnerable_verdict() { + // Transitive `evildep` is flagged by the verdict; the audit also has + // findings. Block behaviour and exit code are the verdict's alone — the + // audit note still prints as a supplementary signal. + let mut checks = HashMap::new(); + checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + let mut h = AuditHarness::new(checks, AuditScenario::Advisories); + let out = h + .cmd + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "verdict block must stand"); + assert_eq!( + h.recorded_argv(), + None, + "npm must not run on a vulnerable verdict regardless of audit" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("npm audit reports 2 advisories"), + "stderr: {stderr}" + ); +} From 032761f625928bbbffe075a261bd612ca1782a9f Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 08:26:14 +0200 Subject: [PATCH 17/59] Integration cleanup: coherent fix hints, honest refusal blame, audit plumbing Post-merge cleanup across the eight install-gate units: - Consolidate safe_version/advertised_fix into one highest_fix core (strict vs lenient parsing); the pre-existing fix hint's hedge now follows the steer re-check: Verified drops '(advertised fix)', Rejected suppresses the hint, Unverified 
keeps the hedge. - The existing-tree refusal only fires when every vulnerable finding predates the command: Requested (pip -r) findings and transitives pulled in by named targets get the generic refusal. Summary parenthetical reworded to 'from resolved tree'. - CORGEA_NO_NPM_AUDIT moves out of tree.rs into PrecheckOptions.npm_audit, read in main like the registry overrides; env semantics unchanged. - Audit collect window tightened to 1s: the warn-only signal never changes the outcome, so a finished gate shouldn't stall for it. --- src/main.rs | 1 + src/precheck/mod.rs | 179 +++++++++++++++++++++++++---------- src/precheck/tree.rs | 19 ++-- tests/cli_bare_install.rs | 6 ++ tests/cli_npm_audit.rs | 2 +- tests/cli_provenance.rs | 48 +++++++++- tests/cli_refusal_context.rs | 26 ++--- 7 files changed, 208 insertions(+), 73 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7cc67ea..4bf6d31 100644 --- a/src/main.rs +++ b/src/main.rs @@ -296,6 +296,7 @@ fn install_wrap_options( npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), concurrency: args.concurrency, + npm_audit: utils::generic::get_env_var_if_exists("CORGEA_NO_NPM_AUDIT").is_none(), } } diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 7a00e71..b63c3c8 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -135,6 +135,10 @@ pub struct PrecheckOptions { pub pypi_registry: Option, /// Max parallel vuln-api verdict requests; `verdict_pool` clamps to 1..=32. pub concurrency: usize, + /// Run the warn-only `npm audit` second opinion during the npm tree + /// pass. Cleared by `CORGEA_NO_NPM_AUDIT` (read in `main`, like the + /// registry overrides). + pub npm_audit: bool, } /// Each item the user (or a `-r` requirements file) asked us to install. @@ -499,8 +503,7 @@ fn bare_install_note(manager: PackageManager, subcommand_label: &str) { /// package the user typed is at fault. 
Messaging only; the block decision /// stays with `should_block_install`. fn print_refusal(report: &PrecheckReport) { - let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); - if report.vulnerable_count() > 0 && named_findings == 0 { + if refusal_blames_existing_tree(report) { eprintln!( "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." ); @@ -511,6 +514,32 @@ fn print_refusal(report: &PrecheckReport) { } } +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, none sit on a named target (or block as unverifiable +/// there), and every vulnerable tree finding genuinely predates this +/// command. A `Requested` finding (pip `-r`) is added by this command and +/// renders as `(from requirements)`; a `Transitive` finding on an install +/// *with* named targets is being pulled in by them right now. Only a bare +/// install (no named targets) or manifest-declared `PreExisting` findings +/// may blame the existing tree. +fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { + let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); + if report.vulnerable_count() == 0 || named_findings > 0 { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| matches!(t.verdict, VerdictStatus::Vulnerable(_))) + .all(|t| match t.origin { + TreeOrigin::PreExisting => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.outcomes.is_empty(), + }) +} + /// Print the "requirements files are not recency-checked" note when the /// install carried any `-r` files. No-op otherwise. 
fn requirements_note(parsed: &parse::ParsedInstall) { @@ -541,7 +570,7 @@ fn run_tree_pass( let tree::TreeResolution { packages: set, audit: audit_rx, - } = match tree::resolve_tree(manager, rest) { + } = match tree::resolve_tree(manager, rest, opts.npm_audit) { Ok(Some(resolution)) => resolution, Ok(None) => { run_verdict_pass(manager, outcomes, opts); @@ -594,9 +623,10 @@ fn run_tree_pass( .expect("tree pass requires verdict config"); let results = verdict_pool(jobs, cfg, manager, opts.concurrency); // Collect the warn-only npm audit second opinion only after the verdict - // pool so the two truly overlap; any failure (timeout, disconnected - // sender) is a silent skip. - let audit = audit_rx.and_then(|rx| rx.recv_timeout(Duration::from_secs(2)).ok()); + // pool so the two truly overlap. The wait is capped tight: this signal + // never changes the outcome, so a finished gate won't stall long for it — + // a slow audit (or any failure) is a silent skip. + let audit = audit_rx.and_then(|rx| rx.recv_timeout(Duration::from_secs(1)).ok()); let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, @@ -921,16 +951,12 @@ fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { } } -/// The one version certified to clear every match. Requires every match to -/// carry a `fixed_version`: a single distinct value is returned as-is; -/// several distinct values pick the highest by lenient semver. Any match -/// without a fix — or an unparsable candidate among several — means no -/// version can be certified, so `None`. -fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { - let mut fixes: Vec<&str> = matches - .iter() - .map(|m| m.fixed_version.as_deref()) - .collect::>()?; +/// Highest of `fixes` after sort/dedup: a single distinct value is returned +/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct +/// values compare by lenient semver. 
With `all_must_parse`, one unparsable +/// candidate among several poisons the answer (`None`); otherwise unparsable +/// candidates are skipped. +fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { fixes.sort_unstable(); fixes.dedup(); match fixes.as_slice() { @@ -939,8 +965,13 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { many => { let mut best: Option<(semver::Version, &str)> = None; for raw in many { - let v = semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) - .ok()?; + let v = + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) + { + Ok(v) => v, + Err(_) if all_must_parse => return None, + Err(_) => continue, + }; match &best { Some((cur, _)) if cur >= &v => {} _ => best = Some((v, raw)), @@ -951,6 +982,17 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { } } +/// The one version certified to clear every match. Requires every match to +/// carry a `fixed_version`; any match without one — or an unparsable +/// candidate among several — means no version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + highest_fix(fixes, true) +} + /// The safe-version proposal for a vulnerable package, paired with its /// `verify_steers` re-check. `None` when no version can be proposed at all; /// a proposal absent from the steer map counts as `Unverified` so callers @@ -974,31 +1016,11 @@ fn steer_for( /// fix are ignored, so the result may still be vulnerable to them. `None` /// only when no match advertises a fix (or no candidate parses). 
fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { - let mut fixes: Vec<&str> = matches + let fixes: Vec<&str> = matches .iter() .filter_map(|m| m.fixed_version.as_deref()) .collect(); - fixes.sort_unstable(); - fixes.dedup(); - match fixes.as_slice() { - [] => None, - [only] => Some((*only).to_string()), - many => { - let mut best: Option<(semver::Version, &str)> = None; - for raw in many { - let Ok(v) = - semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) - else { - continue; - }; - match &best { - Some((cur, _)) if cur >= &v => {} - _ => best = Some((v, raw)), - } - } - best.map(|(_, raw)| (*raw).to_string()) - } - } + highest_fix(fixes, false) } /// Per-match advisory lines plus the verified safe-version steer, shared by @@ -1027,13 +1049,13 @@ fn print_vulnerable_matches( } } -/// One summary-line segment, e.g. `"2 vulnerable (2 from existing tree)"`. +/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. /// The parenthetical separates findings the resolved tree carried in from /// findings on the targets this command names; omitted when the tree /// contributed none. fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { if from_tree > 0 { - format!("{total} {label} ({from_tree} from existing tree)") + format!("{total} {label} ({from_tree} from resolved tree)") } else { format!("{total} {label}") } @@ -1154,15 +1176,30 @@ fn print_text(report: &PrecheckReport) { ); print_vulnerable_matches(report, &t.name, matches); // A vulnerable dep the project already declares can be - // bumped directly — point at the advertised fix. + // bumped directly — point at the fix as a command. The + // caveat follows the steer check above: a Verified + // steer certified this same version (when `safe_version` + // is `Some` it equals `advertised_fix`), a Rejected one + // already said the fix is flagged, so only an + // unverified proposal keeps the "(advertised fix)" + // hedge. 
if t.origin == TreeOrigin::PreExisting { if let Some(fix) = advertised_fix(matches) { - println!( - " fix with: corgea {} install {}@{} (advertised fix)", - report.manager.binary_name(), - t.name, - fix - ); + match steer_for(report, &t.name, matches) { + Some((_, SteerCheck::Rejected)) => {} + Some((_, SteerCheck::Verified)) => println!( + " fix with: corgea {} install {}@{}", + report.manager.binary_name(), + t.name, + fix + ), + Some((_, SteerCheck::Unverified)) | None => println!( + " fix with: corgea {} install {}@{} (advertised fix)", + report.manager.binary_name(), + t.name, + fix + ), + } } } } @@ -1407,6 +1444,8 @@ mod tests { npm_registry: None, pypi_registry: Some(pypi_registry), concurrency: 4, + // Unit tests never want the real `npm audit` subprocess. + npm_audit: false, } } @@ -2004,4 +2043,46 @@ mod tests { ] ); } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding on an install *with* named + /// targets is being pulled in by them right now. On a bare install + /// (no named targets) everything resolved is the existing tree's. 
+ #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named targets present, expected) + let cases = [ + (TreeOrigin::PreExisting, false, true), + (TreeOrigin::PreExisting, true, true), + (TreeOrigin::Transitive, false, true), + (TreeOrigin::Transitive, true, false), + (TreeOrigin::Requested, false, false), + (TreeOrigin::Requested, true, false), + ]; + for (origin, with_named, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + audit: None, + }); + assert_eq!( + refusal_blames_existing_tree(&report), + blames_tree, + "origin {origin:?}, with_named {with_named}" + ); + } + } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 8604a89..c734a90 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -59,6 +59,7 @@ pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstal pub fn resolve_tree( manager: PackageManager, install_args: &[String], + run_audit: bool, ) -> Result, String> { match manager { PackageManager::Pip => { @@ -69,7 +70,9 @@ pub fn resolve_tree( }) }) } - PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Npm => { + resolve_npm_tree(manager.binary_name(), install_args, run_audit).map(Some) + } // yarn/pnpm/uv have no safe dry-run for installs. _ => Ok(None), } @@ -170,7 +173,11 @@ fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { /// /// `--ignore-scripts` because npm has run lifecycle scripts under /// `--package-lock-only` before (npm/cli#2787). 
-fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result { +fn resolve_npm_tree( + binary: &str, + install_args: &[String], + run_audit: bool, +) -> Result { let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; for manifest in [ @@ -205,16 +212,10 @@ fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result bool { - std::env::var("CORGEA_NO_NPM_AUDIT").is_ok_and(|v| !v.trim().is_empty()) -} - /// Kill the audit subprocess if it hasn't finished by then. const AUDIT_DEADLINE: Duration = Duration::from_secs(5); diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index 7f96a7c..101f239 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -211,6 +211,12 @@ fn bare_npm_install_vulnerable_lockfile_blocks() { !stderr.contains("not gated"), "gated bare npm must not print the ungated note: {stderr}" ); + // A bare install names no targets, so everything resolved is the + // existing tree's — the refusal must say so. + assert!( + stderr.contains("your existing dependency tree has known-vulnerable packages"), + "bare install blames the existing tree: {stderr}" + ); } #[test] diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs index b721187..5820748 100644 --- a/tests/cli_npm_audit.rs +++ b/tests/cli_npm_audit.rs @@ -311,7 +311,7 @@ fn audit_failure_is_a_silent_skip() { #[test] fn audit_hang_is_skipped_within_the_collect_window() { - // The fake audit sleeps 10s; the gate's recv_timeout(2s) must move on. + // The fake audit sleeps 10s; the gate's recv_timeout(1s) must move on. 
let started = std::time::Instant::now(); let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Hang); let out = h diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index 9349e1e..f8c9bb3 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -144,12 +144,21 @@ struct Harness { impl Harness { fn new(binary: &str, checks: HashMap, payload: &str) -> Self { + Self::new_with_statuses(binary, checks, HashMap::new(), payload) + } + + fn new_with_statuses( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, + ) -> Self { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); write_fake_pm(bin.path(), &marker, binary, payload); let registry = spawn_registry_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) .env("CORGEA_PYPI_REGISTRY", &registry) .env("CORGEA_NPM_REGISTRY", &registry) @@ -204,7 +213,9 @@ fn pip_requirements_finding_labeled_from_requirements() { #[test] fn npm_preexisting_direct_dep_labeled_with_fix_hint() { // `evildep` is already a direct dep in the project's package.json; the - // finding gets the pre-existing label plus the advertised-fix hint. + // finding gets the pre-existing label plus the fix-command hint. The + // steer re-check verified 1.2.2 clean (the stub defaults unknown + // versions to clean), so the hint drops the "(advertised fix)" hedge. 
let project = npm_project(); let mut checks = HashMap::new(); checks.insert( @@ -225,9 +236,40 @@ fn npm_preexisting_direct_dep_labeled_with_fix_hint() { stdout.contains("evildep@0.4.2 (already in package.json)"), "stdout: {stdout}" ); + assert!( + stdout.contains("fix with: corgea npm install evildep@1.2.2\n"), + "verified fix hint must print without the advertised-fix hedge: {stdout}" + ); +} + +#[test] +fn npm_preexisting_fix_hint_keeps_hedge_when_unverifiable() { + // The steer re-check for 1.2.2 fails (503), so the bare steer line stays + // quiet and the fix-command hint keeps its "(advertised fix)" hedge. + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + ); + let mut statuses = HashMap::new(); + statuses.insert(key("npm", "evildep", "1.2.2"), 503u16); + let mut h = Harness::new_with_statuses("npm", checks, statuses, NPM_LOCK); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "pre-existing vuln must block"); + let stdout = String::from_utf8_lossy(&out.stdout); assert!( stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), - "stdout: {stdout}" + "unverified fix hint must keep the hedge: {stdout}" + ); + assert!( + !stdout.contains("→ safe version"), + "an unverified steer must stay quiet: {stdout}" ); } diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index 0ef3e72..e612625 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -1,7 +1,9 @@ -//! Hermetic e2e tests for refusal-message context: when every vulnerable -//! finding sits in the resolved tree beyond the named targets, the refusal -//! must say the existing tree is the problem; a finding on a named target -//! keeps the generic refusal. +//! 
Hermetic e2e tests for refusal-message context: the refusal blames the +//! existing tree only when every vulnerable finding predates the command +//! (bare installs, or manifest-declared pre-existing deps — see +//! `cli_bare_install.rs` for the positive case). A finding on a named +//! target, or a transitive finding the named targets pull in, keeps the +//! generic refusal. //! //! Same harness as `cli_tree.rs`, pip-only: a fake pip on a private PATH //! answers the `--dry-run --report -` tree pass with a canned report, a local @@ -152,8 +154,10 @@ impl Harness { } #[test] -fn transitive_only_vulnerable_gets_existing_tree_refusal() { +fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. + // `evildep` is being pulled in *by this command*, so the existing-tree + // refusal ("none were added by this command") would lie. let mut checks = HashMap::new(); checks.insert( key("pypi", "evildep", "0.4.2"), @@ -166,16 +170,16 @@ fn transitive_only_vulnerable_gets_existing_tree_refusal() { assert!(!h.pip_ran(), "pip must not run on a blocked install"); let stderr = String::from_utf8_lossy(&out.stderr); assert!( - stderr.contains(TREE_REFUSAL), - "stderr must carry the existing-tree refusal: {stderr}" + stderr.contains(GENERIC_REFUSAL), + "a transitive dep of a named target keeps the generic refusal: {stderr}" ); assert!( - !stderr.contains(GENERIC_REFUSAL), - "generic refusal must be replaced, not appended: {stderr}" + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire for command-added transitives: {stderr}" ); let stdout = String::from_utf8_lossy(&out.stdout); assert!( - stdout.contains("1 vulnerable (1 from existing tree)"), + stdout.contains("1 vulnerable (1 from resolved tree)"), "summary must attribute the finding to the tree: {stdout}" ); } @@ -204,7 +208,7 @@ fn named_vulnerable_keeps_generic_refusal() { ); let stdout = 
String::from_utf8_lossy(&out.stdout); assert!( - !stdout.contains("from existing tree"), + !stdout.contains("from resolved tree"), "summary must not attribute a named finding to the tree: {stdout}" ); } From eb68a5e039c98e4c8c390057989e5410ed9f4a36 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 08:58:21 +0200 Subject: [PATCH 18/59] Address review: unverifiable findings in refusal blame; reap audit child MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes from the PR #108 review: - refusal_blames_existing_tree filtered tree findings to Vulnerable before the origin test, but should_block_install refuses on Unverifiable too — a command-added unverifiable transitive alongside a pre-existing vulnerable dep printed 'none were added by this command'. Every blocking tree finding now passes the origin test. - The audit thread was detached with the Child trapped inside it; a fast gate exit orphaned a slow 'npm audit' past the CLI's lifetime. spawn_audit now returns an AuditHandle whose collect() does the 1s recv, then kills whatever is left in the shared child slot and joins the thread — no orphan, TempDir drops deterministically, happy-path latency unchanged. The hang test's fake now execs its sleep (a grandchild would dodge the kill) and asserts the child is dead after the CLI exits. 
--- src/precheck/mod.rs | 61 +++++++++++++++++++++++++++++--- src/precheck/tree.rs | 78 +++++++++++++++++++++++++++++++++++------ tests/cli_npm_audit.rs | 36 ++++++++++++++++--- tests/cli_provenance.rs | 38 ++++++++++++++++++++ 4 files changed, 194 insertions(+), 19 deletions(-) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index b63c3c8..a52548a 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -516,7 +516,8 @@ fn print_refusal(report: &PrecheckReport) { /// True when the block is entirely the existing tree's doing: vulnerable /// findings exist, none sit on a named target (or block as unverifiable -/// there), and every vulnerable tree finding genuinely predates this +/// there), and every *blocking* tree finding — vulnerable or unverifiable, +/// since `should_block_install` refuses on both — genuinely predates this /// command. A `Requested` finding (pip `-r`) is added by this command and /// renders as `(from requirements)`; a `Transitive` finding on an install /// *with* named targets is being pulled in by them right now. Only a bare @@ -532,7 +533,12 @@ fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { }; transitive .iter() - .filter(|t| matches!(t.verdict, VerdictStatus::Vulnerable(_))) + .filter(|t| { + matches!( + t.verdict, + VerdictStatus::Vulnerable(_) | VerdictStatus::Unverifiable(_) + ) + }) .all(|t| match t.origin { TreeOrigin::PreExisting => true, TreeOrigin::Requested => false, @@ -625,8 +631,9 @@ fn run_tree_pass( // Collect the warn-only npm audit second opinion only after the verdict // pool so the two truly overlap. The wait is capped tight: this signal // never changes the outcome, so a finished gate won't stall long for it — - // a slow audit (or any failure) is a silent skip. - let audit = audit_rx.and_then(|rx| rx.recv_timeout(Duration::from_secs(1)).ok()); + // a slow audit is killed and skipped (collect also reaps the subprocess, + // so nothing outlives the CLI). 
+ let audit = audit_rx.and_then(|handle| handle.collect(Duration::from_secs(1))); let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, @@ -2085,4 +2092,50 @@ mod tests { ); } } + + /// Unverifiable tree findings block too (`should_block_install`), so + /// they must pass the same origin test before the refusal may blame the + /// existing tree: a command-added unverifiable transitive alongside a + /// pre-existing vulnerable dep keeps the generic refusal on a named + /// install, while on a bare install everything still predates the + /// command. + #[test] + fn refusal_blame_considers_unverifiable_tree_findings() { + let tree_finding = |name: &str, verdict, origin| TreeOutcome { + name: name.to_string(), + version: "1.0.0".to_string(), + verdict, + origin, + }; + let mixed_tree = || { + Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![ + tree_finding( + "stickydep", + VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + TreeOrigin::PreExisting, + ), + tree_finding( + "newdep", + VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), + TreeOrigin::Transitive, + ), + ], + audit: None, + }) + }; + + // Named install: the unverifiable transitive is being added by this + // command, so "none were added by this command" would lie. + let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); + report.tree = mixed_tree(); + assert!(!refusal_blames_existing_tree(&report)); + + // Bare install: no named targets, everything resolved predates the + // command — the mixed findings still blame the existing tree. 
+ let mut report = report_with(vec![]); + report.tree = mixed_tree(); + assert!(refusal_blames_existing_tree(&report)); + } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index c734a90..54e4509 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -6,7 +6,7 @@ use std::path::PathBuf; use std::process::{Command, Stdio}; -use std::sync::mpsc; +use std::sync::{mpsc, Arc, Mutex}; use std::time::{Duration, Instant}; use super::PackageManager; @@ -37,10 +37,41 @@ pub struct AuditSummary { } /// What `resolve_tree` hands back: the would-install set, plus (npm only) -/// a receiver for the concurrent `npm audit` second opinion. +/// a handle to the concurrent `npm audit` second opinion. pub struct TreeResolution { pub packages: Vec, - pub audit: Option>, + pub audit: Option, +} + +/// The in-flight `npm audit` second opinion: a receiver for the summary plus +/// deterministic cleanup. The CLI exits the process as soon as the gate +/// returns, which would strand the audit thread mid-poll and orphan the +/// `npm audit` child — so `collect` owns reaping both before the gate moves +/// on. +pub struct AuditHandle { + rx: mpsc::Receiver, + /// The audit subprocess, shared with the polling thread. Emptied by + /// whichever side reaps it first. + child: Arc>>, + thread: std::thread::JoinHandle<()>, +} + +impl AuditHandle { + /// Wait up to `window` for the summary, then kill whatever is left of + /// the subprocess and join the thread. On the happy path the child has + /// already exited and the join is instant; a hung audit is killed now + /// rather than left running past the CLI's exit. 
+ pub fn collect(self, window: Duration) -> Option { + let summary = self.rx.recv_timeout(window).ok(); + if let Ok(mut slot) = self.child.lock() { + if let Some(mut child) = slot.take() { + let _ = child.kill(); + let _ = child.wait(); + } + } + let _ = self.thread.join(); + summary + } } /// Whether this manager's resolver has anything to resolve for the parsed @@ -227,15 +258,17 @@ const AUDIT_TOP_LIMIT: usize = 5; /// is cleaned up when the audit finishes. Any failure (spawn error, timeout, /// unparsable output) drops the sender — the receiver sees a disconnect and /// the gate silently skips the second opinion. -fn spawn_audit(work: tempfile::TempDir, npm: PathBuf) -> mpsc::Receiver { +fn spawn_audit(work: tempfile::TempDir, npm: PathBuf) -> AuditHandle { let (tx, rx) = mpsc::channel(); - std::thread::spawn(move || { - if let Some(summary) = run_audit(work.path(), &npm) { + let child = Arc::new(Mutex::new(None)); + let slot = Arc::clone(&child); + let thread = std::thread::spawn(move || { + if let Some(summary) = run_audit(work.path(), &npm, &slot) { let _ = tx.send(summary); } drop(work); }); - rx + AuditHandle { rx, child, thread } } /// `npm audit` exits 1 when it finds advisories — that's the success case, @@ -243,10 +276,18 @@ fn spawn_audit(work: tempfile::TempDir, npm: PathBuf) -> mpsc::Receiver Option { +/// +/// The subprocess lives in `slot`, shared with `AuditHandle::collect`: the +/// poll relocks each iteration, and an empty slot means the collector +/// already reaped the child — stop quietly. 
+fn run_audit( + work: &std::path::Path, + npm: &std::path::Path, + slot: &Mutex>, +) -> Option { let stdout_path = work.join("corgea-npm-audit.json"); let stdout_file = std::fs::File::create(&stdout_path).ok()?; - let mut child = Command::new(npm) + let child = Command::new(npm) .args(["audit", "--json", "--package-lock-only"]) .current_dir(work) .stdin(Stdio::null()) @@ -254,12 +295,27 @@ fn run_audit(work: &std::path::Path, npm: &std::path::Path) -> Option break, - Ok(None) if Instant::now() < deadline => std::thread::sleep(Duration::from_millis(50)), + Ok(Some(_)) => { + // Exited on its own: clear the slot so the collector has + // nothing left to kill. + guard.take(); + break; + } + Ok(None) if Instant::now() < deadline => { + drop(guard); + std::thread::sleep(Duration::from_millis(50)); + } _ => { + let mut child = guard.take().expect("checked above"); + drop(guard); let _ = child.kill(); let _ = child.wait(); return None; diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs index 5820748..54fb97f 100644 --- a/tests/cli_npm_audit.rs +++ b/tests/cli_npm_audit.rs @@ -130,7 +130,13 @@ fn emit(path: &Path) -> String { /// * `--package-lock-only` → writes `NPM_LOCK` to `./package-lock.json` /// (cwd is the resolver's throwaway temp dir), exits 0 — the tree pass; /// * anything else → records argv to `marker`, exits 0 — the real install. 
-fn write_fake_npm(dir: &Path, marker: &Path, audit_marker: &Path, scenario: AuditScenario) { +fn write_fake_npm( + dir: &Path, + marker: &Path, + audit_marker: &Path, + audit_pid: &Path, + scenario: AuditScenario, +) { use std::os::unix::fs::PermissionsExt; let lock_payload = dir.join("npm-lock-payload.json"); std::fs::write(&lock_payload, NPM_LOCK).expect("write lock payload"); @@ -145,7 +151,13 @@ fn write_fake_npm(dir: &Path, marker: &Path, audit_marker: &Path, scenario: Audi format!("{}; exit {code}", emit(&audit_payload)) } AuditScenario::Broken => "exit 1".to_string(), - AuditScenario::Hang => "/bin/sleep 10; exit 0".to_string(), + // Record the PID, then `exec` so the sleep IS the audit child (a + // plain `/bin/sleep 10` would be a grandchild the gate's kill never + // reaches). + AuditScenario::Hang => format!( + "printf '%s' $$ > '{}'; exec /bin/sleep 10", + audit_pid.display() + ), }; let script = format!( "#!/bin/sh\ncase \" $* \" in\n\ @@ -167,6 +179,7 @@ struct AuditHarness { cmd: Command, marker: PathBuf, audit_marker: PathBuf, + audit_pid: PathBuf, _home: TempDir, _bin: TempDir, } @@ -177,7 +190,8 @@ impl AuditHarness { let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); let audit_marker = bin.path().join("audit-argv.txt"); - write_fake_npm(bin.path(), &marker, &audit_marker, scenario); + let audit_pid = bin.path().join("audit-pid.txt"); + write_fake_npm(bin.path(), &marker, &audit_marker, &audit_pid, scenario); let registry = spawn_registry_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); cmd.env("PATH", bin.path()) @@ -189,6 +203,7 @@ impl AuditHarness { cmd, marker, audit_marker, + audit_pid, _home: home, _bin: bin, } @@ -311,7 +326,9 @@ fn audit_failure_is_a_silent_skip() { #[test] fn audit_hang_is_skipped_within_the_collect_window() { - // The fake audit sleeps 10s; the gate's recv_timeout(1s) must move on. 
+ // The fake audit sleeps 10s; the gate's 1s collect window must move on — + // and must kill the audit child on its way out, not orphan it past the + // CLI's exit. let started = std::time::Instant::now(); let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Hang); let out = h @@ -330,6 +347,17 @@ fn audit_hang_is_skipped_within_the_collect_window() { "gate must not wait out the hung audit (took {:?})", started.elapsed() ); + let pid = std::fs::read_to_string(&h.audit_pid).expect("audit must have started"); + let alive = Command::new("kill") + .args(["-0", pid.trim()]) + .status() + .expect("run kill -0") + .success(); + assert!( + !alive, + "hung audit child (pid {}) must be dead after the CLI exits", + pid.trim() + ); } #[test] diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index f8c9bb3..e59a3f7 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -273,6 +273,44 @@ fn npm_preexisting_fix_hint_keeps_hedge_when_unverifiable() { ); } +/// PR #108 review regression: unverifiable tree findings block too, so the +/// refusal may not blame the existing tree when a command-added transitive +/// is part of the block — even if the only *vulnerable* finding is a +/// pre-existing direct dep. 
+#[test] +fn preexisting_vulnerable_with_unverifiable_transitive_keeps_generic_refusal() { + const LOCK_WITH_NEWDEP: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}, + "node_modules/newdep":{"version":"2.0.0"}}}"#; + let project = npm_project(); + let mut checks = HashMap::new(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "null"), + ); + let mut statuses = HashMap::new(); + statuses.insert(key("npm", "newdep", "2.0.0"), 503u16); + let mut h = Harness::new_with_statuses("npm", checks, statuses, LOCK_WITH_NEWDEP); + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "must block"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("Refusing to run install. Pass --force to proceed despite findings."), + "the command-added unverifiable transitive keeps the generic refusal: {stderr}" + ); + assert!( + !stderr.contains("your existing dependency tree"), + "existing-tree refusal must not fire when a command-added finding blocks: {stderr}" + ); +} + #[test] fn npm_preexisting_without_fix_has_no_hint() { let project = npm_project(); From c70895809289827f75685d19d6252478481ce518 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 09:15:44 +0200 Subject: [PATCH 19/59] Carry explicit bare-install context into the refusal blame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A requirements-only install (pip install -r reqs.txt) has no named outcomes — exactly like a bare install — so outcomes.is_empty() wrongly let a vulnerable transitive of a clean requirements entry blame the existing tree. 
PrecheckReport now carries bare_install (no CLI targets AND no requirements files), set from the parsed command, and the Transitive arm tests that instead of inferring from outcomes. Regressions: requirements-only e2e + full origin/named/bare unit matrix. --- src/precheck/mod.rs | 54 ++++++++++++++++++++++++------------ tests/cli_refusal_context.rs | 39 ++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 18 deletions(-) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index a52548a..aded2b3 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -259,6 +259,12 @@ pub struct PrecheckReport { /// (normalized name, proposed version). Populated by `verify_steers`; /// consulted only at render time, never by the block predicate. pub steers: HashMap<(String, String), SteerCheck>, + /// True when the command named nothing — no CLI targets and no + /// requirements files — so everything the tree pass resolved predates + /// this command (bare `npm install`). Distinct from + /// `outcomes.is_empty()`: a requirements-only install also has no named + /// outcomes, but its resolved set IS added by the command. + pub bare_install: bool, } impl PrecheckReport { @@ -398,6 +404,7 @@ fn run_parsed_install( // With a verdict config, the tree pass resolves the full would-install // set; `tree::covers_input` owns what each manager's resolver can chew on. 
let tree_eligible = opts.verdict.is_some() && tree::covers_input(manager, &parsed); + let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); if parsed.targets.is_empty() && !tree_eligible { bare_install_note(manager, subcommand_label); @@ -461,6 +468,7 @@ fn run_parsed_install( threshold: opts.threshold, tree, steers: HashMap::new(), + bare_install, }; verify_steers(&mut report, &opts); @@ -519,10 +527,10 @@ fn print_refusal(report: &PrecheckReport) { /// there), and every *blocking* tree finding — vulnerable or unverifiable, /// since `should_block_install` refuses on both — genuinely predates this /// command. A `Requested` finding (pip `-r`) is added by this command and -/// renders as `(from requirements)`; a `Transitive` finding on an install -/// *with* named targets is being pulled in by them right now. Only a bare -/// install (no named targets) or manifest-declared `PreExisting` findings -/// may blame the existing tree. +/// renders as `(from requirements)`; a `Transitive` finding on any install +/// that names targets or requirements files is being pulled in by them +/// right now. Only a truly bare install (`report.bare_install`) or +/// manifest-declared `PreExisting` findings may blame the existing tree. fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); if report.vulnerable_count() == 0 || named_findings > 0 { @@ -542,7 +550,7 @@ fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { .all(|t| match t.origin { TreeOrigin::PreExisting => true, TreeOrigin::Requested => false, - TreeOrigin::Transitive => report.outcomes.is_empty(), + TreeOrigin::Transitive => report.bare_install, }) } @@ -1533,6 +1541,9 @@ mod tests { threshold: Duration::from_secs(2 * 86400), tree: None, steers: HashMap::new(), + // Most tests model an install that named something; bare-install + // cases set this explicitly. 
+ bare_install: false, } } @@ -2053,9 +2064,10 @@ mod tests { /// The existing-tree refusal fires only when every vulnerable finding /// predates the command: a `Requested` finding (pip `-r`) is added by - /// this command, and a `Transitive` finding on an install *with* named - /// targets is being pulled in by them right now. On a bare install - /// (no named targets) everything resolved is the existing tree's. + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. #[test] fn refusal_blame_respects_finding_origin() { let tree_vulnerable = |origin| TreeOutcome { @@ -2064,22 +2076,27 @@ mod tests { verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), origin, }; - // (origin, named targets present, expected) + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. 
let cases = [ - (TreeOrigin::PreExisting, false, true), - (TreeOrigin::PreExisting, true, true), - (TreeOrigin::Transitive, false, true), - (TreeOrigin::Transitive, true, false), - (TreeOrigin::Requested, false, false), - (TreeOrigin::Requested, true, false), + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), ]; - for (origin, with_named, blames_tree) in cases { + for (origin, with_named, bare_install, blames_tree) in cases { let outcomes = if with_named { vec![resolved_outcome("cleanpkg", "1.0.0", false)] } else { vec![] }; let mut report = report_with(outcomes); + report.bare_install = bare_install; report.tree = Some(TreeReport::Full { resolved_count: 1, transitive: vec![tree_vulnerable(origin)], @@ -2088,7 +2105,7 @@ mod tests { assert_eq!( refusal_blames_existing_tree(&report), blames_tree, - "origin {origin:?}, with_named {with_named}" + "origin {origin:?}, with_named {with_named}, bare {bare_install}" ); } } @@ -2132,9 +2149,10 @@ mod tests { report.tree = mixed_tree(); assert!(!refusal_blames_existing_tree(&report)); - // Bare install: no named targets, everything resolved predates the + // Bare install: nothing named, everything resolved predates the // command — the mixed findings still blame the existing tree. 
let mut report = report_with(vec![]); + report.bare_install = true; report.tree = mixed_tree(); assert!(refusal_blames_existing_tree(&report)); } diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index e612625..2fa963f 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -148,6 +148,18 @@ impl Harness { .expect("run corgea") } + /// `pip install -r reqs.txt` with no named targets — the canned tree + /// report still resolves oldpkg (requested) + evildep (transitive). + fn run_requirements_install(&mut self) -> std::process::Output { + let reqs = self._bin.path().join("reqs.txt"); + std::fs::write(&reqs, "oldpkg==1.0.0\n").expect("write reqs.txt"); + self.cmd + .args(["pip", "install", "-r"]) + .arg(&reqs) + .output() + .expect("run corgea") + } + fn pip_ran(&self) -> bool { self.marker.exists() } @@ -184,6 +196,33 @@ fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { ); } +/// PR #108 review regression: a requirements-only install has no named +/// outcomes — exactly like a bare install — but its resolved set is added +/// by this command. A vulnerable transitive of a clean requirements entry +/// must keep the generic refusal. 
+#[test] +fn requirements_only_install_with_vulnerable_transitive_keeps_generic_refusal() { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("evildep", "0.4.2"), + ); + let mut h = Harness::new(checks, HashMap::new()); + let out = h.run_requirements_install(); + + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert!(!h.pip_ran(), "pip must not run on a blocked install"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains(GENERIC_REFUSAL), + "requirements-driven transitives keep the generic refusal: {stderr}" + ); + assert!( + !stderr.contains(TREE_REFUSAL), + "existing-tree refusal must not fire for a requirements-only install: {stderr}" + ); +} + #[test] fn named_vulnerable_keeps_generic_refusal() { // The named `oldpkg` itself is flagged; `evildep` is clean. From b6c2e83c7168f6af75e4d8d053e8e69e9723c17f Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 09:23:23 +0200 Subject: [PATCH 20/59] Drop the standalone vuln-api-stub binary The bin existed for live e2e dogfooding, now done. Every consumer in the repo (7 integration test files + precheck unit tests) uses the in-process vuln_api_stub::spawn_with_statuses, so the bin-only surface goes with it: the fixtures.rs file loader, spawn_from_file, VulnApiStub::block, and the StubFixtures/spawn_on_port indirection (the ephemeral-port bind now lives directly in spawn_with_statuses). 
--- Cargo.toml | 4 -- src/bin/vuln-api-stub.rs | 40 ------------------- src/vuln_api_stub/fixtures.rs | 62 ----------------------------- src/vuln_api_stub/mod.rs | 74 +++-------------------------------- 4 files changed, 6 insertions(+), 174 deletions(-) delete mode 100644 src/bin/vuln-api-stub.rs delete mode 100644 src/vuln_api_stub/fixtures.rs diff --git a/Cargo.toml b/Cargo.toml index 877317e..2ff42c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,6 @@ edition = "2021" name = "corgea" path = "src/main.rs" -[[bin]] -name = "vuln-api-stub" -path = "src/bin/vuln-api-stub.rs" - [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" diff --git a/src/bin/vuln-api-stub.rs b/src/bin/vuln-api-stub.rs deleted file mode 100644 index 4edc509..0000000 --- a/src/bin/vuln-api-stub.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! Standalone vuln-api stub for e2e dogfood and local development. - -use clap::Parser; -use corgea::vuln_api_stub; -use std::path::PathBuf; - -#[derive(Parser, Debug)] -#[command( - name = "vuln-api-stub", - about = "Minimal TCP stub for vuln-api package-check routes" -)] -struct Args { - /// JSON fixture file (`package_checks`). - #[arg(long)] - fixtures: PathBuf, - - /// TCP port to bind (`0` = ephemeral). - #[arg(long, default_value = "0")] - port: u16, - - /// Print base URL to stdout and keep serving until SIGTERM. 
- #[arg(long)] - print_url: bool, -} - -fn main() { - let args = Args::parse(); - let stub = if args.port == 0 { - vuln_api_stub::spawn_from_file(&args.fixtures) - } else { - let fixtures = vuln_api_stub::load_from_file(&args.fixtures) - .unwrap_or_else(|e| panic!("failed to load {}: {e}", args.fixtures.display())); - vuln_api_stub::spawn_on_port(fixtures, args.port) - }; - if args.print_url { - println!("{}", stub.base_url); - } - eprintln!("vuln-api stub listening on {}", stub.base_url); - stub.block(); -} diff --git a/src/vuln_api_stub/fixtures.rs b/src/vuln_api_stub/fixtures.rs deleted file mode 100644 index a37eea5..0000000 --- a/src/vuln_api_stub/fixtures.rs +++ /dev/null @@ -1,62 +0,0 @@ -use super::StubFixtures; -use serde::Deserialize; -use serde_json::Value; -use std::collections::HashMap; -use std::fs; -use std::path::Path; - -#[derive(Debug, Deserialize)] -struct FixtureFile { - #[serde(default)] - package_checks: HashMap, -} - -/// Load stub fixtures from JSON. Keys in `package_checks` use `{ecosystem}/{name}/{version}`. 
-pub fn load_from_file(path: &Path) -> Result> { - let raw = fs::read_to_string(path)?; - let file: FixtureFile = serde_json::from_str(&raw)?; - - let mut package_checks = HashMap::new(); - for (key, value) in file.package_checks { - let (eco, name, ver) = parse_package_key(&key)?; - let body = serde_json::to_string(&value)?; - package_checks.insert((eco, name, ver), body); - } - - Ok(StubFixtures { - package_checks, - status_overrides: HashMap::new(), - }) -} - -fn parse_package_key(key: &str) -> Result<(String, String, String), Box> { - let parts: Vec<&str> = key.split('/').collect(); - if parts.len() != 3 { - return Err( - format!("package_checks key must be ecosystem/name/version, got {key:?}").into(), - ); - } - Ok(( - parts[0].to_string(), - parts[1].to_string(), - parts[2].to_string(), - )) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_package_key_validates_format() { - assert_eq!( - parse_package_key("npm/lodash/4.17.20").unwrap(), - ( - "npm".to_string(), - "lodash".to_string(), - "4.17.20".to_string() - ) - ); - assert!(parse_package_key("npm/lodash").is_err()); - } -} diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index ea57ef6..ff58f9f 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -1,65 +1,31 @@ -mod fixtures; - use std::collections::HashMap; use std::io::{Read, Write}; use std::net::TcpListener; -use std::path::Path; use std::sync::Arc; use std::thread; use std::time::Duration; -pub use fixtures::load_from_file; - pub type PackageKey = (String, String, String); const NOT_FOUND_BODY: &str = r#"{"error":"not found"}"#; -/// Loaded fixture data for the vuln-api stub server. 
-#[derive(Debug, Clone, Default)] -pub struct StubFixtures { - pub package_checks: HashMap, - pub status_overrides: HashMap, -} - pub struct VulnApiStub { pub base_url: String, _handle: thread::JoinHandle<()>, } -impl VulnApiStub { - /// Block until the stub server thread exits (normally never, unless the listener fails). - pub fn block(self) { - let _ = self._handle.join(); - } -} - -/// Minimal TCP vuln-api stub for CLI integration tests and e2e dogfood. +/// Minimal TCP vuln-api stub for CLI integration tests. Binds an ephemeral +/// 127.0.0.1 port; unknown packages get a synthesized clean 200. pub fn spawn_with_statuses( - fixtures: HashMap, + package_checks: HashMap, status_overrides: HashMap, ) -> VulnApiStub { - spawn_on_port( - StubFixtures { - package_checks: fixtures, - status_overrides, - }, - 0, - ) -} - -/// Bind stub on `port` (`0` = ephemeral). Returns base URL `http://127.0.0.1:{port}`. -pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { - let addr = if port == 0 { - "127.0.0.1:0".to_string() - } else { - format!("127.0.0.1:{port}") - }; - let listener = TcpListener::bind(&addr).unwrap_or_else(|e| panic!("bind stub on {addr}: {e}")); + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); let bound_port = listener.local_addr().expect("stub local_addr").port(); let base_url = format!("http://127.0.0.1:{bound_port}"); - let package_checks = Arc::new(fixtures.package_checks); - let status_overrides = Arc::new(fixtures.status_overrides); + let package_checks = Arc::new(package_checks); + let status_overrides = Arc::new(status_overrides); let handle = thread::spawn(move || { for stream in listener.incoming() { @@ -78,12 +44,6 @@ pub fn spawn_on_port(fixtures: StubFixtures, port: u16) -> VulnApiStub { } } -pub fn spawn_from_file(path: &Path) -> VulnApiStub { - let fixtures = - load_from_file(path).unwrap_or_else(|e| panic!("load stub fixtures {path:?}: {e}")); - spawn_on_port(fixtures, 0) -} - fn handle_connection( 
stream: &mut std::net::TcpStream, package_checks: &Arc>, @@ -220,26 +180,4 @@ mod tests { assert!(resp.starts_with("HTTP/1.1 200"), "resp: {resp}"); assert!(resp.contains(r#""is_vulnerable":false"#), "resp: {resp}"); } - - #[test] - fn fixture_file_loading() { - let dir = tempfile::tempdir().expect("tempdir"); - let path = dir.path().join("fixtures.json"); - std::fs::write( - &path, - r#"{ - "package_checks": { - "npm/left-pad/1.0.0": {"ecosystem":"npm","package_name":"left-pad","version":"1.0.0","is_vulnerable":true,"matches":[]} - } - }"#, - ) - .unwrap(); - let stub = spawn_from_file(&path); - - let resp = get( - &stub.base_url, - "/v1/packages/npm/left-pad/versions/1.0.0/check", - ); - assert!(resp.contains(r#""is_vulnerable":true"#), "resp: {resp}"); - } } From adba36d402538cb16b5d7d222b4cf9b65c2a009c Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 09:52:47 +0200 Subject: [PATCH 21/59] Refine install vuln gate precheck flow --- src/precheck/mod.rs | 95 +++++++++++---------- src/verify_deps/registry.rs | 3 +- src/vuln_api/mod.rs | 43 +++++----- src/vuln_api_stub/mod.rs | 4 +- tests/cli_bare_install.rs | 102 +++------------------- tests/cli_exec_fallback.rs | 69 ++------------- tests/cli_install.rs | 105 ++++++----------------- tests/cli_npm_audit.rs | 67 ++------------- tests/cli_provenance.rs | 86 +++---------------- tests/cli_refusal_context.rs | 75 ++-------------- tests/cli_remediation.rs | 76 +++-------------- tests/cli_tree.rs | 102 +++------------------- tests/cli_verdict.rs | 86 ++++--------------- tests/common/mod.rs | 160 +++++++++++++++++++++++++++++++++++ 14 files changed, 346 insertions(+), 727 deletions(-) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index aded2b3..d4e40b2 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -98,8 +98,9 @@ pub enum VerdictStatus { /// The verdict could not be obtained (network/5xx/auth/integrity). /// Blocks fail-closed. 
Unverifiable(String), - /// Verdict never attempted (no token). Recency-only cover. - NotChecked(String), + /// Verdict never attempted (no token). Recency-only cover; the + /// constant reason (`NO_TOKEN_REASON`) is attached at render time. + NotChecked, } /// Result of re-verdicting a proposed `→ safe version` steer against @@ -165,13 +166,12 @@ pub enum TargetKind { /// Outcome of resolving + verifying a single target. #[derive(Debug, Clone)] pub enum TargetOutcome { - /// Resolved cleanly. `recent` is true when the version was - /// published within the threshold (the blocking condition). + /// Resolved cleanly. The blocking recency condition is derived from + /// `age` against the report's threshold (`PrecheckReport::is_recent`). Resolved { target: InstallTarget, resolved: crate::verify_deps::registry::ResolvedPackage, age: Duration, - recent: bool, verdict: VerdictStatus, }, /// We deliberately couldn't verify this target (URL / git / etc.). @@ -271,11 +271,16 @@ impl PrecheckReport { fn count(&self, pred: impl Fn(&TargetOutcome) -> bool) -> usize { self.outcomes.iter().filter(|o| pred(o)).count() } + /// True when this age is within the recency threshold (the blocking + /// condition). The single definition of "recent". + fn is_recent(&self, age: Duration) -> bool { + age < self.threshold + } pub fn ok_count(&self) -> usize { - self.count(|o| matches!(o, TargetOutcome::Resolved { recent: false, .. })) + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if !self.is_recent(*age))) } pub fn recent_count(&self) -> usize { - self.count(|o| matches!(o, TargetOutcome::Resolved { recent: true, .. })) + self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. 
} if self.is_recent(*age))) } pub fn vulnerable_count(&self) -> usize { self.named_vulnerable_count() + self.tree_vulnerable_count() @@ -413,13 +418,10 @@ fn run_parsed_install( } let now = Utc::now(); - let threshold = - chrono::Duration::from_std(opts.threshold).expect("threshold validated before run_install"); - let mut outcomes: Vec<_> = parsed .targets .iter() - .map(|target| verify_one(target, &opts, &now, threshold)) + .map(|target| verify_one(target, &opts, &now)) .collect(); let tree = if tree_eligible { @@ -722,24 +724,28 @@ fn apply_verdicts( direct_deps: &std::collections::HashSet, ) -> Vec { let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) so matching the + // pooled results stays linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + } + } + let mut transitive = Vec::new(); for (pkg, verdict) in results { - let key = (norm(&pkg.name), pkg.version.clone()); - let mut matched = false; - for o in outcomes.iter_mut() { - if let TargetOutcome::Resolved { - resolved, - verdict: v, - .. - } = o - { - if (norm(&resolved.name), resolved.version.clone()) == key { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. 
} = &mut outcomes[i] { *v = verdict.clone(); - matched = true; } } - } - if !matched { + } else { let origin = if pkg.requested { TreeOrigin::Requested } else if direct_deps.contains(&pkg.name) { @@ -843,7 +849,7 @@ fn verify_steers(report: &mut PrecheckReport, opts: &PrecheckOptions) { let check = match verdict { VerdictStatus::Clean => SteerCheck::Verified, VerdictStatus::Vulnerable(_) => SteerCheck::Rejected, - VerdictStatus::Unverifiable(_) | VerdictStatus::NotChecked(_) => { + VerdictStatus::Unverifiable(_) | VerdictStatus::NotChecked => { SteerCheck::Unverified } }; @@ -865,7 +871,6 @@ fn verify_one( target: &InstallTarget, opts: &PrecheckOptions, now: &chrono::DateTime, - threshold: chrono::Duration, ) -> TargetOutcome { use crate::verify_deps::registry; @@ -886,16 +891,16 @@ fn verify_one( match resolved { Ok(resolved) => { - let age_chrono = now.signed_duration_since(resolved.published_at); - let age = age_chrono + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) .to_std() .unwrap_or_else(|_| Duration::from_secs(0)); TargetOutcome::Resolved { target: target.clone(), resolved, age, - recent: age_chrono < threshold, - verdict: VerdictStatus::NotChecked(NO_TOKEN_REASON.to_string()), + verdict: VerdictStatus::NotChecked, } } Err(e) => TargetOutcome::Error { @@ -1230,7 +1235,7 @@ fn print_text(report: &PrecheckReport) { } } // Clean / not-checked tree entries stay quiet in text mode. 
- VerdictStatus::Clean | VerdictStatus::NotChecked(_) => {} + VerdictStatus::Clean | VerdictStatus::NotChecked => {} } } } @@ -1253,7 +1258,6 @@ fn print_text(report: &PrecheckReport) { target, resolved, age, - recent, verdict, } => match verdict { VerdictStatus::Vulnerable(matches) => { @@ -1271,8 +1275,8 @@ fn print_text(report: &PrecheckReport) { ); } } - VerdictStatus::Clean | VerdictStatus::NotChecked(_) => { - if *recent { + VerdictStatus::Clean | VerdictStatus::NotChecked => { + if report.is_recent(*age) { println!( " ⚠ {} → {}@{} published {} ago at {} (within threshold)", target.display, @@ -1324,8 +1328,8 @@ fn verdict_json(report: &PrecheckReport, name: &str, verdict: &VerdictStatus) -> VerdictStatus::Unverifiable(error) => { json!({ "status": "unverifiable", "error": error }) } - VerdictStatus::NotChecked(reason) => { - json!({ "status": "not_checked", "reason": reason }) + VerdictStatus::NotChecked => { + json!({ "status": "not_checked", "reason": NO_TOKEN_REASON }) } } } @@ -1357,12 +1361,11 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { target, resolved, age, - recent, verdict, } => { let verdict_json = verdict_json(report, &resolved.name, verdict); json!({ - "status": if *recent { "recent" } else { "ok" }, + "status": if report.is_recent(*age) { "recent" } else { "ok" }, "spec": target.display, "name": resolved.name, "resolved_version": resolved.version, @@ -1513,6 +1516,13 @@ mod tests { } fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + // Recency derives from age vs `report_with`'s 2-day threshold: + // one hour ⇒ recent, a year ⇒ not. 
+ let age = if recent { + Duration::from_secs(3600) + } else { + Duration::from_secs(365 * 86400) + }; TargetOutcome::Resolved { target: InstallTarget { name: name.to_string(), @@ -1524,11 +1534,10 @@ mod tests { resolved: crate::verify_deps::registry::ResolvedPackage { name: name.to_string(), version: version.to_string(), - published_at: Utc::now() - chrono::Duration::days(365), + published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), }, - age: Duration::from_secs(365 * 86400), - recent, - verdict: VerdictStatus::NotChecked(NO_TOKEN_REASON.to_string()), + age, + verdict: VerdictStatus::NotChecked, } } @@ -1709,7 +1718,7 @@ mod tests { assert!(matches!( &untouched[0], TargetOutcome::Resolved { - verdict: VerdictStatus::NotChecked(_), + verdict: VerdictStatus::NotChecked, .. } )); diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index b351b79..bfe61cd 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -35,7 +35,8 @@ fn http_client() -> Result<&'static reqwest::blocking::Client, String> { /// URL-encode an npm package name. Scoped names contain `@` and `/`, /// the latter must be encoded as `%2f` for the package metadata URL. -fn encode_npm_name(name: &str) -> String { +/// Also used by `vuln_api` for its npm path segments. +pub(crate) fn encode_npm_name(name: &str) -> String { if let Some(stripped) = name.strip_prefix('@') { if let Some((scope, pkg)) = stripped.split_once('/') { return format!("@{}%2f{}", scope, pkg); diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index c10c8ab..e410861 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -12,6 +12,7 @@ //! token (no global state). use serde::{Deserialize, Serialize}; +use std::sync::OnceLock; use std::time::Duration; use crate::log::debug; @@ -46,13 +47,22 @@ fn user_agent() -> String { format!("corgea-cli/{} (vuln-api)", env!("CARGO_PKG_VERSION")) } +/// Build (once) and clone the shared vuln-api client. 
A blocking reqwest +/// client owns a runtime thread, and a gate makes up to three verdict +/// passes (tree, named-only, steers) — cache it like `registry.rs` does. +/// `Client` clones share the same pool, so the clone is cheap. pub fn http_client() -> Result { - reqwest::blocking::Client::builder() - .timeout(REQUEST_TIMEOUT) - .user_agent(user_agent()) - .redirect(reqwest::redirect::Policy::none()) - .build() - .map_err(|e| format!("failed to build vuln-api http client: {}", e)) + static CLIENT: OnceLock> = OnceLock::new(); + CLIENT + .get_or_init(|| { + reqwest::blocking::Client::builder() + .timeout(REQUEST_TIMEOUT) + .user_agent(user_agent()) + .redirect(reqwest::redirect::Policy::none()) + .build() + .map_err(|e| format!("failed to build vuln-api http client: {}", e)) + }) + .clone() } fn is_jwt(token: &str) -> bool { @@ -65,15 +75,10 @@ fn normalize_base_url(base_url: &str) -> String { } /// Encode package name for the vuln-api path segment. -/// npm scoped names: `@scope/pkg` → `@scope%2fpkg` (mirrors registry.rs). +/// npm scoped names: `@scope/pkg` → `@scope%2fpkg`. fn encode_package_name(ecosystem: &str, name: &str) -> String { if ecosystem.eq_ignore_ascii_case("npm") { - if let Some(stripped) = name.strip_prefix('@') { - if let Some((scope, pkg)) = stripped.split_once('/') { - return format!("@{}%2f{}", scope, pkg); - } - } - name.to_string() + crate::verify_deps::registry::encode_npm_name(name) } else { urlencoding::encode(name).into_owned() } @@ -285,17 +290,7 @@ mod tests { /// Maps a request key to a canned `(status, body)` response. type KeyedResponses = HashMap; - /// Reason phrase for the stub's status line. 
- fn status_text(code: u16) -> &'static str { - match code { - 401 => "Unauthorized", - 403 => "Forbidden", - 404 => "Not Found", - 429 => "Too Many Requests", - 500..=599 => "Internal Server Error", - _ => "Error", - } - } + use crate::vuln_api_stub::status_text; struct PackageCheckStub { base_url: String, diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index ff58f9f..7e3bf47 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -109,7 +109,9 @@ fn handle_connection( let _ = stream.write_all(response.as_bytes()); } -fn status_text(status_code: u16) -> &'static str { +/// Reason phrase for a stub status line. Shared with the in-crate test +/// stubs so the mapping lives once. +pub fn status_text(status_code: u16) -> &'static str { match status_code { 404 => "Not Found", 401 => "Unauthorized", diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index 101f239..bc9a845 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -15,14 +15,14 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_recorder, write_fake_tree_pm, NOT_FOUND_JSON, + OLDPKG_NPM_PACKUMENT, RESOLUTION_FAILS, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -48,90 +48,10 @@ fn vulnerable_evildep_body() -> String { /// Registry stub serving the `/oldpkg` npm packument, published 2020 → never /// recent. Everything else 404s. 
fn spawn_registry_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or(""); - let (status, body) = if path == "/oldpkg" { - ( - "200 OK", - r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#, - ) - } else { - ("404 Not Found", r#"{"message":"not found"}"#) - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Sentinel payload: the fake npm exits non-zero on its tree (resolution) -/// invocation, forcing the named-only fallback. -const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; - -/// Tree-aware fake npm (same scheme as `cli_tree.rs`): an invocation carrying -/// `--package-lock-only` writes `payload` to `./package-lock.json` (the -/// resolver's throwaway temp dir) and exits 0, or exits 1 when `payload` is -/// `RESOLUTION_FAILS`. Any other invocation records its argv to `marker` and -/// exits `exit_code`. Payload is emitted via shell builtins — the locked-down -/// PATH has no `cat`. 
-fn write_fake_npm(dir: &Path, marker: &Path, payload: &str, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - let tree_branch = if payload == RESOLUTION_FAILS { - "exit 1".to_string() - } else { - let payload_path = dir.join("npm-tree-payload.json"); - std::fs::write(&payload_path, payload).expect("write fake npm payload"); - format!( - "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}' > package-lock.json; exit 0", - payload_path.display() - ) - }; - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" --package-lock-only \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", - marker = marker.display(), - ); - let path = dir.join("npm"); - std::fs::write(&path, script).expect("write fake npm"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); -} - -/// Plain recorder for managers with no tree pass (yarn/pnpm/uv): record argv -/// to `marker`, exit `exit_code`. -fn write_fake_recorder(dir: &Path, marker: &Path, binary: &str, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - let script = format!( - "#!/bin/sh\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", - marker = marker.display(), - ); - let path = dir.join(binary); - std::fs::write(&path, script).expect("write fake pm"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); + spawn_http_stub(|path| match path { + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// `corgea` wired to a fake package manager, the registry + vuln-api stubs, @@ -160,8 +80,8 @@ impl BareHarness { let project = TempDir::new().expect("project dir"); let marker = bin.path().join("pm-argv.txt"); match npm_payload { - Some(payload) => write_fake_npm(bin.path(), &marker, payload, exit_code), - None => write_fake_recorder(bin.path(), &marker, binary, exit_code), + Some(payload) => 
write_fake_tree_pm(bin.path(), "npm", &marker, payload, exit_code), + None => write_fake_recorder(bin.path(), binary, &marker, exit_code), } let registry = spawn_registry_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs index a1e1ba8..a3a4609 100644 --- a/tests/cli_exec_fallback.rs +++ b/tests/cli_exec_fallback.rs @@ -10,71 +10,20 @@ mod common; -use common::corgea_isolated; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_recorder, NOT_FOUND_JSON, OLDPKG_PYPI_JSON, +}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; /// Spawn a PyPI stub serving `/pypi/oldpkg/json` (published 2020-01-01, /// safely past the recency threshold). Anything else 404s. fn spawn_pypi_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or(""); - let (status, body) = if path == "/pypi/oldpkg/json" { - ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#, - ) - } else { - ("404 Not Found", r#"{"message":"not found"}"#) - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - 
body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Write an executable fake package manager named `binary` into `dir`. -/// It records its argv to `marker` and exits 0. -fn write_fake_package_manager(dir: &Path, binary: &str, marker: &Path) { - use std::os::unix::fs::PermissionsExt; - let script = format!( - "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit 0\n", - marker.display() - ); - let path = dir.join(binary); - std::fs::write(&path, script).expect("write fake package manager"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) - .expect("chmod fake package manager"); + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// Isolated `corgea` wired to the PyPI stub, with `PATH` set to a private @@ -92,7 +41,7 @@ impl FallbackHarness { let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); for binary in binaries { - write_fake_package_manager(bin.path(), binary, &marker); + write_fake_recorder(bin.path(), binary, &marker, 0); } let registry = spawn_pypi_stub(); cmd.env("PATH", bin.path()) diff --git a/tests/cli_install.rs b/tests/cli_install.rs index 399a8a4..e51f9f0 100644 --- a/tests/cli_install.rs +++ b/tests/cli_install.rs @@ -13,14 +13,14 @@ mod common; -use common::corgea_isolated; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_recorder, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, + OLDPKG_PYPI_JSON, +}; +use std::path::PathBuf; use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use std::thread; use tempfile::TempDir; /// Spawn a registry stub serving both the PyPI and npm routes the @@ -34,87 +34,34 @@ use tempfile::TempDir; /// * `/freshpkg` — npm metadata, published one hour ago /// * 
anything else — 404 fn spawn_registry_stub() -> (String, Arc) { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); let hits = Arc::new(AtomicUsize::new(0)); - let hits_in_thread = Arc::clone(&hits); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - hits_in_thread.fetch_add(1, Ordering::SeqCst); - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or("") - .to_string(); - - let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) - .format("%Y-%m-%dT%H:%M:%SZ") - .to_string(); - let (status, body) = match path.as_str() { - "/pypi/oldpkg/json" => ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), - ), - "/pypi/freshpkg/json" => ( - "200 OK", - format!( - r#"{{"info":{{"name":"freshpkg"}},"releases":{{"9.9.9":[{{"upload_time_iso_8601":"{fresh_ts}"}}]}}}}"#, - ), - ), - "/oldpkg" => ( - "200 OK", - r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), + let hits_in_stub = Arc::clone(&hits); + let base_url = spawn_http_stub(move |path| { + hits_in_stub.fetch_add(1, Ordering::SeqCst); + let fresh_ts = (chrono::Utc::now() - chrono::Duration::hours(1)) + .format("%Y-%m-%dT%H:%M:%SZ") + .to_string(); + match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/pypi/freshpkg/json" => ( + "200 OK", + format!( + 
r#"{{"info":{{"name":"freshpkg"}},"releases":{{"9.9.9":[{{"upload_time_iso_8601":"{fresh_ts}"}}]}}}}"#, ), - "/freshpkg" => ( - "200 OK", - format!( - r#"{{"dist-tags":{{"latest":"9.9.9"}},"versions":{{"9.9.9":{{}}}},"time":{{"9.9.9":"{fresh_ts}"}}}}"#, - ), + ), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + "/freshpkg" => ( + "200 OK", + format!( + r#"{{"dist-tags":{{"latest":"9.9.9"}},"versions":{{"9.9.9":{{}}}},"time":{{"9.9.9":"{fresh_ts}"}}}}"#, ), - _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); + ), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), } }); (base_url, hits) } -/// Write an executable fake package manager named `binary` into `dir`. -/// It records its argv to `marker` and exits with `exit_code` — proving -/// both "the install ran (with these args)" and exit-code forwarding. -fn write_fake_package_manager(dir: &Path, binary: &str, marker: &Path, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - let script = format!( - "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", - marker.display(), - exit_code - ); - let path = dir.join(binary); - std::fs::write(&path, script).expect("write fake package manager"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) - .expect("chmod fake package manager"); -} - /// A ready-to-run wrapper invocation: isolated `corgea` command with the /// registry stub wired in and a fake `binary` on a PATH of its own. 
struct WrapperHarness { @@ -132,7 +79,7 @@ impl WrapperHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_package_manager(bin.path(), binary, &marker, pm_exit_code); + write_fake_recorder(bin.path(), binary, &marker, pm_exit_code); let (base_url, registry_hits) = spawn_registry_stub(); cmd.env("PATH", bin.path()).env(registry_env, &base_url); Self { diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs index 54fb97f..3cc6d22 100644 --- a/tests/cli_npm_audit.rs +++ b/tests/cli_npm_audit.rs @@ -13,14 +13,13 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, emit, spawn_http_stub, write_script, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; use std::path::{Path, PathBuf}; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -70,57 +69,10 @@ enum AuditScenario { /// Registry stub serving the `/oldpkg` npm packument, published 2020 → /// never recent. Everything else 404s. 
fn spawn_registry_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or(""); - - let (status, body) = if path == "/oldpkg" { - ( - "200 OK", - r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#, - ) - } else { - ("404 Not Found", r#"{"message":"not found"}"#) - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Shell loop that emits `path` line by line — works under the locked-down -/// test PATH (no `cat`); the `|| [ -n "$line" ]` guard keeps a final line -/// with no trailing newline. 
-fn emit(path: &Path) -> String { - format!( - "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", - path.display() - ) + spawn_http_stub(|path| match path { + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// Write an executable fake npm into `dir`: @@ -137,7 +89,6 @@ fn write_fake_npm( audit_pid: &Path, scenario: AuditScenario, ) { - use std::os::unix::fs::PermissionsExt; let lock_payload = dir.join("npm-lock-payload.json"); std::fs::write(&lock_payload, NPM_LOCK).expect("write lock payload"); let audit_branch = match scenario { @@ -168,9 +119,7 @@ fn write_fake_npm( audit_marker = audit_marker.display(), marker = marker.display(), ); - let path = dir.join("npm"); - std::fs::write(&path, script).expect("write fake npm"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod"); + write_script(dir, "npm", &script); } /// `corgea` wired to the registry stub, an audit-aware fake npm, and a diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index e59a3f7..5c58360 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -13,14 +13,14 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, + OLDPKG_PYPI_JSON, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -62,75 +62,11 @@ const PROJECT_MANIFEST: &str = /// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm /// packument), both published 2020 → never recent. Everything else 404s. 
fn spawn_registry_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or("") - .to_string(); - - let (status, body) = match path.as_str() { - "/pypi/oldpkg/json" => ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), - ), - "/oldpkg" => ( - "200 OK", - r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), - ), - _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Write an executable fake package manager into `dir`. The tree-resolution -/// invocation (pip `--dry-run` / npm `--package-lock-only`) emits `payload` -/// (stdout for pip, `./package-lock.json` for npm) and exits 0; any other -/// invocation records its argv to `marker` and exits 0. The payload is read -/// via shell builtins because the locked-down test `PATH` has no `cat`. 
-fn write_fake_pm(dir: &Path, marker: &Path, binary: &str, payload: &str) { - use std::os::unix::fs::PermissionsExt; - let (tree_flag, redirect) = match binary { - "pip" => ("--dry-run", ""), - "npm" => ("--package-lock-only", " > package-lock.json"), - other => panic!("unsupported fake manager {other}"), - }; - let payload_path = dir.join(format!("{binary}-tree-payload.json")); - std::fs::write(&payload_path, payload).expect("write fake pm payload"); - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{payload}'{redirect}; exit 0;; esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", - payload = payload_path.display(), - marker = marker.display(), - ); - let path = dir.join(binary); - std::fs::write(&path, script).expect("write fake pm"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod fake pm"); + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// `corgea` wired to the registry stub, a tree-aware fake manager, and a @@ -156,7 +92,7 @@ impl Harness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pm(bin.path(), &marker, binary, payload); + write_fake_tree_pm(bin.path(), binary, &marker, payload, 0); let registry = spawn_registry_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index 2fa963f..36cdfca 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -15,14 +15,13 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, 
OLDPKG_PYPI_JSON, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; /// Refusal when the existing tree alone caused the block. @@ -50,66 +49,10 @@ fn vulnerable_body(name: &str, version: &str) -> String { /// Registry stub serving `/pypi/oldpkg/json`, published 2020 → never recent. /// Everything else 404s. fn spawn_pypi_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or(""); - - let (status, body) = match path { - "/pypi/oldpkg/json" => ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#, - ), - _ => ("404 Not Found", r#"{"message":"not found"}"#), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Write an executable fake pip into `dir`. A `--dry-run` invocation emits -/// the canned tree report on stdout and exits 0; any other invocation records -/// its argv to `marker` and exits 0. 
The payload is read via shell builtins -/// because the test's locked-down `PATH` has no `cat`; the `|| [ -n "$line" ]` -/// guard keeps the final line when the payload file has no trailing newline. -fn write_fake_pip(dir: &Path, marker: &Path) { - use std::os::unix::fs::PermissionsExt; - let payload_path = dir.join("pip-tree-payload.json"); - std::fs::write(&payload_path, TREE_REPORT).expect("write fake pip payload"); - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{payload}'; exit 0;; esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", - payload = payload_path.display(), - marker = marker.display(), - ); - let path = dir.join("pip"); - std::fs::write(&path, script).expect("write fake pip"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) - .expect("chmod fake pip"); + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// `corgea` wired to the registry stub, a tree-aware fake pip, and a vuln-api @@ -126,7 +69,7 @@ impl Harness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pip(bin.path(), &marker); + write_fake_tree_pm(bin.path(), "pip", &marker, TREE_REPORT, 0); let registry = spawn_pypi_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs index 058493c..9769d43 100644 --- a/tests/cli_remediation.rs +++ b/tests/cli_remediation.rs @@ -12,14 +12,14 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_pip_without_report, NOT_FOUND_JSON, + OLDPKG_PYPI_JSON, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use 
std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -52,66 +52,10 @@ fn flagged_fix_body() -> String { /// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never /// recent). Everything else 404s. fn spawn_pypi_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or("") - .to_string(); - - let (status, body) = match path.as_str() { - "/pypi/oldpkg/json" => ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), - ), - _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Write an executable fake `pip` into `dir`. It records its argv to `marker` -/// and exits with `exit_code` — proving both whether the install ran and that -/// the exit code propagates. 
-fn write_fake_pip(dir: &Path, marker: &Path, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - // Simulate an old pip with no `--report`: exit 2 on the tree dry-run - // *without* touching the marker, so these tests exercise the named-only - // fallback path and keep their pre-tree semantics. - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", - marker.display(), - exit_code - ); - let path = dir.join("pip"); - std::fs::write(&path, script).expect("write fake pip"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) - .expect("chmod fake pip"); + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) } /// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. @@ -136,7 +80,7 @@ impl RemediationHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pip(bin.path(), &marker, pip_exit_code); + write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); let registry = spawn_pypi_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 9ba5e91..49a9ab4 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -12,14 +12,14 @@ mod common; -use common::corgea_isolated; +use common::{ + corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, + OLDPKG_PYPI_JSON, RESOLUTION_FAILS, +}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; use std::path::{Path, PathBuf}; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -42,93 +42,11 @@ fn 
vulnerable_evildep_body(ecosystem: &str) -> String { /// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm /// packument), both published 2020 → never recent. Everything else 404s. fn spawn_pypi_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or("") - .to_string(); - - let (status, body) = match path.as_str() { - "/pypi/oldpkg/json" => ( - "200 OK", - r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#.to_string(), - ), - "/oldpkg" => ( - "200 OK", - r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#.to_string(), - ), - _ => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - base_url -} - -/// Sentinel payload that makes the fake manager exit non-zero on its tree -/// (resolution) invocation, forcing the named-only fallback. -const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; - -/// Write an executable fake package manager into `dir`. 
On an invocation -/// whose argv contains `tree_flag` it emits `payload` (to stdout for pip's -/// `--dry-run --report -`, into `./package-lock.json` for npm's -/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) and -/// exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits -/// non-zero instead, emitting nothing. Any other invocation records its argv -/// to `marker` and exits `exit_code`. -/// -/// The payload is read from a sibling file via shell builtins so it works -/// under the test's locked-down `PATH` (which has no `cat`); the -/// `|| [ -n "$line" ]` guard keeps the final line when the payload file has -/// no trailing newline. -fn write_fake_pm(dir: &Path, marker: &Path, binary: &str, payload: &str, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - let (tree_flag, redirect, fail_exit) = match binary { - "pip" => ("--dry-run", "", 2), - "npm" => ("--package-lock-only", " > package-lock.json", 1), - other => panic!("unsupported fake manager {other}"), - }; - let tree_branch = if payload == RESOLUTION_FAILS { - format!("exit {fail_exit}") - } else { - let payload_path = dir.join(format!("{binary}-tree-payload.json")); - std::fs::write(&payload_path, payload).expect("write fake pm payload"); - format!( - "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'{redirect}; exit 0", - payload_path.display() - ) - }; - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", - marker = marker.display(), - ); - let path = dir.join(binary); - std::fs::write(&path, script).expect("write fake pm"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).expect("chmod fake pm"); + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", 
NOT_FOUND_JSON.to_string()), + }) } /// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. @@ -161,7 +79,7 @@ impl TreeHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pm(bin.path(), &marker, binary, payload, exit_code); + write_fake_tree_pm(bin.path(), binary, &marker, payload, exit_code); let registry = spawn_pypi_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 1aab577..87c0af2 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -10,14 +10,11 @@ mod common; -use common::corgea_isolated; +use common::{corgea_isolated, spawn_http_stub, write_fake_pip_without_report, NOT_FOUND_JSON}; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::io::{Read, Write}; -use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::thread; use tempfile::TempDir; fn key(eco: &str, name: &str, ver: &str) -> PackageKey { @@ -34,72 +31,21 @@ fn vulnerable_oldpkg_body() -> String { /// Registry stub serving `/pypi//json` for any single-segment name, /// always version 1.0.0 published 2020 → never recent. Everything else 404s. 
fn spawn_pypi_stub() -> String { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or("") - .to_string(); - - let name = path - .strip_prefix("/pypi/") - .and_then(|p| p.strip_suffix("/json")) - .filter(|n| !n.is_empty() && !n.contains('/')); - let (status, body) = match name { - Some(name) => ( - "200 OK", - format!( - r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# - ), + spawn_http_stub(|path| { + let name = path + .strip_prefix("/pypi/") + .and_then(|p| p.strip_suffix("/json")) + .filter(|n| !n.is_empty() && !n.contains('/')); + match name { + Some(name) => ( + "200 OK", + format!( + r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# ), - None => ("404 Not Found", r#"{"message":"not found"}"#.to_string()), - }; - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); + ), + None => ("404 Not Found", NOT_FOUND_JSON.to_string()), } - }); - base_url -} - -/// Write an executable fake `pip` into `dir`. It records its argv to `marker` -/// and exits with `exit_code` — proving both whether the install ran and that -/// the exit code propagates. 
-fn write_fake_pip(dir: &Path, marker: &Path, exit_code: i32) { - use std::os::unix::fs::PermissionsExt; - // Simulate an old pip with no `--report`: exit 2 on the tree dry-run - // *without* touching the marker, so these tests exercise the named-only - // fallback path and keep their pre-tree semantics. - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", - marker.display(), - exit_code - ); - let path = dir.join("pip"); - std::fs::write(&path, script).expect("write fake pip"); - std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) - .expect("chmod fake pip"); + }) } /// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. @@ -121,7 +67,7 @@ impl VerdictHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pip(bin.path(), &marker, pip_exit_code); + write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); let registry = spawn_pypi_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 1f23471..b61a6c3 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -34,3 +34,163 @@ pub fn corgea_isolated() -> (Command, TempDir) { pub fn fixture(name: &str) -> String { format!("{}/tests/fixtures/{}", env!("CARGO_MANIFEST_DIR"), name) } + +/// Canned 404 body for stub route tables. +#[allow(dead_code)] +pub const NOT_FOUND_JSON: &str = r#"{"message":"not found"}"#; + +/// PyPI release JSON for `oldpkg` 1.0.0, published 2020 → never recent. +#[allow(dead_code)] +pub const OLDPKG_PYPI_JSON: &str = r#"{"info":{"name":"oldpkg"},"releases":{"1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}]}}"#; + +/// npm packument for `oldpkg` 1.0.0, published 2020 → never recent. 
+#[allow(dead_code)] +pub const OLDPKG_NPM_PACKUMENT: &str = r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#; + +/// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 +/// port; `route` maps a request path to `(status line, body)`. Returns the +/// base URL. `Connection: close` is load-bearing — without it reqwest pools +/// the socket and a second request races the close and fails. +#[allow(dead_code)] +pub fn spawn_http_stub(route: F) -> String +where + F: Fn(&str) -> (&'static str, String) + Send + 'static, +{ + use std::io::{Read, Write}; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + std::thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let mut buf = Vec::with_capacity(4096); + let mut chunk = [0u8; 1024]; + while let Ok(n) = stream.read(&mut chunk) { + if n == 0 { + break; + } + buf.extend_from_slice(&chunk[..n]); + if buf.windows(4).any(|w| w == b"\r\n\r\n") { + break; + } + } + let req = String::from_utf8_lossy(&buf); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let (status, body) = route(path); + let response = format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + status, + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + base_url +} + +/// Write `script` as the executable `dir/binary`. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_script(dir: &std::path::Path, binary: &str, script: &str) { + use std::os::unix::fs::PermissionsExt; + let path = dir.join(binary); + std::fs::write(&path, script).expect("write fake script"); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)) + .expect("chmod fake script"); +} + +/// Shell loop that emits the file at `path` line by line via builtins — +/// works under the locked-down test PATH (no `cat`); the `|| [ -n "$line" ]` +/// guard keeps a final line with no trailing newline. +#[cfg(unix)] +#[allow(dead_code)] +pub fn emit(path: &std::path::Path) -> String { + format!( + "while IFS= read -r line || [ -n \"$line\" ]; do printf '%s\\n' \"$line\"; done < '{}'", + path.display() + ) +} + +/// Write an executable fake package manager named `binary` into `dir`. It +/// records its argv to `marker` and exits `exit_code` — proving both "the +/// install ran (with these args)" and exit-code forwarding. +#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_recorder( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + exit_code: i32, +) { + let script = format!( + "#!/bin/sh\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + write_script(dir, binary, &script); +} + +/// Write an executable fake `pip` that simulates an old pip with no +/// `--report`: the tree dry-run exits 2 *without* touching the marker, so +/// tests exercise the named-only fallback path. Any other invocation +/// records its argv to `marker` and exits `exit_code`. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_pip_without_report( + dir: &std::path::Path, + marker: &std::path::Path, + exit_code: i32, +) { + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", + marker.display(), + exit_code + ); + write_script(dir, "pip", &script); +} + +/// Sentinel payload that makes a tree-aware fake manager exit non-zero on +/// its tree (resolution) invocation, forcing the named-only fallback. +#[allow(dead_code)] +pub const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; + +/// Write an executable tree-aware fake package manager into `dir`. An +/// invocation carrying the manager's tree flag emits `payload` (stdout for +/// pip's `--dry-run --report -`, `./package-lock.json` for npm's +/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) +/// and exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits +/// non-zero instead, emitting nothing. Any other invocation records its +/// argv to `marker` and exits `exit_code`. 
+#[cfg(unix)] +#[allow(dead_code)] +pub fn write_fake_tree_pm( + dir: &std::path::Path, + binary: &str, + marker: &std::path::Path, + payload: &str, + exit_code: i32, +) { + let (tree_flag, redirect, fail_exit) = match binary { + "pip" => ("--dry-run", "", 2), + "npm" => ("--package-lock-only", " > package-lock.json", 1), + other => panic!("unsupported fake manager {other}"), + }; + let tree_branch = if payload == RESOLUTION_FAILS { + format!("exit {fail_exit}") + } else { + let payload_path = dir.join(format!("{binary}-tree-payload.json")); + std::fs::write(&payload_path, payload).expect("write fake pm payload"); + format!("{}{redirect}; exit 0", emit(&payload_path)) + }; + let script = format!( + "#!/bin/sh\ncase \" $* \" in *\" {tree_flag} \"*) {tree_branch};; esac\nprintf '%s' \"$*\" > '{marker}'\nexit {exit_code}\n", + marker = marker.display(), + ); + write_script(dir, binary, &script); +} From bd877a9f8b9cc8a88b5138c910bab2a32d991039 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 10:06:41 +0200 Subject: [PATCH 22/59] Tighten install parsing helpers --- src/precheck/parse.rs | 25 +++++++++++++------------ src/vuln_api/mod.rs | 2 ++ tests/cli_tree.rs | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index 6ce7187..220ad11 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -20,23 +20,24 @@ pub struct ParsedInstall { /// `uv pip install` argument list (everything after `pip install`). pub fn parse_pip_install_args(args: &[String]) -> Result { - Ok(build_parsed_install(extract_pip_positionals(args)?, true)) + Ok(build_parsed_install( + extract_pip_positionals(args)?, + parse_pypi_spec, + )) } /// `uv add` argument list (everything after `add`). 
pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { - build_parsed_install(extract_node_positionals(args), true) + build_parsed_install(extract_node_positionals(args), parse_pypi_spec) } -fn build_parsed_install(positionals: PositionalSplit, pypi: bool) -> ParsedInstall { +fn build_parsed_install( + positionals: PositionalSplit, + parse_spec: fn(&str) -> InstallTarget, +) -> ParsedInstall { let mut parsed = ParsedInstall::default(); for raw in &positionals.specs { - let target = if pypi { - parse_pypi_spec(raw) - } else { - parse_npm_spec(raw) - }; - parsed.targets.push(target); + parsed.targets.push(parse_spec(raw)); } parsed.requirements_files = positionals.requirements_files; parsed @@ -48,9 +49,9 @@ pub fn parse_install_args( ) -> Result { match manager { PackageManager::Pip => parse_pip_install_args(args), - PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { - Ok(build_parsed_install(extract_node_positionals(args), false)) - } + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => Ok( + build_parsed_install(extract_node_positionals(args), parse_npm_spec), + ), PackageManager::Uv => unreachable!("uv uses classify_uv_command"), } } diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index e410861..bd4c5b6 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -65,6 +65,8 @@ pub fn http_client() -> Result { .clone() } +// Twin of `is_jwt` in the binary crate's `utils/api.rs` — unreachable from +// this library crate (like `log`, re-declared in `lib.rs`). Keep in sync. 
fn is_jwt(token: &str) -> bool { let parts: Vec<&str> = token.splitn(4, '.').collect(); parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 49a9ab4..5ede5bd 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -18,7 +18,7 @@ use common::{ }; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; From c5a8d7bf80fec5b4360023fe8f1b11d7b8e08e55 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 10:50:57 +0200 Subject: [PATCH 23/59] Dedup integration-test scaffolding into common Promote the byte-identical per-file copies of key(), the vulnerable-body JSON builders, the NPM_LOCK/TREE_REPORT fixtures, and the oldpkg registry stubs into tests/common, and share the pip/tree harnesses that differed only by name. Collapse the pip/npm mirror tests in cli_tree and the bare ungated-manager note tests into table-driven loops. No coverage change; net -296 test LOC. 
--- tests/cli_bare_install.rs | 118 ++++------------- tests/cli_exec_fallback.rs | 15 +-- tests/cli_npm_audit.rs | 35 +---- tests/cli_provenance.rs | 110 +++------------- tests/cli_refusal_context.rs | 134 +++++++------------ tests/cli_remediation.rs | 101 ++------------- tests/cli_tree.rs | 241 +++++++++++------------------------ tests/cli_verdict.rs | 101 +++------------ tests/common/mod.rs | 165 ++++++++++++++++++++++++ 9 files changed, 362 insertions(+), 658 deletions(-) diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index bc9a845..df2065a 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -16,8 +16,8 @@ mod common; use common::{ - corgea_isolated, spawn_http_stub, write_fake_recorder, write_fake_tree_pm, NOT_FOUND_JSON, - OLDPKG_NPM_PACKUMENT, RESOLUTION_FAILS, + corgea_isolated, key, spawn_oldpkg_registry_stub, vulnerable_body, write_fake_recorder, + write_fake_tree_pm, NPM_LOCK, RESOLUTION_FAILS, }; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; @@ -25,33 +25,10 @@ use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} - -/// npm lockfile-v3 fixture the fake npm "resolves" from `package.json`: -/// `oldpkg` 1.0.0 + `evildep` 0.4.2 — with zero specs, both are transitive. 
-const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ - "":{"name":"proj","version":"1.0.0"}, - "node_modules/oldpkg":{"version":"1.0.0"}, - "node_modules/evildep":{"version":"0.4.2"}}}"#; - const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; fn vulnerable_evildep_body() -> String { - r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string() -} - -/// Registry stub serving the `/oldpkg` npm packument, published 2020 → never -/// recent. Everything else 404s. -fn spawn_registry_stub() -> String { - spawn_http_stub(|path| match path { - "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None) } /// `corgea` wired to a fake package manager, the registry + vuln-api stubs, @@ -83,7 +60,7 @@ impl BareHarness { Some(payload) => write_fake_tree_pm(bin.path(), "npm", &marker, payload, exit_code), None => write_fake_recorder(bin.path(), binary, &marker, exit_code), } - let registry = spawn_registry_stub(); + let registry = spawn_oldpkg_registry_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); cmd.env("PATH", bin.path()) .env("CORGEA_NPM_REGISTRY", ®istry) @@ -237,26 +214,29 @@ fn bare_npm_tokenless_passes_through() { } #[test] -fn bare_yarn_install_prints_note_and_execs() { - let mut h = BareHarness::new("yarn", HashMap::new(), None, 7); - let out = h - .cmd - .args(["yarn", "install"]) - .output() - .expect("run corgea"); - assert_eq!( - out.status.code(), - Some(7), - "yarn's own exit code propagates" - ); - assert_eq!(h.recorded_argv().as_deref(), Some("install")); - assert!( - String::from_utf8_lossy(&out.stderr).contains( - "note: bare 'yarn install' is not 
gated (no safe dry-run) — dependencies install unchecked" - ), - "stderr: {}", - String::from_utf8_lossy(&out.stderr) - ); +fn bare_ungated_managers_print_note_and_exec() { + // yarn's nonzero exit also proves the manager's own exit code propagates. + let cases = [ + ("yarn", &["yarn", "install"][..], "install", 7), + ("pnpm", &["pnpm", "install"][..], "install", 0), + ("uv", &["uv", "add"][..], "add", 0), + ("uv", &["uv", "pip", "install"][..], "pip install", 0), + ]; + for (binary, args, forwarded_argv, exit_code) in cases { + let mut h = BareHarness::new(binary, HashMap::new(), None, exit_code); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(exit_code), "{args:?}"); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + let note = format!( + "note: bare '{}' is not gated (no safe dry-run) — dependencies install unchecked", + args.join(" ") + ); + assert!( + String::from_utf8_lossy(&out.stderr).contains(¬e), + "{args:?} stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); + } } #[test] @@ -276,50 +256,6 @@ fn bare_yarn_note_prints_without_token_too() { ); } -#[test] -fn bare_pnpm_install_prints_note() { - let mut h = BareHarness::new("pnpm", HashMap::new(), None, 0); - let out = h - .cmd - .args(["pnpm", "install"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("install")); - assert!( - String::from_utf8_lossy(&out.stderr).contains("bare 'pnpm install' is not gated"), - "stderr: {}", - String::from_utf8_lossy(&out.stderr) - ); -} - -#[test] -fn bare_uv_add_and_pip_install_print_note() { - let mut h = BareHarness::new("uv", HashMap::new(), None, 0); - let out = h.cmd.args(["uv", "add"]).output().expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("add")); - assert!( - String::from_utf8_lossy(&out.stderr).contains("bare 'uv add' is not gated"), - "stderr: {}", 
- String::from_utf8_lossy(&out.stderr) - ); - - let mut h = BareHarness::new("uv", HashMap::new(), None, 0); - let out = h - .cmd - .args(["uv", "pip", "install"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("pip install")); - assert!( - String::from_utf8_lossy(&out.stderr).contains("bare 'uv pip install' is not gated"), - "stderr: {}", - String::from_utf8_lossy(&out.stderr) - ); -} - #[test] fn yarn_named_target_does_not_print_bare_note() { // A named target takes the gated path: named-only warning, no bare note. diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs index a3a4609..4b29a5c 100644 --- a/tests/cli_exec_fallback.rs +++ b/tests/cli_exec_fallback.rs @@ -10,22 +10,11 @@ mod common; -use common::{ - corgea_isolated, spawn_http_stub, write_fake_recorder, NOT_FOUND_JSON, OLDPKG_PYPI_JSON, -}; +use common::{corgea_isolated, spawn_oldpkg_registry_stub, write_fake_recorder}; use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; -/// Spawn a PyPI stub serving `/pypi/oldpkg/json` (published 2020-01-01, -/// safely past the recency threshold). Anything else 404s. -fn spawn_pypi_stub() -> String { - spawn_http_stub(|path| match path { - "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) -} - /// Isolated `corgea` wired to the PyPI stub, with `PATH` set to a private /// temp dir containing only the named fake binaries. 
struct FallbackHarness { @@ -43,7 +32,7 @@ impl FallbackHarness { for binary in binaries { write_fake_recorder(bin.path(), binary, &marker, 0); } - let registry = spawn_pypi_stub(); + let registry = spawn_oldpkg_registry_stub(); cmd.env("PATH", bin.path()) .env("CORGEA_PYPI_REGISTRY", ®istry); Self { diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs index 3cc6d22..41a313d 100644 --- a/tests/cli_npm_audit.rs +++ b/tests/cli_npm_audit.rs @@ -14,7 +14,7 @@ mod common; use common::{ - corgea_isolated, emit, spawn_http_stub, write_script, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, + corgea_isolated, emit, key, spawn_oldpkg_registry_stub, vulnerable_body, write_script, NPM_LOCK, }; use corgea::vuln_api_stub::{self, PackageKey}; use std::collections::HashMap; @@ -22,16 +22,6 @@ use std::path::{Path, PathBuf}; use std::process::Command; use tempfile::TempDir; -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} - -/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. -const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ - "":{"name":"proj","version":"1.0.0"}, - "node_modules/oldpkg":{"version":"1.0.0"}, - "node_modules/evildep":{"version":"0.4.2"}}}"#; - /// npm audit report v2 with two advisories: 1 critical + 1 high. const AUDIT_ADVISORIES: &str = r#"{"auditReportVersion":2, "vulnerabilities":{ @@ -45,13 +35,6 @@ const AUDIT_CLEAN: &str = r#"{"auditReportVersion":2,"vulnerabilities":{}, "metadata":{"vulnerabilities": {"info":0,"low":0,"moderate":0,"high":0,"critical":0,"total":0}}}"#; -fn vulnerable_evildep_body() -> String { - r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string() -} - /// How the fake npm behaves on its `audit --json` invocation. 
#[derive(Clone, Copy)] enum AuditScenario { @@ -66,15 +49,6 @@ enum AuditScenario { Hang, } -/// Registry stub serving the `/oldpkg` npm packument, published 2020 → -/// never recent. Everything else 404s. -fn spawn_registry_stub() -> String { - spawn_http_stub(|path| match path { - "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) -} - /// Write an executable fake npm into `dir`: /// * `audit` (checked first — the audit argv also carries /// `--package-lock-only`) → records argv to `audit_marker`, then acts out @@ -141,7 +115,7 @@ impl AuditHarness { let audit_marker = bin.path().join("audit-argv.txt"); let audit_pid = bin.path().join("audit-pid.txt"); write_fake_npm(bin.path(), &marker, &audit_marker, &audit_pid, scenario); - let registry = spawn_registry_stub(); + let registry = spawn_oldpkg_registry_stub(); let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); cmd.env("PATH", bin.path()) .env("CORGEA_NPM_REGISTRY", ®istry) @@ -315,7 +289,10 @@ fn audit_never_unblocks_a_vulnerable_verdict() { // findings. Block behaviour and exit code are the verdict's alone — the // audit note still prints as a supplementary signal. 
let mut checks = HashMap::new(); - checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); let mut h = AuditHarness::new(checks, AuditScenario::Advisories); let out = h .cmd diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index 5c58360..bbd0c8b 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -13,28 +13,13 @@ mod common; -use common::{ - corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, - OLDPKG_PYPI_JSON, -}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, TreeHarness, NPM_LOCK}; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; use tempfile::TempDir; -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} - -/// Vulnerable verdict body; `fixed_version` is spliced in as given -/// (`"1.2.2"` or `null`). -fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: &str) -> String { - format!( - r#"{{"ecosystem":"{ecosystem}","package_name":"{name}","version":"{version}","is_vulnerable":true, - "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":{fixed}}}]}}"# - ) +/// Vulnerable verdict body; `fixed: None` renders `"fixed_version":null`. +fn vulnerable_body(ecosystem: &str, name: &str, version: &str, fixed: Option<&str>) -> String { + common::vulnerable_body(ecosystem, name, version, "MAL-2024-0002", fixed) } /// Pip report: only `reqpkg`, requested (as if it came from a `-r` file). 
@@ -48,71 +33,10 @@ const PIP_MIXED_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install": {"metadata":{"name":"reqpkg","version":"6.0.0"},"requested":true}, {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; -/// npm lockfile-v3: named `oldpkg` 1.0.0 + `evildep` 0.4.2 (resolved from the -/// project's pre-existing direct dep). -const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ - "":{"name":"proj","version":"1.0.0"}, - "node_modules/oldpkg":{"version":"1.0.0"}, - "node_modules/evildep":{"version":"0.4.2"}}}"#; - /// Project manifest that already declares `evildep` as a direct dep. const PROJECT_MANIFEST: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"evildep":"^0.4.0"}}"#; -/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm -/// packument), both published 2020 → never recent. Everything else 404s. -fn spawn_registry_stub() -> String { - spawn_http_stub(|path| match path { - "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), - "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) -} - -/// `corgea` wired to the registry stub, a tree-aware fake manager, and a -/// vuln-api stub. 
-struct Harness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl Harness { - fn new(binary: &str, checks: HashMap, payload: &str) -> Self { - Self::new_with_statuses(binary, checks, HashMap::new(), payload) - } - - fn new_with_statuses( - binary: &str, - checks: HashMap, - statuses: HashMap, - payload: &str, - ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_tree_pm(bin.path(), binary, &marker, payload, 0); - let registry = spawn_registry_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_NPM_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token"); - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } -} - /// Project dir holding a `package.json` that already declares `evildep`. 
fn npm_project() -> TempDir { let project = TempDir::new().expect("project dir"); @@ -128,9 +52,9 @@ fn pip_requirements_finding_labeled_from_requirements() { let mut checks = HashMap::new(); checks.insert( key("pypi", "reqpkg", "6.0.0"), - vulnerable_body("pypi", "reqpkg", "6.0.0", "null"), + vulnerable_body("pypi", "reqpkg", "6.0.0", None), ); - let mut h = Harness::new("pip", checks, PIP_REQ_REPORT); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), PIP_REQ_REPORT); let out = h .cmd .args(["pip", "install", "-r", "reqs.txt"]) @@ -156,9 +80,9 @@ fn npm_preexisting_direct_dep_labeled_with_fix_hint() { let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), ); - let mut h = Harness::new("npm", checks, NPM_LOCK); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -186,11 +110,11 @@ fn npm_preexisting_fix_hint_keeps_hedge_when_unverifiable() { let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), ); let mut statuses = HashMap::new(); statuses.insert(key("npm", "evildep", "1.2.2"), 503u16); - let mut h = Harness::new_with_statuses("npm", checks, statuses, NPM_LOCK); + let mut h = TreeHarness::new("npm", checks, statuses, NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -224,11 +148,11 @@ fn preexisting_vulnerable_with_unverifiable_transitive_keeps_generic_refusal() { let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", "null"), + vulnerable_body("npm", "evildep", "0.4.2", None), ); let mut statuses = HashMap::new(); statuses.insert(key("npm", "newdep", "2.0.0"), 503u16); - let mut h = Harness::new_with_statuses("npm", 
checks, statuses, LOCK_WITH_NEWDEP); + let mut h = TreeHarness::new("npm", checks, statuses, LOCK_WITH_NEWDEP); let out = h .cmd .current_dir(project.path()) @@ -253,9 +177,9 @@ fn npm_preexisting_without_fix_has_no_hint() { let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", "null"), + vulnerable_body("npm", "evildep", "0.4.2", None), ); - let mut h = Harness::new("npm", checks, NPM_LOCK); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -279,7 +203,7 @@ fn pip_json_carries_origin_per_tree_entry() { // All-clean run mixing origins: the named `oldpkg` matches its outcome, // `reqpkg` (requested) and `evildep` (transitive) land in `tree.transitive` // with their origins. - let mut h = Harness::new("pip", HashMap::new(), PIP_MIXED_REPORT); + let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), PIP_MIXED_REPORT); let out = h .cmd .args([ @@ -317,9 +241,9 @@ fn npm_json_carries_preexisting_origin() { let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", r#""1.2.2""#), + vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), ); - let mut h = Harness::new("npm", checks, NPM_LOCK); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index 36cdfca..a14daaa 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -15,13 +15,9 @@ mod common; -use common::{ - corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, OLDPKG_PYPI_JSON, -}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, TreeHarness, TREE_REPORT}; +use corgea::vuln_api_stub::PackageKey; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; use 
tempfile::TempDir; /// Refusal when the existing tree alone caused the block. @@ -29,83 +25,19 @@ const TREE_REFUSAL: &str = "Refusing to run install: your existing dependency tr /// Refusal when a named target carries a blocking verdict. const GENERIC_REFUSAL: &str = "Refusing to run install. Pass --force to proceed despite findings."; -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} - -/// Pip `--report -` payload: `oldpkg` (named) + `evildep` (transitive). -const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ - {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, - {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; - fn vulnerable_body(name: &str, version: &str) -> String { - format!( - r#"{{"ecosystem":"pypi","package_name":"{name}","version":"{version}","is_vulnerable":true, - "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}}]}}"# - ) -} - -/// Registry stub serving `/pypi/oldpkg/json`, published 2020 → never recent. -/// Everything else 404s. -fn spawn_pypi_stub() -> String { - spawn_http_stub(|path| match path { - "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) + common::vulnerable_body("pypi", name, version, "MAL-2024-0002", None) } -/// `corgea` wired to the registry stub, a tree-aware fake pip, and a vuln-api -/// stub. 
-struct Harness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, +fn harness(checks: HashMap, statuses: HashMap) -> TreeHarness { + TreeHarness::new("pip", checks, statuses, TREE_REPORT) } -impl Harness { - fn new(checks: HashMap, statuses: HashMap) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_tree_pm(bin.path(), "pip", &marker, TREE_REPORT, 0); - let registry = spawn_pypi_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token"); - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - fn run_install(&mut self) -> std::process::Output { - self.cmd - .args(["pip", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea") - } - - /// `pip install -r reqs.txt` with no named targets — the canned tree - /// report still resolves oldpkg (requested) + evildep (transitive). 
- fn run_requirements_install(&mut self) -> std::process::Output { - let reqs = self._bin.path().join("reqs.txt"); - std::fs::write(&reqs, "oldpkg==1.0.0\n").expect("write reqs.txt"); - self.cmd - .args(["pip", "install", "-r"]) - .arg(&reqs) - .output() - .expect("run corgea") - } - - fn pip_ran(&self) -> bool { - self.marker.exists() - } +fn run_install(h: &mut TreeHarness) -> std::process::Output { + h.cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea") } #[test] @@ -118,11 +50,14 @@ fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { key("pypi", "evildep", "0.4.2"), vulnerable_body("evildep", "0.4.2"), ); - let mut h = Harness::new(checks, HashMap::new()); - let out = h.run_install(); + let mut h = harness(checks, HashMap::new()); + let out = run_install(&mut h); assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); - assert!(!h.pip_ran(), "pip must not run on a blocked install"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); let stderr = String::from_utf8_lossy(&out.stderr); assert!( stderr.contains(GENERIC_REFUSAL), @@ -150,11 +85,24 @@ fn requirements_only_install_with_vulnerable_transitive_keeps_generic_refusal() key("pypi", "evildep", "0.4.2"), vulnerable_body("evildep", "0.4.2"), ); - let mut h = Harness::new(checks, HashMap::new()); - let out = h.run_requirements_install(); + let mut h = harness(checks, HashMap::new()); + // `pip install -r reqs.txt` with no named targets — the canned tree + // report still resolves oldpkg (requested) + evildep (transitive). 
+ let reqs_dir = TempDir::new().expect("reqs dir"); + let reqs = reqs_dir.path().join("reqs.txt"); + std::fs::write(&reqs, "oldpkg==1.0.0\n").expect("write reqs.txt"); + let out = h + .cmd + .args(["pip", "install", "-r"]) + .arg(&reqs) + .output() + .expect("run corgea"); assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); - assert!(!h.pip_ran(), "pip must not run on a blocked install"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); let stderr = String::from_utf8_lossy(&out.stderr); assert!( stderr.contains(GENERIC_REFUSAL), @@ -174,11 +122,14 @@ fn named_vulnerable_keeps_generic_refusal() { key("pypi", "oldpkg", "1.0.0"), vulnerable_body("oldpkg", "1.0.0"), ); - let mut h = Harness::new(checks, HashMap::new()); - let out = h.run_install(); + let mut h = harness(checks, HashMap::new()); + let out = run_install(&mut h); assert_eq!(out.status.code(), Some(1), "named vuln must block"); - assert!(!h.pip_ran(), "pip must not run on a blocked install"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); let stderr = String::from_utf8_lossy(&out.stderr); assert!( stderr.contains(GENERIC_REFUSAL), @@ -207,11 +158,14 @@ fn named_unverifiable_with_transitive_vulnerable_keeps_generic_refusal() { ); let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); - let mut h = Harness::new(checks, statuses); - let out = h.run_install(); + let mut h = harness(checks, statuses); + let out = run_install(&mut h); assert_eq!(out.status.code(), Some(1), "must block"); - assert!(!h.pip_ran(), "pip must not run on a blocked install"); + assert!( + h.recorded_argv().is_none(), + "pip must not run on a blocked install" + ); let stderr = String::from_utf8_lossy(&out.stderr); assert!( stderr.contains(GENERIC_REFUSAL), diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs index 9769d43..393731f 100644 --- a/tests/cli_remediation.rs +++ 
b/tests/cli_remediation.rs @@ -4,102 +4,29 @@ //! proposal prints the rejection note instead; a failed re-check suppresses //! the steer quietly without moving counts or exit codes. //! -//! Mirrors the `cli_verdict.rs` harness (inline PyPI stub published 2020 so -//! recency never blocks, a fake pip recording its argv, the in-crate vuln-api -//! stub, and a set token) — every block here is the verdict's doing. +//! Uses the shared `common::PipHarness` (pypi stub published 2020 so recency +//! never blocks, a fake pip recording its argv, the in-crate vuln-api stub, +//! and a set token) — every block here is the verdict's doing. #![cfg(unix)] mod common; -use common::{ - corgea_isolated, spawn_http_stub, write_fake_pip_without_report, NOT_FOUND_JSON, - OLDPKG_PYPI_JSON, -}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, vulnerable_body, PipHarness}; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; -use tempfile::TempDir; - -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} fn fixed_body() -> String { - r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":"2.0.0"}]}"# - .to_string() + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) } fn no_fix_body() -> String { - r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string() + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None) } /// The advertised fix `oldpkg@2.0.0` is itself flagged — the steer re-check /// must reject it. 
fn flagged_fix_body() -> String { - r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"2.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string() -} - -/// Registry stub serving only `/pypi/oldpkg/json` (published 2020 → never -/// recent). Everything else 404s. -fn spawn_pypi_stub() -> String { - spawn_http_stub(|path| match path { - "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) -} - -/// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. -struct RemediationHarness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl RemediationHarness { - fn new(checks: HashMap, token: Option<&str>, pip_exit_code: i32) -> Self { - Self::with_statuses(checks, HashMap::new(), token, pip_exit_code) - } - - fn with_statuses( - checks: HashMap, - statuses: HashMap, - token: Option<&str>, - pip_exit_code: i32, - ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); - let registry = spawn_pypi_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); - if let Some(t) = token { - cmd.env("CORGEA_TOKEN", t); - } - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } + vulnerable_body("pypi", "oldpkg", "2.0.0", "MAL-2024-0003", None) } #[test] @@ -108,7 +35,7 @@ fn fixed_match_blocks_and_names_safe_version() { // re-check, so the proposal verifies and the steer prints. 
let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -132,7 +59,7 @@ fn fixed_match_blocks_and_names_safe_version() { fn no_fix_match_reports_no_fixed_version_known() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -159,7 +86,7 @@ fn no_fix_match_reports_no_fixed_version_known() { fn json_remediation_carries_safe_version() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -179,7 +106,7 @@ fn json_remediation_carries_safe_version() { fn json_remediation_null_when_no_fix() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -207,7 +134,7 @@ fn rejected_fix_prints_rejection_instead_of_steer() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", 
"oldpkg==1.0.0"]) @@ -238,7 +165,7 @@ fn unverified_fix_suppresses_steer_quietly() { checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "2.0.0"), 503u16); - let mut h = RemediationHarness::with_statuses(checks, statuses, Some("test-token"), 0); + let mut h = PipHarness::new(checks, statuses, Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -270,7 +197,7 @@ fn json_remediation_null_when_fix_rejected() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); - let mut h = RemediationHarness::new(checks, Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 5ede5bd..902662b 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -12,137 +12,86 @@ mod common; -use common::{ - corgea_isolated, spawn_http_stub, write_fake_tree_pm, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, - OLDPKG_PYPI_JSON, RESOLUTION_FAILS, -}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, vulnerable_body, TreeHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT}; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; use tempfile::TempDir; -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} - -/// Pip `--report -` payload: `oldpkg` (named) + `evildep` (transitive). 
-const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ - {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, - {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; - fn vulnerable_evildep_body(ecosystem: &str) -> String { - format!( - r#"{{"ecosystem":"{ecosystem}","package_name":"evildep","version":"0.4.2","is_vulnerable":true, - "matches":[{{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}}]}}"# - ) -} - -/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm -/// packument), both published 2020 → never recent. Everything else 404s. -fn spawn_pypi_stub() -> String { - spawn_http_stub(|path| match path { - "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), - "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), - _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), - }) -} - -/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. -const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ - "":{"name":"proj","version":"1.0.0"}, - "node_modules/oldpkg":{"version":"1.0.0"}, - "node_modules/evildep":{"version":"0.4.2"}}}"#; - -/// `corgea` wired to the registry stub, a tree-aware fake pip, and a vuln-api -/// stub. -struct TreeHarness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl TreeHarness { - /// Wires the registry + vuln-api stubs, token, and a fake `binary` - /// (`"pip"` or `"npm"`) into a private PATH dir. `payload` is the canned - /// tree-resolution output (pip report / npm lockfile), or - /// `RESOLUTION_FAILS` to simulate a failed resolution. 
- fn new( - binary: &str, - checks: HashMap, - statuses: HashMap, - payload: &str, - exit_code: i32, - ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_tree_pm(bin.path(), binary, &marker, payload, exit_code); - let registry = spawn_pypi_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_NPM_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token"); - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } + vulnerable_body(ecosystem, "evildep", "0.4.2", "MAL-2024-0002", None) } #[test] -fn pip_transitive_vulnerable_blocks_install() { +fn transitive_vulnerable_blocks_install() { // Only the transitive `evildep` is flagged; the named `oldpkg` is clean. 
- let mut checks = HashMap::new(); - checks.insert( - key("pypi", "evildep", "0.4.2"), - vulnerable_evildep_body("pypi"), - ); - let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT, 0); - let out = h - .cmd - .args(["pip", "--concurrency", "2", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); - assert_eq!( - h.recorded_argv(), - None, - "pip must not run on a transitive vulnerable verdict" - ); - let stdout = String::from_utf8_lossy(&out.stdout); - assert!(stdout.contains("evildep"), "stdout: {stdout}"); - assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); - assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); + let cases = [ + ( + "pip", + "pypi", + TREE_REPORT, + &["pip", "--concurrency", "2", "install", "oldpkg==1.0.0"][..], + ), + ( + "npm", + "npm", + NPM_LOCK, + &["npm", "install", "oldpkg@1.0.0"][..], + ), + ]; + for (binary, eco, payload, args) in cases { + let mut checks = HashMap::new(); + checks.insert(key(eco, "evildep", "0.4.2"), vulnerable_evildep_body(eco)); + let mut h = TreeHarness::new(binary, checks, HashMap::new(), payload); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "{binary}: transitive vuln must block" + ); + assert_eq!( + h.recorded_argv(), + None, + "{binary} must not run on a transitive vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "{binary} stdout: {stdout}"); + } + } } #[test] -fn pip_dry_run_failure_falls_back_with_loud_warning() { - // Fake pip exits 2 on `--dry-run` (simulates old pip with no `--report`). - // Stub is all-clean, so the named-only fallback proceeds. 
- let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), RESOLUTION_FAILS, 0); - let out = h - .cmd - .args(["pip", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0), "clean named-only must proceed"); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); - assert!( - String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), - "stderr must carry the fallback warning: {}", - String::from_utf8_lossy(&out.stderr) - ); +fn resolution_failure_falls_back_with_loud_warning() { + // The fake manager fails its tree invocation (pip: exits 2 on `--dry-run`, + // simulating an old pip with no `--report`; npm: exits 1 on + // `--package-lock-only`). Stub is all-clean, so the named-only fallback + // proceeds. + let cases = [ + ( + "pip", + &["pip", "install", "oldpkg==1.0.0"][..], + "install oldpkg==1.0.0", + ), + ( + "npm", + &["npm", "install", "oldpkg@1.0.0"][..], + "install oldpkg@1.0.0", + ), + ]; + for (binary, args, forwarded_argv) in cases { + let mut h = TreeHarness::new(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); + let out = h.cmd.args(args).output().expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "{binary}: clean named-only must proceed" + ); + assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); + assert!( + String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), + "{binary} stderr must carry the fallback warning: {}", + String::from_utf8_lossy(&out.stderr) + ); + } } #[test] @@ -152,7 +101,7 @@ fn pip_json_carries_tree_object() { key("pypi", "evildep", "0.4.2"), vulnerable_evildep_body("pypi"), ); - let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT, 0); + let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -174,7 +123,7 @@ fn pip_json_carries_tree_object() { 
#[test] fn pip_clean_tree_proceeds() { // Stub default-clean (no overrides), so every resolved package is clean. - let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), TREE_REPORT, 0); + let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), TREE_REPORT); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -189,52 +138,6 @@ fn pip_clean_tree_proceeds() { ); } -#[test] -fn npm_transitive_vulnerable_blocks_install() { - // The generated lockfile carries a transitive `evildep` 0.4.2 that the - // vuln stub flags; the named `oldpkg` is clean. - let mut checks = HashMap::new(); - checks.insert( - key("npm", "evildep", "0.4.2"), - vulnerable_evildep_body("npm"), - ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK, 0); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); - assert_eq!( - h.recorded_argv(), - None, - "npm must not run on a transitive vulnerable verdict" - ); - let stdout = String::from_utf8_lossy(&out.stdout); - assert!(stdout.contains("evildep"), "stdout: {stdout}"); - assert!(stdout.contains("MAL-2024-0002"), "stdout: {stdout}"); - assert!(stdout.contains("(transitive)"), "stdout: {stdout}"); -} - -#[test] -fn npm_resolution_failure_falls_back_with_warning() { - // Fake npm exits 1 on `--package-lock-only`. Stub is all-clean, so the - // named-only fallback proceeds with a loud warning. 
- let mut h = TreeHarness::new("npm", HashMap::new(), HashMap::new(), RESOLUTION_FAILS, 0); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0), "clean named-only must proceed"); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - assert!( - String::from_utf8_lossy(&out.stderr).contains("transitive dependencies not checked"), - "stderr must carry the fallback warning: {}", - String::from_utf8_lossy(&out.stderr) - ); -} - #[test] fn npm_does_not_touch_project_lockfile() { // Run from a project dir holding sentinel manifests; the resolver works in @@ -252,7 +155,7 @@ fn npm_does_not_touch_project_lockfile() { key("npm", "evildep", "0.4.2"), vulnerable_evildep_body("npm"), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK, 0); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 87c0af2..e11b904 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -2,98 +2,27 @@ //! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). //! //! Composes the `cli_install.rs` harness pattern (fake package manager on a -//! private PATH + local pypi registry stub) with the in-crate vuln-api stub. -//! `oldpkg==1.0.0` is published in 2020, so recency never blocks here — -//! every block in this file is the verdict's doing. +//! private PATH + local pypi registry stub) with the in-crate vuln-api stub — +//! the shared `common::PipHarness`. `oldpkg==1.0.0` is published in 2020, so +//! recency never blocks here — every block in this file is the verdict's +//! doing. 
#![cfg(unix)] mod common; -use common::{corgea_isolated, spawn_http_stub, write_fake_pip_without_report, NOT_FOUND_JSON}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, vulnerable_body, PipHarness}; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; -use tempfile::TempDir; - -fn key(eco: &str, name: &str, ver: &str) -> PackageKey { - (eco.to_string(), name.to_string(), ver.to_string()) -} fn vulnerable_oldpkg_body() -> String { - r#"{"ecosystem":"pypi","package_name":"oldpkg","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":"2.0.0"}]}"# - .to_string() -} - -/// Registry stub serving `/pypi//json` for any single-segment name, -/// always version 1.0.0 published 2020 → never recent. Everything else 404s. -fn spawn_pypi_stub() -> String { - spawn_http_stub(|path| { - let name = path - .strip_prefix("/pypi/") - .and_then(|p| p.strip_suffix("/json")) - .filter(|n| !n.is_empty() && !n.contains('/')); - match name { - Some(name) => ( - "200 OK", - format!( - r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# - ), - ), - None => ("404 Not Found", NOT_FOUND_JSON.to_string()), - } - }) -} - -/// `corgea` wired to the registry stub, a fake pip, and a vuln-api stub. -struct VerdictHarness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl VerdictHarness { - /// `token: None` exercises tokenless mode (no CORGEA_TOKEN set). 
- fn new( - checks: HashMap, - statuses: HashMap, - token: Option<&str>, - pip_exit_code: i32, - ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); - let registry = spawn_pypi_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); - if let Some(t) = token { - cmd.env("CORGEA_TOKEN", t); - } - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) } #[test] fn vulnerable_pin_blocks_without_running_install() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); - let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -118,7 +47,7 @@ fn vulnerable_pin_blocks_without_running_install() { fn force_overrides_vulnerable_block_and_propagates_exit_code() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); - let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 7); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 7); let out = h .cmd .args(["pip", "--force", "install", "oldpkg==1.0.0"]) @@ -141,7 +70,7 @@ fn force_overrides_vulnerable_block_and_propagates_exit_code() { fn verdict_503_fails_closed() { let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); - let mut h = VerdictHarness::new(HashMap::new(), statuses, Some("test-token"), 0); + let 
mut h = PipHarness::new(HashMap::new(), statuses, Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -162,7 +91,7 @@ fn tokenless_degrades_to_recency_only_with_login_prompt() { // Stub would flag oldpkg, but with no token it must never be consulted. let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); - let mut h = VerdictHarness::new(checks, HashMap::new(), None, 0); + let mut h = PipHarness::new(checks, HashMap::new(), None, 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -188,7 +117,7 @@ fn tokenless_degrades_to_recency_only_with_login_prompt() { #[test] fn progress_line_prints_only_above_eight_verdict_jobs() { // Nine resolvable named targets → 9 verdict jobs (> 8) → progress line. - let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); let mut args = vec!["pip".to_string(), "install".to_string()]; args.extend((1..=9).map(|i| format!("pkg{i}==1.0.0"))); let out = h.cmd.args(&args).output().expect("run corgea"); @@ -200,7 +129,7 @@ fn progress_line_prints_only_above_eight_verdict_jobs() { ); // Two jobs → quiet. - let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "pkg1==1.0.0", "pkg2==1.0.0"]) @@ -219,7 +148,7 @@ fn outage_noise_collapses_above_three_unverifiable() { // vuln-api refuses connections: every check fails with the same // error-prefix (only the per-package URL differs). Four findings → // one collapsed line; counts and fail-closed exit code unchanged. 
- let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); let out = h .cmd @@ -250,7 +179,7 @@ fn outage_noise_collapses_above_three_unverifiable() { ); // Three findings stay per-line — no collapse at the threshold. - let mut h = VerdictHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); let out = h .cmd @@ -280,7 +209,7 @@ fn outage_noise_collapses_above_three_unverifiable() { fn json_carries_verdict_object_and_mode() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); - let mut h = VerdictHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index b61a6c3..a691f6e 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -2,6 +2,11 @@ //! pattern — included via `mod common;` from each integration-test crate, so //! items unused by one consumer are `#[allow(dead_code)]`). 
+use corgea::vuln_api_stub::PackageKey; +#[cfg(unix)] +use std::collections::HashMap; +#[cfg(unix)] +use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; @@ -47,6 +52,43 @@ pub const OLDPKG_PYPI_JSON: &str = r#"{"info":{"name":"oldpkg"},"releases":{"1.0 #[allow(dead_code)] pub const OLDPKG_NPM_PACKUMENT: &str = r#"{"dist-tags":{"latest":"1.0.0"},"versions":{"1.0.0":{}},"time":{"1.0.0":"2020-01-01T00:00:00Z"}}"#; +#[allow(dead_code)] +pub fn key(eco: &str, name: &str, ver: &str) -> PackageKey { + (eco.to_string(), name.to_string(), ver.to_string()) +} + +/// Single-match vulnerable verdict body for the vuln-api stub; `fixed: None` +/// renders `"fixed_version":null`. +#[allow(dead_code)] +pub fn vulnerable_body( + ecosystem: &str, + name: &str, + version: &str, + advisory: &str, + fixed: Option<&str>, +) -> String { + let fixed = fixed.map_or("null".to_string(), |f| format!(r#""{f}""#)); + format!( + r#"{{"ecosystem":"{ecosystem}","package_name":"{name}","version":"{version}","is_vulnerable":true, + "matches":[{{"advisory_id":"{advisory}","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":{fixed}}}]}}"# + ) +} + +/// Pip `--report -` payload: `oldpkg` (named/requested) + `evildep` +/// (transitive). +#[allow(dead_code)] +pub const TREE_REPORT: &str = r#"{"version":"1","pip_version":"24.0","install":[ + {"metadata":{"name":"oldpkg","version":"1.0.0"},"requested":true}, + {"metadata":{"name":"evildep","version":"0.4.2"},"requested":false}]}"#; + +/// npm lockfile-v3 fixture: named `oldpkg` 1.0.0 + transitive `evildep` 0.4.2. +#[allow(dead_code)] +pub const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ + "":{"name":"proj","version":"1.0.0"}, + "node_modules/oldpkg":{"version":"1.0.0"}, + "node_modules/evildep":{"version":"0.4.2"}}}"#; + /// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 /// port; `route` maps a request path to `(status line, body)`. 
Returns the /// base URL. `Connection: close` is load-bearing — without it reqwest pools @@ -94,6 +136,38 @@ where base_url } +/// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm +/// packument), both published 2020 → never recent. Everything else 404s. +#[allow(dead_code)] +pub fn spawn_oldpkg_registry_stub() -> String { + spawn_http_stub(|path| match path { + "/pypi/oldpkg/json" => ("200 OK", OLDPKG_PYPI_JSON.to_string()), + "/oldpkg" => ("200 OK", OLDPKG_NPM_PACKUMENT.to_string()), + _ => ("404 Not Found", NOT_FOUND_JSON.to_string()), + }) +} + +/// Registry stub serving `/pypi//json` for any single-segment name, +/// always version 1.0.0 published 2020 → never recent. Everything else 404s. +#[allow(dead_code)] +pub fn spawn_wildcard_pypi_stub() -> String { + spawn_http_stub(|path| { + let name = path + .strip_prefix("/pypi/") + .and_then(|p| p.strip_suffix("/json")) + .filter(|n| !n.is_empty() && !n.contains('/')); + match name { + Some(name) => ( + "200 OK", + format!( + r#"{{"info":{{"name":"{name}"}},"releases":{{"1.0.0":[{{"upload_time_iso_8601":"2020-01-01T00:00:00Z"}}]}}}}"# + ), + ), + None => ("404 Not Found", NOT_FOUND_JSON.to_string()), + } + }) +} + /// Write `script` as the executable `dir/binary`. #[cfg(unix)] #[allow(dead_code)] @@ -194,3 +268,94 @@ pub fn write_fake_tree_pm( ); write_script(dir, binary, &script); } + +/// `corgea` wired to the wildcard pypi registry stub, a report-less fake pip +/// (recording its argv to a marker), and a vuln-api stub. +#[cfg(unix)] +#[allow(dead_code)] +pub struct PipHarness { + pub cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +#[cfg(unix)] +#[allow(dead_code)] +impl PipHarness { + /// `token: None` exercises tokenless mode (no CORGEA_TOKEN set). 
+ pub fn new( + checks: HashMap<PackageKey, String>, + statuses: HashMap<PackageKey, u16>, + token: Option<&str>, + pip_exit_code: i32, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); + let registry = spawn_wildcard_pypi_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); + if let Some(t) = token { + cmd.env("CORGEA_TOKEN", t); + } + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + pub fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} + +/// `corgea` wired to the oldpkg registry stub, a tree-aware fake `binary` +/// (`"pip"` or `"npm"`) answering the tree pass with `payload`, a vuln-api +/// stub, and a token. +#[cfg(unix)] +#[allow(dead_code)] +pub struct TreeHarness { + pub cmd: Command, + marker: PathBuf, + _home: TempDir, + _bin: TempDir, +} + +#[cfg(unix)] +#[allow(dead_code)] +impl TreeHarness { + pub fn new( + binary: &str, + checks: HashMap<PackageKey, String>, + statuses: HashMap<PackageKey, u16>, + payload: &str, + ) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_tree_pm(bin.path(), binary, &marker, payload, 0); + let registry = spawn_oldpkg_registry_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); + cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_NPM_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token"); + Self { + cmd, + marker, + _home: home, + _bin: bin, + } + } + + pub fn recorded_argv(&self) -> Option<String> { + std::fs::read_to_string(&self.marker).ok() + } +} From 2ef26e635f7a6433469c80c7996b2cced7f7700d Mon 
Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 10:54:50 +0200 Subject: [PATCH 24/59] Consolidate vuln-api test stubs and collapse operator parsing Replace the 107-line inline package-check stub in vuln_api's unit tests with the in-crate vuln_api_stub, teaching the stub a retry-once mode (first hit 429 + Retry-After, then the scripted response) for the client-retry test. Extract the triplicated HTTP request-read loop into vuln_api_stub::read_http_request. Table-drive the PEP 440 operator prefix chain in registry.rs, longest prefixes first. --- src/verify_deps/registry.rs | 33 ++++----- src/vuln_api/mod.rs | 142 +++--------------------------------- src/vuln_api_stub/mod.rs | 67 +++++++++++------ tests/common/mod.rs | 14 +--- 4 files changed, 74 insertions(+), 182 deletions(-) diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index bfe61cd..9ce6b16 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -450,26 +450,21 @@ fn pypi_resolve_specifier(candidates: &[(String, DateTime)], spec: &str) -> let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); + // Longest prefixes first so `>=` never matches as `>`. 
+ const OPERATORS: &[(&str, &str)] = &[ + ("===", "=="), + ("==", "=="), + (">=", ">="), + ("<=", "<="), + ("!=", "!="), + ("~=", "~="), + (">", ">"), + ("<", "<"), + ]; for p in &parts { - let (op, val): (&str, &str) = if let Some(v) = p.strip_prefix("===") { - ("==", v.trim()) - } else if let Some(v) = p.strip_prefix("==") { - ("==", v.trim()) - } else if let Some(v) = p.strip_prefix(">=") { - (">=", v.trim()) - } else if let Some(v) = p.strip_prefix("<=") { - ("<=", v.trim()) - } else if let Some(v) = p.strip_prefix("!=") { - ("!=", v.trim()) - } else if let Some(v) = p.strip_prefix("~=") { - ("~=", v.trim()) - } else if let Some(v) = p.strip_prefix(">") { - (">", v.trim()) - } else if let Some(v) = p.strip_prefix("<") { - ("<", v.trim()) - } else { - return None; - }; + let (op, val) = OPERATORS + .iter() + .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim())))?; let v = semver::Version::parse(&normalize_for_semver(val)).ok()?; requirements.push((op, v)); } diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index bd4c5b6..d49ca9f 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -280,126 +280,11 @@ pub fn check_package_version( #[cfg(test)] mod tests { use super::*; - use std::collections::HashMap; - use std::io::{Read, Write}; - use std::net::TcpListener; - use std::sync::{Arc, Mutex}; - use std::thread; - use std::time::Duration; - - /// `(ecosystem, name, version)` request key for the stub's route table. - type CheckKey = (String, String, String); - /// Maps a request key to a canned `(status, body)` response. - type KeyedResponses = HashMap; - - use crate::vuln_api_stub::status_text; - - struct PackageCheckStub { - base_url: String, - _handle: thread::JoinHandle<()>, - } - - /// Keys in `retry_after_keys`: first hit → 429 + Retry-After: 1, second hit → - /// response from `responses` (or clean 200 fallback). 
- fn spawn_package_check_stub_with_retry_keys( - responses: KeyedResponses, - retry_after_keys: KeyedResponses, - ) -> PackageCheckStub { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); - let port = listener.local_addr().unwrap().port(); - let base_url = format!("http://127.0.0.1:{}", port); - let responses = Arc::new(Mutex::new(responses)); - let retry_after_keys = Arc::new(Mutex::new(retry_after_keys)); - let hit_counts: Arc>> = Arc::new(Mutex::new(HashMap::new())); - - let handle = thread::spawn(move || { - for stream in listener.incoming().take(32) { - let Ok(mut stream) = stream else { - continue; - }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } - let req = String::from_utf8_lossy(&buf); - - let (status_code, status_text, body, extra_headers) = if let Some(path) = - req.lines().next().and_then(|l| l.split_whitespace().nth(1)) - { - let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); - if parts.len() >= 7 - && parts[0] == "v1" - && parts[1] == "packages" - && parts[4] == "versions" - && parts[6] == "check" - { - let eco = parts[2].to_string(); - let name = urlencoding::decode(parts[3]) - .unwrap_or_default() - .into_owned(); - let ver = urlencoding::decode(parts[5]) - .unwrap_or_default() - .into_owned(); - let key = (eco.clone(), name.clone(), ver.clone()); - let hits = { - let mut counts = hit_counts.lock().unwrap(); - let entry = counts.entry(key.clone()).or_insert(0); - *entry += 1; - *entry - }; - - let retry_body = retry_after_keys.lock().unwrap().get(&key).cloned(); - if retry_body.is_some() && hits == 1 { - let (code, body) = (429, r#"{"error":"rate limited"}"#.to_string()); - let text = "Too Many Requests"; - (code, text, body, "Retry-After: 1\r\n".to_string()) - } else { - let (code, body) = responses - 
.lock() - .unwrap() - .get(&key) - .cloned() - .or(retry_body) - .unwrap_or((200, r#"{"is_vulnerable":false,"matches":[]}"#.into())); - (code, status_text(code), body, String::new()) - } - } else { - ( - 404, - "Not Found", - r#"{"error":"not found"}"#.into(), - String::new(), - ) - } - } else { - ( - 400, - "Bad Request", - r#"{"error":"bad request"}"#.into(), - String::new(), - ) - }; - - let response = format!( - "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\n\r\n{}", - status_code, status_text, extra_headers, body.len(), body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - - thread::sleep(Duration::from_millis(50)); - PackageCheckStub { - base_url, - _handle: handle, - } + use crate::vuln_api_stub::{self, PackageKey}; + use std::collections::{HashMap, HashSet}; + + fn lodash_key() -> PackageKey { + ("npm".into(), "lodash".into(), "4.17.20".into()) } fn check_with_stub_status( @@ -407,12 +292,10 @@ mod tests { body: &str, ) -> Result> { let client = http_client().expect("test client"); - let mut responses = HashMap::new(); - responses.insert( - ("npm".into(), "lodash".into(), "4.17.20".into()), - (status_code, body.to_string()), + let stub = vuln_api_stub::spawn_with_statuses( + HashMap::from([(lodash_key(), body.to_string())]), + HashMap::from([(lodash_key(), status_code)]), ); - let stub = spawn_package_check_stub_with_retry_keys(responses, HashMap::new()); check_package_version( &client, &stub.base_url, @@ -470,12 +353,11 @@ mod tests { "fixed_version": "4.17.21" }] }"#; - let mut retry_after_keys = HashMap::new(); - retry_after_keys.insert( - ("npm".into(), "lodash".into(), "4.17.20".into()), - (200, vulnerable_body.to_string()), + let stub = vuln_api_stub::spawn_with_retry_once( + HashMap::from([(lodash_key(), vulnerable_body.to_string())]), + HashMap::new(), + HashSet::from([lodash_key()]), ); - let stub = spawn_package_check_stub_with_retry_keys(HashMap::new(), retry_after_keys); let resp = 
check_package_version( &client, &stub.base_url, diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index 7e3bf47..64a6936 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -1,7 +1,6 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::io::{Read, Write}; use std::net::TcpListener; -use std::sync::Arc; use std::thread; use std::time::Duration; @@ -19,20 +18,34 @@ pub struct VulnApiStub { pub fn spawn_with_statuses( package_checks: HashMap<PackageKey, String>, status_overrides: HashMap<PackageKey, u16>, +) -> VulnApiStub { + spawn_with_retry_once(package_checks, status_overrides, HashSet::new()) +} + +/// Like [`spawn_with_statuses`], but keys in `retry_once` answer their first +/// hit with 429 + `Retry-After: 1` and fall through to the scripted response +/// from the second hit on — for exercising the client's retry path. +pub fn spawn_with_retry_once( + package_checks: HashMap<PackageKey, String>, + status_overrides: HashMap<PackageKey, u16>, + retry_once: HashSet<PackageKey>, ) -> VulnApiStub { let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); let bound_port = listener.local_addr().expect("stub local_addr").port(); let base_url = format!("http://127.0.0.1:{bound_port}"); - let package_checks = Arc::new(package_checks); - let status_overrides = Arc::new(status_overrides); - let handle = thread::spawn(move || { + let mut pending_retries = retry_once; for stream in listener.incoming() { let Ok(mut stream) = stream else { continue; }; - handle_connection(&mut stream, &package_checks, &status_overrides); + handle_connection( + &mut stream, + &package_checks, + &status_overrides, + &mut pending_retries, + ); } }); @@ -44,11 +57,8 @@ pub fn spawn_with_statuses( } } -fn handle_connection( - stream: &mut std::net::TcpStream, - package_checks: &Arc<HashMap<PackageKey, String>>, - status_overrides: &Arc<HashMap<PackageKey, u16>>, -) { +/// Read one HTTP request's bytes (through the header terminator) off `stream`. 
+pub fn read_http_request(stream: &mut std::net::TcpStream) -> Vec<u8> { let mut buf = Vec::with_capacity(4096); let mut chunk = [0u8; 1024]; while let Ok(n) = stream.read(&mut chunk) { @@ -60,11 +70,21 @@ fn handle_connection( break; } } + buf +} + +fn handle_connection( + stream: &mut std::net::TcpStream, + package_checks: &HashMap<PackageKey, String>, + status_overrides: &HashMap<PackageKey, u16>, + pending_retries: &mut HashSet<PackageKey>, +) { + let buf = read_http_request(stream); let req = String::from_utf8_lossy(&buf); let path = req.lines().next().and_then(|l| l.split_whitespace().nth(1)); - let (status_code, response_body) = match path { + let (status_code, response_body, retry_after) = match path { Some(path) => { let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); if parts.len() >= 7 @@ -82,17 +102,21 @@ fn handle_connection( .unwrap_or_default() .into_owned(), ); - let body = package_checks - .get(&key) - .cloned() - .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); - let status = status_overrides.get(&key).copied().unwrap_or(200); - (status, body) + if pending_retries.remove(&key) { + (429, r#"{"error":"rate limited"}"#.to_string(), true) + } else { + let body = package_checks + .get(&key) + .cloned() + .unwrap_or_else(|| default_clean_response(&key.0, &key.1, &key.2)); + let status = status_overrides.get(&key).copied().unwrap_or(200); + (status, body, false) + } } else { - (404, NOT_FOUND_BODY.to_string()) + (404, NOT_FOUND_BODY.to_string(), false) } } - None => (400, r#"{"error":"bad request"}"#.to_string()), + None => (400, r#"{"error":"bad request"}"#.to_string(), false), }; let status_text = status_text(status_code); @@ -100,9 +124,10 @@ fn handle_connection( // connection, so without it reqwest pools the socket and a second request // (the gate's tree pass makes several per run) races the close and fails. 
let response = format!( - "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\nConnection: close\r\n\r\n{}", status_code, status_text, + if retry_after { "Retry-After: 1\r\n" } else { "" }, response_body.len(), response_body ); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index a691f6e..043f79a 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -98,7 +98,7 @@ pub fn spawn_http_stub(route: F) -> String where F: Fn(&str) -> (&'static str, String) + Send + 'static, { - use std::io::{Read, Write}; + use std::io::Write; use std::net::TcpListener; let listener = TcpListener::bind("127.0.0.1:0").expect("bind stub"); @@ -106,17 +106,7 @@ where std::thread::spawn(move || { for stream in listener.incoming() { let Ok(mut stream) = stream else { continue }; - let mut buf = Vec::with_capacity(4096); - let mut chunk = [0u8; 1024]; - while let Ok(n) = stream.read(&mut chunk) { - if n == 0 { - break; - } - buf.extend_from_slice(&chunk[..n]); - if buf.windows(4).any(|w| w == b"\r\n\r\n") { - break; - } - } + let buf = corgea::vuln_api_stub::read_http_request(&mut stream); let req = String::from_utf8_lossy(&buf); let path = req .lines() From 0f6b2b22b05e591bcb3b82bc739a5daf730f91c0 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 10:59:10 +0200 Subject: [PATCH 25/59] Collapse duplicated parsing and test-option boilerplate in precheck Share the unknown-flag skip heuristic between the node and pip positional extractors, fold the npm:/workspace: special cases into the unverifiable-prefix table, collapse build_parsed_install to a struct expression, and rewrite parse_npm_lockfile as a filter_map chain. Make stub_opts() parameterless (every caller passed the same dead address) and add verdict_opts() for the repeated VerdictConfig wiring. 
--- src/precheck/mod.rs | 58 +++++++++---------- src/precheck/parse.rs | 129 +++++++++++++++++++----------------------- src/precheck/tree.rs | 44 +++++++------- 3 files changed, 106 insertions(+), 125 deletions(-) diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index d4e40b2..d51bb1d 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -1452,21 +1452,35 @@ mod tests { assert!(!PackageManager::Pip.is_install_subcommand("freeze")); } - fn stub_opts(pypi_registry: String, no_fail: bool) -> PrecheckOptions { + /// Baseline options: pypi registry at a dead address (a port that + /// refuses connections — these tests never dial it), no verdict config. + /// Override fields per test via struct update. + fn stub_opts() -> PrecheckOptions { PrecheckOptions { threshold: Duration::from_secs(2 * 86400), - no_fail, + no_fail: false, force: false, json: false, verdict: None, npm_registry: None, - pypi_registry: Some(pypi_registry), + pypi_registry: Some("http://127.0.0.1:9".to_string()), concurrency: 4, // Unit tests never want the real `npm audit` subprocess. npm_audit: false, } } + /// `stub_opts()` plus a verdict config pointing at `base_url`. + fn verdict_opts(base_url: &str) -> PrecheckOptions { + PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: base_url.to_string(), + token: "test-token".to_string(), + }), + ..stub_opts() + } + } + /// Run `run_parsed_install` for `pip install ` with an exec /// closure that records whether it ran (returning 42 instead of /// spawning anything). @@ -1491,7 +1505,7 @@ mod tests { #[test] fn unverifiable_target_skips_and_proceeds() { // git+ spec → Skipped outcome, no registry hit, install proceeds. 
- let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let opts = stub_opts(); let (code, exec_ran) = gate_pip_install(&["git+https://github.com/psf/requests.git"], opts); assert_eq!(code, 42); assert!(exec_ran); @@ -1500,7 +1514,7 @@ mod tests { #[test] fn bare_install_passes_through_without_verification() { // Bare `pip install` (no targets) → straight exec, no registry hit. - let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let opts = stub_opts(); let (code, exec_ran) = gate_pip_install(&[], opts); assert_eq!(code, 42); assert!(exec_ran); @@ -1509,7 +1523,7 @@ mod tests { #[test] fn requirements_files_note_then_exec() { // `-r reqs.txt` alone → printed note, no verification, exec runs. - let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let opts = stub_opts(); let (code, exec_ran) = gate_pip_install(&["-r", "reqs.txt"], opts); assert_eq!(code, 42); assert!(exec_ran); @@ -1595,7 +1609,7 @@ mod tests { let opts = |no_fail: bool, force: bool| PrecheckOptions { no_fail, force, - ..stub_opts("http://127.0.0.1:9".to_string(), false) + ..stub_opts() }; let clean = { @@ -1658,7 +1672,7 @@ mod tests { assert_eq!(report.vulnerable_count(), 1); let opts = |force: bool| PrecheckOptions { force, - ..stub_opts("http://127.0.0.1:9".to_string(), false) + ..stub_opts() }; assert!(should_block_install(&report, &opts(false))); assert!(!should_block_install(&report, &opts(true))); @@ -1685,11 +1699,7 @@ mod tests { statuses.insert(key("flaky"), 503u16); let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); - let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); - opts.verdict = Some(VerdictConfig { - base_url: stub.base_url.clone(), - token: "test-token".to_string(), - }); + let opts = verdict_opts(&stub.base_url); let mut outcomes = vec![ resolved_outcome("evil", "1.0.0", false), @@ -1713,7 +1723,7 @@ mod tests { // Without a VerdictConfig the pass is a no-op. 
let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; - let no_verdict = stub_opts("http://127.0.0.1:9".to_string(), false); + let no_verdict = stub_opts(); run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); assert!(matches!( &untouched[0], @@ -1873,11 +1883,7 @@ mod tests { statuses.insert(key("flaky", "4.0.0"), 503u16); let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); - let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); - opts.verdict = Some(VerdictConfig { - base_url: stub.base_url.clone(), - token: "test-token".to_string(), - }); + let opts = verdict_opts(&stub.base_url); // oldpkg's fix is unknown to the stub → default clean; badfix's fix is // flagged; flaky's fix 503s. badfix arrives via the transitive arm. @@ -1916,7 +1922,7 @@ mod tests { /// missing map entry as Unverified. #[test] fn verify_steers_noop_without_token() { - let opts = stub_opts("http://127.0.0.1:9".to_string(), false); + let opts = stub_opts(); let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", Some("2.0.0"))]); verify_steers(&mut report, &opts); assert!(report.steers.is_empty()); @@ -1930,11 +1936,7 @@ mod tests { /// dead address, an attempted request would land as Unverified. 
#[test] fn verify_steers_skips_requests_without_proposals() { - let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); - opts.verdict = Some(VerdictConfig { - base_url: "http://127.0.0.1:9".to_string(), - token: "test-token".to_string(), - }); + let opts = verdict_opts("http://127.0.0.1:9"); let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", None)]); verify_steers(&mut report, &opts); assert!(report.steers.is_empty()); @@ -1946,11 +1948,7 @@ mod tests { #[test] fn verify_steers_dedups_by_normalized_name() { let stub = crate::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); - let mut opts = stub_opts("http://127.0.0.1:9".to_string(), false); - opts.verdict = Some(VerdictConfig { - base_url: stub.base_url.clone(), - token: "test-token".to_string(), - }); + let opts = verdict_opts(&stub.base_url); let mut report = report_with(vec![ vulnerable_outcome("Flask_Cors", "1.0.0", Some("2.0.0")), vulnerable_outcome("flask-cors", "1.0.0", Some("2.0.0")), diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index 220ad11..9f36f39 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -35,12 +35,14 @@ fn build_parsed_install( positionals: PositionalSplit, parse_spec: fn(&str) -> InstallTarget, ) -> ParsedInstall { - let mut parsed = ParsedInstall::default(); - for raw in &positionals.specs { - parsed.targets.push(parse_spec(raw)); + ParsedInstall { + targets: positionals + .specs + .iter() + .map(|raw| parse_spec(raw)) + .collect(), + requirements_files: positionals.requirements_files, } - parsed.requirements_files = positionals.requirements_files; - parsed } pub fn parse_install_args( @@ -112,27 +114,7 @@ fn extract_node_positionals(args: &[String]) -> PositionalSplit { break; } if a.starts_with('-') { - // Flag. Skip the next token if it looks like a value. - if a.contains('=') { - // `--flag=value` already self-contained. - i += 1; - continue; - } - // Heuristic: peek at the next arg. 
If it doesn't look - // like a package spec (i.e. contains `://` or starts with - // `/` or `.`) skip it; otherwise leave it alone for the - // next iteration. - let next_is_value = args - .get(i + 1) - .map(|n| { - !n.starts_with('-') - && (n.contains("://") - || n.starts_with('/') - || n.starts_with("./") - || n.starts_with('~')) - }) - .unwrap_or(false); - i += if next_is_value { 2 } else { 1 }; + i = skip_unknown_flag(args, i); continue; } out.specs.push(a.clone()); @@ -141,6 +123,27 @@ fn extract_node_positionals(args: &[String]) -> PositionalSplit { out } +/// Advance past an unknown flag at `i`. `--flag=value` is self-contained; +/// otherwise peek at the next arg and skip it too if it doesn't look like +/// a package spec (contains `://` or is path-like) — see the heuristic +/// rationale on [`extract_node_positionals`]. +fn skip_unknown_flag(args: &[String], i: usize) -> usize { + if args[i].contains('=') { + return i + 1; + } + let next_is_value = args + .get(i + 1) + .map(|n| { + !n.starts_with('-') + && (n.contains("://") + || n.starts_with('/') + || n.starts_with("./") + || n.starts_with('~')) + }) + .unwrap_or(false); + i + if next_is_value { 2 } else { 1 } +} + /// pip's argument grammar is more structured than npm's: there are /// known flags that take a value (`-r FILE`, `-c FILE`, `-e PATH`, /// `--index-url URL`, `--target DIR`, ...). We special-case `-r/-c/-e` @@ -192,23 +195,7 @@ fn extract_pip_positionals(args: &[String]) -> Result { continue; } if a.starts_with('-') { - // Unknown flag — apply the same value-skipping heuristic - // as in node land. 
- if a.contains('=') { - i += 1; - continue; - } - let next_is_value = args - .get(i + 1) - .map(|n| { - !n.starts_with('-') - && (n.contains("://") - || n.starts_with('/') - || n.starts_with("./") - || n.starts_with('~')) - }) - .unwrap_or(false); - i += if next_is_value { 2 } else { 1 }; + i = skip_unknown_flag(args, i); continue; } out.specs.push(a.clone()); @@ -225,33 +212,34 @@ pub(crate) fn parse_npm_spec(raw: &str) -> InstallTarget { let trimmed = raw.trim(); let unverifiable_prefixes = [ - "git+", "git:", "git@", "ssh://", "http://", "https://", "file:", "./", "../", "/", "~/", + "git+", + "git:", + "git@", + "ssh://", + "http://", + "https://", + "file:", + "./", + "../", + "/", + "~/", + "npm:", + "workspace:", ]; - if unverifiable_prefixes.iter().any(|p| trimmed.starts_with(p)) { - return InstallTarget { - name: trimmed.to_string(), - display, - kind: TargetKind::Unverifiable { - reason: "spec is a URL/git/filesystem reference — registry verification skipped" - .to_string(), - }, + if let Some(p) = unverifiable_prefixes + .iter() + .find(|p| trimmed.starts_with(*p)) + { + let reason = match *p { + "npm:" => "npm: aliased dependency — registry verification skipped", + "workspace:" => "workspace: dependency — registry verification skipped", + _ => "spec is a URL/git/filesystem reference — registry verification skipped", }; - } - if trimmed.starts_with("npm:") { - return InstallTarget { - name: trimmed.to_string(), - display, - kind: TargetKind::Unverifiable { - reason: "npm: aliased dependency — registry verification skipped".to_string(), - }, - }; - } - if trimmed.starts_with("workspace:") { return InstallTarget { name: trimmed.to_string(), display, kind: TargetKind::Unverifiable { - reason: "workspace: dependency — registry verification skipped".to_string(), + reason: reason.to_string(), }, }; } @@ -534,14 +522,13 @@ mod tests { parse_pypi_spec("requests[security]==2.31.0").name, "requests" ); - assert_eq!( - parse_pypi_spec("requests==2.31.0; 
python_version >= \"3.7\"").name, - "requests" + let t = parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\""); + assert_eq!(t.name, "requests"); + assert!( + matches!(t.kind, TargetKind::Pypi(PypiSpec::Exact(ref v)) if v == "2.31.0"), + "env marker must not leak into the spec: {:?}", + t.kind ); - match parse_pypi_spec("requests==2.31.0; python_version >= \"3.7\"").kind { - TargetKind::Pypi(PypiSpec::Exact(v)) => assert_eq!(v, "2.31.0"), - _ => panic!("expected exact spec"), - } } #[test] diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 54e4509..0b7fae6 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -389,30 +389,26 @@ fn parse_npm_lockfile(json: &str) -> Result, String> { .get("packages") .and_then(|v| v.as_object()) .ok_or("package-lock.json has no packages map (npm < 7?)")?; - let mut out = Vec::new(); - for (path, entry) in packages { - if path.is_empty() { - continue; // root project entry - } - if entry.get("link").and_then(|v| v.as_bool()) == Some(true) { - continue; - } - let name = entry - .get("name") - .and_then(|v| v.as_str()) - .map(str::to_string) - .or_else(|| name_from_lock_path(path)); - let (Some(name), Some(version)) = (name, entry.get("version").and_then(|v| v.as_str())) - else { - continue; - }; - out.push(TreePackage { - name, - version: version.to_string(), - requested: false, - }); - } - Ok(out) + Ok(packages + .iter() + // Skip the root project entry ("") and symlinked (workspace) entries. 
+ .filter(|(path, entry)| { + !path.is_empty() && entry.get("link").and_then(|v| v.as_bool()) != Some(true) + }) + .filter_map(|(path, entry)| { + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .map(str::to_string) + .or_else(|| name_from_lock_path(path))?; + let version = entry.get("version").and_then(|v| v.as_str())?; + Some(TreePackage { + name, + version: version.to_string(), + requested: false, + }) + }) + .collect()) } /// Derive a package name from a lockfile path key like From 7cb12f8a0ea78acf40f18bd3713de934e81b8075 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 11:07:53 +0200 Subject: [PATCH 26/59] Deflake port_is_available test under parallel suite load A freed ephemeral port returns to the OS pool, where a concurrent test's bind(":0") can snatch it before the re-check asserts it available. Accept any of five freshly freed ports reading available; the chain stays lazy so fresh ports are reserved only after a collision. --- src/authorize.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/authorize.rs b/src/authorize.rs index 854415c..80fbbcf 100644 --- a/src/authorize.rs +++ b/src/authorize.rs @@ -633,7 +633,16 @@ mod tests { assert!(!port_is_available(port)); drop(listener); - assert!(port_is_available(port)); + // The freed port returns to the OS ephemeral pool, where a parallel + // test's `bind(":0")` can snatch it before the re-check — so accept + // any of several freshly freed ports reading available. The chain is + // lazy: fresh ports are only reserved after a collision. 
+ assert!( + std::iter::once(port) + .chain((0..4).map(|_| reserve_ephemeral_port())) + .any(port_is_available), + "five consecutive freed ports all read unavailable" + ); } #[test] From a3fee906df35099d3b77d3c1ec0c6748c5dceffb Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 11:34:18 +0200 Subject: [PATCH 27/59] Dedup is_jwt across crates and drop the stub readiness sleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make vuln_api::is_jwt pub and import it in the binary crate's utils/api.rs, replacing the keep-in-sync twin; its overlapping test sets merge at the definition site. Remove the 50ms sleep in spawn_with_retry_once — the listener is bound before the accept thread spawns, so connections queue in the OS backlog. Demote status_text and the two spec parsers to private (no external callers) and inline the single-use url binding in get_vuln_api_url. --- src/config.rs | 8 +++++--- src/precheck/parse.rs | 4 ++-- src/utils/api.rs | 27 +-------------------------- src/vuln_api/mod.rs | 10 +++++++--- src/vuln_api_stub/mod.rs | 5 +---- 5 files changed, 16 insertions(+), 38 deletions(-) diff --git a/src/config.rs b/src/config.rs index d3c2125..805b426 100644 --- a/src/config.rs +++ b/src/config.rs @@ -109,10 +109,12 @@ impl Config { /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, /// then the config file's `vuln_api_url`, then the public default. 
pub fn get_vuln_api_url(&self) -> String { - let url = crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") .or_else(|| self.vuln_api_url.clone()) - .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()); - url.trim().trim_end_matches('/').to_string() + .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()) + .trim() + .trim_end_matches('/') + .to_string() } } diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index 9f36f39..b1b952a 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -207,7 +207,7 @@ fn extract_pip_positionals(args: &[String]) -> Result { /// Parse a single npm-style positional, e.g. `axios`, `axios@1.0.0`, /// `axios@^1.0.0`, `axios@latest`, `@types/node@20.10.5`, /// `git+https://...`, `file:./local`, `./local`, `npm:other@1.0.0`. -pub(crate) fn parse_npm_spec(raw: &str) -> InstallTarget { +fn parse_npm_spec(raw: &str) -> InstallTarget { let display = raw.to_string(); let trimmed = raw.trim(); @@ -320,7 +320,7 @@ fn is_npm_dist_tag(s: &str) -> bool { /// Parse a single pip-style positional, e.g. `requests`, `requests==2.31.0`, /// `requests>=2.0`, `requests[security]`, `git+https://...`, `./local`. 
-pub(crate) fn parse_pypi_spec(raw: &str) -> InstallTarget { +fn parse_pypi_spec(raw: &str) -> InstallTarget { let display = raw.to_string(); let trimmed = raw.trim(); diff --git a/src/utils/api.rs b/src/utils/api.rs index 9b9a445..c82e38e 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,5 +1,6 @@ use crate::log::debug; use crate::utils; +use corgea::vuln_api::is_jwt; use reqwest::header::HeaderMap; use reqwest::StatusCode; use reqwest::{ @@ -22,11 +23,6 @@ fn get_source() -> String { std::env::var("CORGEA_SOURCE").unwrap_or_else(|_| "cli".to_string()) } -fn is_jwt(token: &str) -> bool { - let parts: Vec<&str> = token.splitn(4, '.').collect(); - parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) -} - fn auth_headers(token: &str) -> HeaderMap { let mut headers = HeaderMap::new(); if is_jwt(token) { @@ -1039,27 +1035,6 @@ mod tests { use super::*; use reqwest::header::{HeaderMap, HeaderValue}; - #[test] - fn is_jwt_accepts_three_dot_separated_non_empty_parts() { - assert!(is_jwt("aaa.bbb.ccc")); - assert!(is_jwt("header.payload.signature")); - } - - #[test] - fn is_jwt_rejects_wrong_part_count() { - assert!(!is_jwt("aaa.bbb")); - assert!(!is_jwt("aaa.bbb.ccc.ddd")); - assert!(!is_jwt("plainstring")); - assert!(!is_jwt("")); - } - - #[test] - fn is_jwt_rejects_when_any_part_is_empty() { - assert!(!is_jwt("aaa..ccc")); - assert!(!is_jwt(".bbb.ccc")); - assert!(!is_jwt("aaa.bbb.")); - } - #[test] fn auth_headers_uses_bearer_for_jwt_tokens() { let headers = auth_headers("aaa.bbb.ccc"); diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index d49ca9f..155c4fb 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -65,9 +65,9 @@ pub fn http_client() -> Result { .clone() } -// Twin of `is_jwt` in the binary crate's `utils/api.rs` — unreachable from -// this library crate (like `log`, re-declared in `lib.rs`). Keep in sync. -fn is_jwt(token: &str) -> bool { +/// Whether `token` looks like a JWT (three non-empty dot-separated parts). 
+/// Decides the auth header shape here and in the binary crate's `utils/api.rs`. +pub fn is_jwt(token: &str) -> bool { let parts: Vec<&str> = token.splitn(4, '.').collect(); parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) } @@ -476,8 +476,12 @@ mod tests { fn is_jwt_detection() { assert!(is_jwt("a.b.c")); assert!(!is_jwt("plain-token")); + assert!(!is_jwt("")); assert!(!is_jwt("a.b")); + assert!(!is_jwt("a.b.c.d")); assert!(!is_jwt("a..c")); + assert!(!is_jwt(".b.c")); + assert!(!is_jwt("a.b.")); } // Fixture-based deserialization tests — committed JSON under tests/fixtures/vuln_api/, diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index 64a6936..df8a092 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -2,7 +2,6 @@ use std::collections::{HashMap, HashSet}; use std::io::{Read, Write}; use std::net::TcpListener; use std::thread; -use std::time::Duration; pub type PackageKey = (String, String, String); @@ -49,8 +48,6 @@ pub fn spawn_with_retry_once( } }); - thread::sleep(Duration::from_millis(50)); - VulnApiStub { base_url, _handle: handle, @@ -136,7 +133,7 @@ fn handle_connection( /// Reason phrase for a stub status line. Shared with the in-crate test /// stubs so the mapping lives once. -pub fn status_text(status_code: u16) -> &'static str { +fn status_text(status_code: u16) -> &'static str { match status_code { 404 => "Not Found", 401 => "Unauthorized", From 0df65f5c37581ec5ff4e3ece6b12bbdd7a2a754e Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 11:53:40 +0200 Subject: [PATCH 28/59] Gate the vuln-api test stub out of release builds vuln_api_stub shipped in the release binary (207 lines of raw-TCP HTTP server) despite every caller living under #[cfg(test)] or in tests/. Gate it behind a test-stub feature enabled for test builds via a self dev-dependency, so cargo test keeps working unchanged while cargo build --release excludes it. 
--- Cargo.lock | 1 + Cargo.toml | 8 ++++++++ src/lib.rs | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 1c6a6fc..c2b933e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,6 +344,7 @@ version = "1.8.8" dependencies = [ "chrono", "clap", + "corgea", "dirs", "env_logger", "git2", diff --git a/Cargo.toml b/Cargo.toml index 2ff42c1..afaf048 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,14 @@ edition = "2021" name = "corgea" path = "src/main.rs" +[features] +# Compiles the in-crate vuln-api test stub (`vuln_api_stub`). Enabled for all +# test builds via the self dev-dependency below; never part of release builds. +test-stub = [] + +[dev-dependencies] +corgea = { path = ".", features = ["test-stub"] } + [dependencies] clap = { version = "4.4.13", features = ["derive"] } dirs = "5.0.1" diff --git a/src/lib.rs b/src/lib.rs index 399b15f..498e83d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,4 +6,8 @@ pub mod verify_deps; // compiles cleanly in both crates. mod log; pub mod vuln_api; +// Test-only HTTP stub for the vuln-api. Gated out of release builds; the +// `test-stub` feature is enabled for every test build by the self +// dev-dependency in Cargo.toml, so integration tests can use it too. +#[cfg(any(test, feature = "test-stub"))] pub mod vuln_api_stub; From bfc8cf1ddcbd665088d6366cf129b659fdf2e5d4 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 11:56:18 +0200 Subject: [PATCH 29/59] Drop the --concurrency flag; fix verdict parallelism at 8 A knob nobody tunes: verdict-request parallelism is an implementation detail, not a user decision. The clamped default of 8 becomes the VERDICT_CONCURRENCY constant; verdict_pool keeps its worker-count parameter for the unit test that exercises pool sizes. 
--- skills/corgea/SKILL.md | 3 +-- src/main.rs | 8 -------- src/precheck/mod.rs | 14 +++++++------- tests/cli_tree.rs | 2 +- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 79c4e0a..3b0fa1b 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -140,7 +140,7 @@ in a temp dir, never touching your lockfile) and verdict every package, so a fla dry-run, so they verify the named targets only and print `warning: transitive dependencies not checked (…); only named packages were verified.` The same warning is emitted (and the gate falls back to named-only) whenever a pip/npm -dry-run fails. Verdict requests run in a bounded pool (`--concurrency`, default 8). +dry-run fails. Verdict requests run in a bounded pool (8 parallel). ```bash corgea pip install requests==2.31.0 # resolves, checks recency + vuln verdict, then runs pip @@ -156,7 +156,6 @@ corgea pip list # non-install subcommands pass straight th | `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | | `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable/unverifiable blocks. | | `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | -| `--concurrency` | | Max parallel vuln-verdict requests during the gate (1-32, default 8). | | `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. | `--json` adds a `tree` object: `null` in recency-only mode; otherwise `mode` is `"full"` diff --git a/src/main.rs b/src/main.rs index 4bf6d31..7ea61d0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -261,13 +261,6 @@ struct InstallWrapArgs { )] json: bool, - #[arg( - long, - default_value_t = 8, - help = "Max parallel vulnerability-verdict requests during the gate (1-32)." - )] - concurrency: usize, - /// Arguments forwarded to the package manager (subcommand and package specs). 
#[arg(trailing_var_arg = true, allow_hyphen_values = true)] cmd: Vec, @@ -295,7 +288,6 @@ fn install_wrap_options( verdict, npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), - concurrency: args.concurrency, npm_audit: utils::generic::get_env_var_if_exists("CORGEA_NO_NPM_AUDIT").is_none(), } } diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index d51bb1d..6be6731 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -134,8 +134,6 @@ pub struct PrecheckOptions { /// Optional registry overrides, used by tests. pub npm_registry: Option, pub pypi_registry: Option, - /// Max parallel vuln-api verdict requests; `verdict_pool` clamps to 1..=32. - pub concurrency: usize, /// Run the warn-only `npm audit` second opinion during the npm tree /// pass. Cleared by `CORGEA_NO_NPM_AUDIT` (read in `main`, like the /// registry overrides). @@ -637,7 +635,7 @@ fn run_tree_pass( .verdict .as_ref() .expect("tree pass requires verdict config"); - let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); // Collect the warn-only npm audit second opinion only after the verdict // pool so the two truly overlap. The wait is capped tight: this signal // never changes the outcome, so a finished gate won't stall long for it — @@ -656,6 +654,9 @@ fn run_tree_pass( /// pass doesn't look hung. const VERDICT_PROGRESS_THRESHOLD: usize = 8; +/// Max parallel vuln-api verdict requests. +const VERDICT_CONCURRENCY: usize = 8; + /// Bounded worker pool over the verdict jobs — owns client creation and the /// fail-closed policy: on client failure every job comes back `Unverifiable`. 
/// Plain work queue, no new crates; `reqwest::blocking::Client` is @@ -685,7 +686,7 @@ fn verdict_pool( } let ecosystem = manager.ecosystem(); - let workers = concurrency.clamp(1, 32).min(jobs.len().max(1)); + let workers = concurrency.min(jobs.len()).max(1); let queue = Mutex::new(VecDeque::from(jobs)); let results = Mutex::new(Vec::new()); std::thread::scope(|s| { @@ -790,7 +791,7 @@ fn run_verdict_pass( }) .collect(); - let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); apply_verdicts(manager, results, outcomes, &Default::default()); } @@ -842,7 +843,7 @@ fn verify_steers(report: &mut PrecheckReport, opts: &PrecheckOptions) { return; } - let results = verdict_pool(jobs, cfg, manager, opts.concurrency); + let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); report.steers = results .into_iter() .map(|(pkg, verdict)| { @@ -1464,7 +1465,6 @@ mod tests { verdict: None, npm_registry: None, pypi_registry: Some("http://127.0.0.1:9".to_string()), - concurrency: 4, // Unit tests never want the real `npm audit` subprocess. 
npm_audit: false, } diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 902662b..a63a5d2 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -28,7 +28,7 @@ fn transitive_vulnerable_blocks_install() { "pip", "pypi", TREE_REPORT, - &["pip", "--concurrency", "2", "install", "oldpkg==1.0.0"][..], + &["pip", "install", "oldpkg==1.0.0"][..], ), ( "npm", From e62399ce85cce96927c3519896326429e644715a Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 12:02:02 +0200 Subject: [PATCH 30/59] Drop the steer re-verification pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '→ safe version' steer re-verdicted its proposed fix against vuln-api before printing — an extra network round-trip per blocked package, a 3-state SteerCheck, and silent-suppression semantics, all guarding a server-side data-quality case (vuln-api advertising a fix it also flags). The steer now prints straight from verdict fix data: safe_version still requires every advisory to carry a fix, and the pre-existing 'fix with:' hint hedges with '(advertised fix)' exactly when the fix doesn't cover every advisory. --- skills/corgea/SKILL.md | 9 +- src/precheck/mod.rs | 284 +++++---------------------------------- tests/cli_provenance.rs | 27 ++-- tests/cli_remediation.rs | 107 +-------------- 4 files changed, 53 insertions(+), 374 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 3b0fa1b..5222289 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -127,10 +127,7 @@ note when the tree pass doesn't cover them. Blocked findings steer to the fix: each advisory line shows `fixed in ` (or `no fixed version known`). 
When every advisory on a package has a fix, the gate -re-checks that version against vuln-api before suggesting it: a clean re-check prints -`→ safe version: <name>@<version>`; a flagged one prints `→ advertised fix <version> -is also flagged — no safe version to suggest`; a failed re-check suppresses the -suggestion quietly (it never changes counts or exit codes). +prints `→ safe version: <name>@<version>` — the highest fix covering every advisory. With a token, the vuln check covers the **full would-install set**, not just the named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a @@ -161,8 +158,8 @@ corgea pip list # non-install subcommands pass straight th `--json` adds a `tree` object: `null` in recency-only mode; otherwise `mode` is `"full"` (transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a `transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. -Vulnerable `verdict` objects carry a `remediation` field: the certified safe version, -or `null` when any advisory has no known fix. +Vulnerable `verdict` objects carry a `remediation` field: the safe version covering +every advisory, or `null` when any advisory has no known fix. Recency gating needs no token; the vuln verdict uses the configured Corgea token when present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 6be6731..16d2897 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -16,7 +16,6 @@ pub mod parse; pub mod tree; -use std::collections::HashMap; use std::ffi::OsString; use std::process::Command; use std::time::Duration; @@ -103,19 +102,6 @@ pub enum VerdictStatus { NotChecked, } -/// Result of re-verdicting a proposed `→ safe version` steer against -/// vuln-api before it prints. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SteerCheck { - /// vuln-api confirmed the proposed version is clean — print the steer. 
- Verified, - /// vuln-api flagged the proposed version too — print the rejection note. - Rejected, - /// The re-check failed (network/5xx/auth) — suppress the steer quietly. - /// Never feeds counts or the block decision. - Unverified, -} - /// Reason recorded on resolved targets when no token is configured. const NO_TOKEN_REASON: &str = "no Corgea token; vulnerability verdict skipped"; @@ -253,10 +239,6 @@ pub struct PrecheckReport { pub threshold: Duration, /// `None` ⇒ recency-only mode, the tree pass never ran. pub tree: Option, - /// Verification results for proposed safe-version steers, keyed by - /// (normalized name, proposed version). Populated by `verify_steers`; - /// consulted only at render time, never by the block predicate. - pub steers: HashMap<(String, String), SteerCheck>, /// True when the command named nothing — no CLI targets and no /// requirements files — so everything the tree pass resolved predates /// this command (bare `npm install`). Distinct from @@ -460,17 +442,15 @@ fn run_parsed_install( ); } - let mut report = PrecheckReport { + let report = PrecheckReport { manager, subcommand: subcommand_label.to_string(), original_args: rest.to_vec(), outcomes, threshold: opts.threshold, tree, - steers: HashMap::new(), bare_install, }; - verify_steers(&mut report, &opts); if opts.json { print_json(&report, &opts); @@ -795,70 +775,6 @@ fn run_verdict_pass( apply_verdicts(manager, results, outcomes, &Default::default()); } -/// Re-verdict every proposed `→ safe version` steer before anything prints. -/// Populates `report.steers` keyed by (normalized name, proposed version): -/// `Clean` ⇒ `Verified`, flagged ⇒ `Rejected`, request failure ⇒ `Unverified` -/// (suppressed quietly — never feeds counts or exit codes). Sends requests -/// only when a token is configured and at least one vulnerable verdict -/// proposed a steer; proposals dedup by normalized (name, version). 
-fn verify_steers(report: &mut PrecheckReport, opts: &PrecheckOptions) { - let Some(cfg) = &opts.verdict else { return }; - let manager = report.manager; - - let mut proposals: Vec<(&str, &[crate::vuln_api::VulnMatch])> = Vec::new(); - for o in &report.outcomes { - if let TargetOutcome::Resolved { - resolved, - verdict: VerdictStatus::Vulnerable(matches), - .. - } = o - { - proposals.push((&resolved.name, matches)); - } - } - if let Some(TreeReport::Full { transitive, .. }) = &report.tree { - for t in transitive { - if let VerdictStatus::Vulnerable(matches) = &t.verdict { - proposals.push((&t.name, matches)); - } - } - } - - let mut seen = std::collections::HashSet::new(); - let mut jobs: Vec = Vec::new(); - for (name, matches) in proposals { - let Some(safe) = safe_version(matches) else { - continue; - }; - if seen.insert((manager.normalize_name(name), safe.clone())) { - jobs.push(tree::TreePackage { - name: name.to_string(), - version: safe, - // Steer re-check jobs are synthetic, not user-requested. - requested: false, - }); - } - } - if jobs.is_empty() { - return; - } - - let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); - report.steers = results - .into_iter() - .map(|(pkg, verdict)| { - let check = match verdict { - VerdictStatus::Clean => SteerCheck::Verified, - VerdictStatus::Vulnerable(_) => SteerCheck::Rejected, - VerdictStatus::Unverifiable(_) | VerdictStatus::NotChecked => { - SteerCheck::Unverified - } - }; - ((manager.normalize_name(&pkg.name), pkg.version), check) - }) - .collect(); -} - fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { if opts.force { return false; @@ -1014,24 +930,6 @@ fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { highest_fix(fixes, true) } -/// The safe-version proposal for a vulnerable package, paired with its -/// `verify_steers` re-check. 
`None` when no version can be proposed at all; -/// a proposal absent from the steer map counts as `Unverified` so callers -/// suppress it. -fn steer_for( - report: &PrecheckReport, - name: &str, - matches: &[crate::vuln_api::VulnMatch], -) -> Option<(String, SteerCheck)> { - let safe = safe_version(matches)?; - let check = report - .steers - .get(&(report.manager.normalize_name(name), safe.clone())) - .copied() - .unwrap_or(SteerCheck::Unverified); - Some((safe, check)) -} - /// Highest `fixed_version` the advisories advertise, by lenient semver. /// Unlike `safe_version` this is *not* a certification: matches without a /// fix are ignored, so the result may still be vulnerable to them. `None` @@ -1044,13 +942,9 @@ fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option<String> { highest_fix(fixes, false) } -/// Per-match advisory lines plus the verified safe-version steer, shared by -/// the named-target and transitive vulnerable render arms. -fn print_vulnerable_matches( - report: &PrecheckReport, - name: &str, - matches: &[crate::vuln_api::VulnMatch], -) { +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. 
+fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { for m in matches { println!( " {} ({}){}", @@ -1059,14 +953,8 @@ fn print_vulnerable_matches( fix_note(m) ); } - match steer_for(report, name, matches) { - Some((safe, SteerCheck::Verified)) => { - println!(" → safe version: {name}@{safe}"); - } - Some((safe, SteerCheck::Rejected)) => { - println!(" → advertised fix {safe} is also flagged — no safe version to suggest"); - } - Some((_, SteerCheck::Unverified)) | None => {} + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); } } @@ -1195,32 +1083,27 @@ fn print_text(report: &PrecheckReport) { t.version, t.origin.label() ); - print_vulnerable_matches(report, &t.name, matches); + print_vulnerable_matches(&t.name, matches); // A vulnerable dep the project already declares can be - // bumped directly — point at the fix as a command. The - // caveat follows the steer check above: a Verified - // steer certified this same version (when `safe_version` - // is `Some` it equals `advertised_fix`), a Rejected one - // already said the fix is flagged, so only an - // unverified proposal keeps the "(advertised fix)" - // hedge. + // bumped directly — point at the fix as a command. + // When `safe_version` is `Some` it equals + // `advertised_fix` and clears every advisory; otherwise + // some advisory has no fix, so the "(advertised fix)" + // hedge marks the bump as partial. 
if t.origin == TreeOrigin::PreExisting { if let Some(fix) = advertised_fix(matches) { - match steer_for(report, &t.name, matches) { - Some((_, SteerCheck::Rejected)) => {} - Some((_, SteerCheck::Verified)) => println!( - " fix with: corgea {} install {}@{}", - report.manager.binary_name(), - t.name, - fix - ), - Some((_, SteerCheck::Unverified)) | None => println!( - " fix with: corgea {} install {}@{} (advertised fix)", - report.manager.binary_name(), - t.name, - fix - ), - } + let hedge = if safe_version(matches).is_some() { + "" + } else { + " (advertised fix)" + }; + println!( + " fix with: corgea {} install {}@{}{}", + report.manager.binary_name(), + t.name, + fix, + hedge + ); } } } @@ -1266,7 +1149,7 @@ fn print_text(report: &PrecheckReport) { " ✗ {} → {}@{} known vulnerable:", target.display, resolved.name, resolved.version, ); - print_vulnerable_matches(report, &resolved.name, matches); + print_vulnerable_matches(&resolved.name, matches); } VerdictStatus::Unverifiable(error) => { if !is_collapsed(error) { @@ -1309,21 +1192,17 @@ fn print_text(report: &PrecheckReport) { /// JSON shape for a single verdict. Shared by named outcomes and tree /// (transitive) outcomes so both render verdicts identically. -/// `remediation` carries the safe version only when its steer re-check -/// came back `Verified`; rejected/unverified steers emit `null`. -fn verdict_json(report: &PrecheckReport, name: &str, verdict: &VerdictStatus) -> serde_json::Value { +/// `remediation` carries the version that clears every advisory +/// (`safe_version`); `null` when any advisory has no known fix. 
+fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { use serde_json::json; match verdict { VerdictStatus::Clean => json!({ "status": "clean" }), VerdictStatus::Vulnerable(matches) => { - let remediation = match steer_for(report, name, matches) { - Some((safe, SteerCheck::Verified)) => Some(safe), - _ => None, - }; json!({ "status": "vulnerable", "matches": matches, - "remediation": remediation, + "remediation": safe_version(matches), }) } VerdictStatus::Unverifiable(error) => { @@ -1364,7 +1243,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { age, verdict, } => { - let verdict_json = verdict_json(report, &resolved.name, verdict); + let verdict_json = verdict_json(verdict); json!({ "status": if report.is_recent(*age) { "recent" } else { "ok" }, "spec": target.display, @@ -1414,7 +1293,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "name": o.name, "version": o.version, "origin": o.origin.json_name(), - "verdict": verdict_json(report, &o.name, &o.verdict), + "verdict": verdict_json(&o.verdict), })).collect::>(), "npm_audit": audit.as_ref().map(npm_audit_json), }), @@ -1563,7 +1442,6 @@ mod tests { outcomes, threshold: Duration::from_secs(2 * 86400), tree: None, - steers: HashMap::new(), // Most tests model an install that named something; bare-install // cases set this explicitly. bare_install: false, @@ -1864,106 +1742,6 @@ mod tests { o } - /// `verify_steers` re-verdicts each proposed fix, from named and - /// transitive findings alike: clean → Verified, flagged → Rejected, - /// 5xx → Unverified. Counts and the block predicate never move. 
- #[test] - fn verify_steers_maps_reverdicts() { - let key = |name: &str, ver: &str| ("pypi".to_string(), name.to_string(), ver.to_string()); - let mut checks = HashMap::new(); - checks.insert( - key("badfix", "3.0.0"), - r#"{"ecosystem":"pypi","package_name":"badfix","version":"3.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0009","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string(), - ); - checks.insert(key("flaky", "4.0.0"), "{}".to_string()); - let mut statuses = HashMap::new(); - statuses.insert(key("flaky", "4.0.0"), 503u16); - let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); - - let opts = verdict_opts(&stub.base_url); - - // oldpkg's fix is unknown to the stub → default clean; badfix's fix is - // flagged; flaky's fix 503s. badfix arrives via the transitive arm. - let mut report = report_with(vec![ - vulnerable_outcome("oldpkg", "1.0.0", Some("2.0.0")), - vulnerable_outcome("flaky", "1.0.0", Some("4.0.0")), - ]); - report.tree = Some(TreeReport::Full { - resolved_count: 3, - transitive: vec![TreeOutcome { - name: "badfix".to_string(), - version: "0.1.0".to_string(), - verdict: VerdictStatus::Vulnerable(vec![vm("A-2", Some("3.0.0"))]), - origin: TreeOrigin::Transitive, - }], - audit: None, - }); - verify_steers(&mut report, &opts); - - let steer = |name: &str, ver: &str| { - report - .steers - .get(&(name.to_string(), ver.to_string())) - .copied() - }; - assert_eq!(steer("oldpkg", "2.0.0"), Some(SteerCheck::Verified)); - assert_eq!(steer("badfix", "3.0.0"), Some(SteerCheck::Rejected)); - assert_eq!(steer("flaky", "4.0.0"), Some(SteerCheck::Unverified)); - - // Steer re-checks never feed counts or the block decision. - assert_eq!(report.vulnerable_count(), 3); - assert_eq!(report.unverifiable_count(), 0); - } - - /// Tokenless mode never sends steer requests; `steer_for` treats a - /// missing map entry as Unverified. 
- #[test] - fn verify_steers_noop_without_token() { - let opts = stub_opts(); - let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", Some("2.0.0"))]); - verify_steers(&mut report, &opts); - assert!(report.steers.is_empty()); - assert_eq!( - steer_for(&report, "oldpkg", &[vm("A-1", Some("2.0.0"))]), - Some(("2.0.0".to_string(), SteerCheck::Unverified)) - ); - } - - /// No proposal (fix unknown) ⇒ no requests at all: with the vuln-api at a - /// dead address, an attempted request would land as Unverified. - #[test] - fn verify_steers_skips_requests_without_proposals() { - let opts = verdict_opts("http://127.0.0.1:9"); - let mut report = report_with(vec![vulnerable_outcome("oldpkg", "1.0.0", None)]); - verify_steers(&mut report, &opts); - assert!(report.steers.is_empty()); - } - - /// Proposals dedup by normalized (name, version): two pypi spellings of - /// the same package produce one steer entry, and `steer_for` resolves it - /// for either spelling. - #[test] - fn verify_steers_dedups_by_normalized_name() { - let stub = crate::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); - let opts = verdict_opts(&stub.base_url); - let mut report = report_with(vec![ - vulnerable_outcome("Flask_Cors", "1.0.0", Some("2.0.0")), - vulnerable_outcome("flask-cors", "1.0.0", Some("2.0.0")), - ]); - verify_steers(&mut report, &opts); - assert_eq!(report.steers.len(), 1); - for spelling in ["Flask_Cors", "flask-cors"] { - assert_eq!( - steer_for(&report, spelling, &[vm("A-1", Some("2.0.0"))]), - Some(("2.0.0".to_string(), SteerCheck::Verified)), - "spelling {spelling}" - ); - } - } - #[test] fn error_prefix_strips_parenthesized_detail() { // The reqwest network-failure shape: per-package URL in parens. 
diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index bbd0c8b..2851fcf 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -74,8 +74,8 @@ fn pip_requirements_finding_labeled_from_requirements() { fn npm_preexisting_direct_dep_labeled_with_fix_hint() { // `evildep` is already a direct dep in the project's package.json; the // finding gets the pre-existing label plus the fix-command hint. The - // steer re-check verified 1.2.2 clean (the stub defaults unknown - // versions to clean), so the hint drops the "(advertised fix)" hedge. + // fix 1.2.2 covers every advisory (`safe_version` is Some), so the hint + // drops the "(advertised fix)" hedge. let project = npm_project(); let mut checks = HashMap::new(); checks.insert( @@ -103,18 +103,23 @@ fn npm_preexisting_direct_dep_labeled_with_fix_hint() { } #[test] -fn npm_preexisting_fix_hint_keeps_hedge_when_unverifiable() { - // The steer re-check for 1.2.2 fails (503), so the bare steer line stays - // quiet and the fix-command hint keeps its "(advertised fix)" hedge. +fn npm_preexisting_fix_hint_keeps_hedge_when_fix_is_partial() { + // One advisory advertises fix 1.2.2, the other has no fix: bumping is + // still the best move but doesn't clear everything, so the steer line + // stays quiet and the fix-command hint keeps its "(advertised fix)" + // hedge. 
let project = npm_project(); let mut checks = HashMap::new(); checks.insert( key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), + r#"{"ecosystem":"npm","package_name":"evildep","version":"0.4.2","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0002","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":"1.2.2"}, + {"advisory_id":"MAL-2024-0003","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), ); - let mut statuses = HashMap::new(); - statuses.insert(key("npm", "evildep", "1.2.2"), 503u16); - let mut h = TreeHarness::new("npm", checks, statuses, NPM_LOCK); + let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -125,11 +130,11 @@ fn npm_preexisting_fix_hint_keeps_hedge_when_unverifiable() { let stdout = String::from_utf8_lossy(&out.stdout); assert!( stdout.contains("fix with: corgea npm install evildep@1.2.2 (advertised fix)"), - "unverified fix hint must keep the hedge: {stdout}" + "partial fix hint must keep the hedge: {stdout}" ); assert!( !stdout.contains("→ safe version"), - "an unverified steer must stay quiet: {stdout}" + "a partial fix must not print the steer: {stdout}" ); } diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs index 393731f..a266192 100644 --- a/tests/cli_remediation.rs +++ b/tests/cli_remediation.rs @@ -1,8 +1,7 @@ //! Hermetic e2e tests for remediation steering: a blocked install names the -//! safe version from the verdict's `fixed_version` data — but only after the -//! proposed version itself re-verdicts clean against vuln-api. A flagged -//! proposal prints the rejection note instead; a failed re-check suppresses -//! the steer quietly without moving counts or exit codes. +//! safe version from the verdict's `fixed_version` data — the highest fix +//! covering every advisory. 
When any advisory has no known fix, no steer +//! prints and JSON `remediation` is null. //! //! Uses the shared `common::PipHarness` (pypi stub published 2020 so recency //! never blocks, a fake pip recording its argv, the in-crate vuln-api stub, @@ -23,16 +22,8 @@ fn no_fix_body() -> String { vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None) } -/// The advertised fix `oldpkg@2.0.0` is itself flagged — the steer re-check -/// must reject it. -fn flagged_fix_body() -> String { - vulnerable_body("pypi", "oldpkg", "2.0.0", "MAL-2024-0003", None) -} - #[test] fn fixed_match_blocks_and_names_safe_version() { - // The stub answers default-clean for the unscripted `oldpkg@2.0.0` steer - // re-check, so the proposal verifies and the steer prints. let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); @@ -126,95 +117,3 @@ fn json_remediation_null_when_no_fix() { "remediation must be null when no fix is known: {parsed}" ); } - -#[test] -fn rejected_fix_prints_rejection_instead_of_steer() { - // oldpkg@1.0.0 is vulnerable with an advertised fix of 2.0.0 — but the - // stub flags 2.0.0 too, so the steer must turn into the rejection note. 
- let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); - let out = h - .cmd - .args(["pip", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1)); - assert_eq!(h.recorded_argv(), None, "pip must not run"); - let stdout = String::from_utf8_lossy(&out.stdout); - assert!( - stdout.contains("advertised fix 2.0.0 is also flagged — no safe version to suggest"), - "stdout: {stdout}" - ); - assert!( - !stdout.contains("safe version:"), - "a rejected fix must not print the steer: {stdout}" - ); - assert!( - stdout.contains("1 vulnerable, 0 unverifiable"), - "the steer re-check must not inflate counts: {stdout}" - ); -} - -#[test] -fn unverified_fix_suppresses_steer_quietly() { - // The steer re-check for oldpkg@2.0.0 503s. The steer disappears with no - // substitute line, and counts/exit stay exactly as without the re-check. 
- let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - let mut statuses = HashMap::new(); - statuses.insert(key("pypi", "oldpkg", "2.0.0"), 503u16); - let mut h = PipHarness::new(checks, statuses, Some("test-token"), 0); - let out = h - .cmd - .args(["pip", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1)); - assert_eq!(h.recorded_argv(), None, "pip must not run"); - let stdout = String::from_utf8_lossy(&out.stdout); - assert!( - !stdout.contains("safe version:"), - "an unverified fix must not print the steer: {stdout}" - ); - assert!( - !stdout.contains("also flagged"), - "an unverified fix must stay quiet, not claim rejection: {stdout}" - ); - assert!( - stdout.contains("1 vulnerable, 0 unverifiable"), - "a failed steer re-check must not change counts: {stdout}" - ); - assert!( - stdout.contains("fixed in 2.0.0"), - "advisory fix data still prints: {stdout}" - ); -} - -#[test] -fn json_remediation_null_when_fix_rejected() { - let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - checks.insert(key("pypi", "oldpkg", "2.0.0"), flagged_fix_body()); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); - let out = h - .cmd - .args(["pip", "--json", "install", "oldpkg==1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1)); - assert_eq!(h.recorded_argv(), None); - let parsed: serde_json::Value = - serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); - let v = &parsed["results"][0]["verdict"]; - assert_eq!(v["status"], "vulnerable", "parsed: {parsed}"); - assert!( - v["remediation"].is_null(), - "remediation must be null when the fix re-verdicts vulnerable: {parsed}" - ); - assert_eq!( - parsed["summary"]["vulnerable"], 1, - "the steer re-check must not inflate counts: {parsed}" - ); -} From ccceb7a73b287613558b9fe20d63ef9c6c79d292 Mon Sep 17 00:00:00 
2001 From: juangaitanv Date: Thu, 11 Jun 2026 12:06:33 +0200 Subject: [PATCH 31/59] Remove the warn-only npm audit second opinion It duplicated the vuln-api verdict gate without ever affecting the outcome: warn-only, fail-silent, and shipped with its own kill switch (CORGEA_NO_NPM_AUDIT). Cutting it removes a concurrent subprocess with a 5s deadline poll and a 1s collect window (flake surface), the AuditHandle reaping machinery, and ~460 lines across src and tests. TreeResolution collapses back to the package list. --- skills/corgea/SKILL.md | 3 +- src/main.rs | 1 - src/precheck/mod.rs | 62 +------- src/precheck/tree.rs | 282 +------------------------------------ tests/cli_npm_audit.rs | 313 ----------------------------------------- 5 files changed, 10 insertions(+), 651 deletions(-) delete mode 100644 tests/cli_npm_audit.rs diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 5222289..53bccec 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -163,8 +163,7 @@ every advisory, or `null` when any advisory has no known fix. Recency gating needs no token; the vuln verdict uses the configured Corgea token when present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, -`CORGEA_VULN_API_URL`; `CORGEA_NO_NPM_AUDIT=1` disables the warn-only `npm audit` -second opinion. +`CORGEA_VULN_API_URL`. 
#### Testing the gate diff --git a/src/main.rs b/src/main.rs index 7ea61d0..e127f85 100644 --- a/src/main.rs +++ b/src/main.rs @@ -288,7 +288,6 @@ fn install_wrap_options( verdict, npm_registry: utils::generic::get_env_var_if_exists("CORGEA_NPM_REGISTRY"), pypi_registry: utils::generic::get_env_var_if_exists("CORGEA_PYPI_REGISTRY"), - npm_audit: utils::generic::get_env_var_if_exists("CORGEA_NO_NPM_AUDIT").is_none(), } } diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 16d2897..3bbc15b 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -120,10 +120,6 @@ pub struct PrecheckOptions { /// Optional registry overrides, used by tests. pub npm_registry: Option, pub pypi_registry: Option, - /// Run the warn-only `npm audit` second opinion during the npm tree - /// pass. Cleared by `CORGEA_NO_NPM_AUDIT` (read in `main`, like the - /// registry overrides). - pub npm_audit: bool, } /// Each item the user (or a `-r` requirements file) asked us to install. @@ -222,9 +218,6 @@ pub enum TreeReport { resolved_count: usize, /// Verdicts for resolved packages beyond the named targets. transitive: Vec, - /// Warn-only `npm audit` second opinion (npm only; `None` when - /// unavailable, disabled, or failed). Never consulted for blocking. - audit: Option, }, /// Resolution unavailable or failed — only named targets were verified. NamedOnly { reason: String }, @@ -417,20 +410,6 @@ fn run_parsed_install( "warning: transitive dependencies not checked ({reason}); only named packages were verified." ); } - // Warn-only npm audit second opinion: never blocks, never changes - // exit codes (`should_block_install` ignores it by design). - if let Some(TreeReport::Full { - audit: Some(audit), .. 
- }) = &tree - { - if audit.total > 0 { - eprintln!( - "note: npm audit reports {} advisories ({} high/critical) — supplementary signal, not blocking", - audit.total, - audit.high + audit.critical - ); - } - } // The requirements note only matters when the tree pass did *not* cover // those files (fallback to named-only, or recency-only mode). if !matches!(&tree, Some(TreeReport::Full { .. })) { @@ -561,11 +540,8 @@ fn run_tree_pass( outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let tree::TreeResolution { - packages: set, - audit: audit_rx, - } = match tree::resolve_tree(manager, rest, opts.npm_audit) { - Ok(Some(resolution)) => resolution, + let set = match tree::resolve_tree(manager, rest) { + Ok(Some(set)) => set, Ok(None) => { run_verdict_pass(manager, outcomes, opts); return TreeReport::NamedOnly { @@ -616,17 +592,10 @@ fn run_tree_pass( .as_ref() .expect("tree pass requires verdict config"); let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); - // Collect the warn-only npm audit second opinion only after the verdict - // pool so the two truly overlap. The wait is capped tight: this signal - // never changes the outcome, so a finished gate won't stall long for it — - // a slow audit is killed and skipped (collect also reaps the subprocess, - // so nothing outlives the CLI). - let audit = audit_rx.and_then(|handle| handle.collect(Duration::from_secs(1))); let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, transitive, - audit, } } @@ -1214,23 +1183,6 @@ fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { } } -/// JSON shape for the warn-only npm audit second opinion in the tree arm. 
-fn npm_audit_json(audit: &tree::AuditSummary) -> serde_json::Value { - use serde_json::json; - json!({ - "total": audit.total, - "critical": audit.critical, - "high": audit.high, - "moderate": audit.moderate, - "low": audit.low, - "info": audit.info, - "top": audit.top.iter().map(|(name, severity)| json!({ - "name": name, - "severity": severity, - })).collect::>(), - }) -} - fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { use serde_json::json; let outcomes: Vec<_> = report @@ -1285,7 +1237,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "verdict_mode": if opts.verdict.is_some() { "full" } else { "recency-only" }, "results": outcomes, "tree": report.tree.as_ref().map(|t| match t { - TreeReport::Full { resolved_count, transitive, audit } => json!({ + TreeReport::Full { resolved_count, transitive } => json!({ "mode": "full", "reason": serde_json::Value::Null, "resolved_count": resolved_count, @@ -1295,14 +1247,12 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "origin": o.origin.json_name(), "verdict": verdict_json(&o.verdict), })).collect::>(), - "npm_audit": audit.as_ref().map(npm_audit_json), }), TreeReport::NamedOnly { reason } => json!({ "mode": "named-only", "reason": reason, "resolved_count": 0, "transitive": [], - "npm_audit": serde_json::Value::Null, }), }), }); @@ -1344,8 +1294,6 @@ mod tests { verdict: None, npm_registry: None, pypi_registry: Some("http://127.0.0.1:9".to_string()), - // Unit tests never want the real `npm audit` subprocess. 
- npm_audit: false, } } @@ -1544,7 +1492,6 @@ mod tests { origin: TreeOrigin::Transitive, verdict: VerdictStatus::Vulnerable(vec![]), }], - audit: None, }); assert_eq!(report.vulnerable_count(), 1); @@ -1786,7 +1733,6 @@ mod tests { ), origin: TreeOrigin::Transitive, }], - audit: None, }); let groups = collapsed_unverifiable_groups(&report); assert_eq!(groups.len(), 1); @@ -1885,7 +1831,6 @@ mod tests { report.tree = Some(TreeReport::Full { resolved_count: 1, transitive: vec![tree_vulnerable(origin)], - audit: None, }); assert_eq!( refusal_blames_existing_tree(&report), @@ -1924,7 +1869,6 @@ mod tests { TreeOrigin::Transitive, ), ], - audit: None, }) }; diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 0b7fae6..d155a39 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -4,10 +4,7 @@ //! pip: `--only-binary :all:` prevents sdist builds (pypa/pip#13091). //! npm: `--ignore-scripts` guards npm/cli#2787. -use std::path::PathBuf; -use std::process::{Command, Stdio}; -use std::sync::{mpsc, Arc, Mutex}; -use std::time::{Duration, Instant}; +use std::process::Command; use super::PackageManager; @@ -21,59 +18,6 @@ pub struct TreePackage { pub requested: bool, } -/// Warn-only `npm audit` second opinion: counts from -/// `metadata.vulnerabilities` plus the worst few advisories. Never blocks. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct AuditSummary { - pub total: u64, - pub critical: u64, - pub high: u64, - pub moderate: u64, - pub low: u64, - pub info: u64, - /// Worst advisories as `(package name, severity)`, capped at - /// `AUDIT_TOP_LIMIT`, severest first. - pub top: Vec<(String, String)>, -} - -/// What `resolve_tree` hands back: the would-install set, plus (npm only) -/// a handle to the concurrent `npm audit` second opinion. -pub struct TreeResolution { - pub packages: Vec, - pub audit: Option, -} - -/// The in-flight `npm audit` second opinion: a receiver for the summary plus -/// deterministic cleanup. 
The CLI exits the process as soon as the gate -/// returns, which would strand the audit thread mid-poll and orphan the -/// `npm audit` child — so `collect` owns reaping both before the gate moves -/// on. -pub struct AuditHandle { - rx: mpsc::Receiver, - /// The audit subprocess, shared with the polling thread. Emptied by - /// whichever side reaps it first. - child: Arc>>, - thread: std::thread::JoinHandle<()>, -} - -impl AuditHandle { - /// Wait up to `window` for the summary, then kill whatever is left of - /// the subprocess and join the thread. On the happy path the child has - /// already exited and the join is instant; a hung audit is killed now - /// rather than left running past the CLI's exit. - pub fn collect(self, window: Duration) -> Option { - let summary = self.rx.recv_timeout(window).ok(); - if let Ok(mut slot) = self.child.lock() { - if let Some(mut child) = slot.take() { - let _ = child.kill(); - let _ = child.wait(); - } - } - let _ = self.thread.join(); - summary - } -} - /// Whether this manager's resolver has anything to resolve for the parsed /// install. pip's dry-run also reads `-r` requirements files, so those make /// a pip install eligible even with no named targets. npm's lockfile @@ -90,20 +34,10 @@ pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstal pub fn resolve_tree( manager: PackageManager, install_args: &[String], - run_audit: bool, -) -> Result, String> { +) -> Result>, String> { match manager { - PackageManager::Pip => { - resolve_pip_tree(manager.binary_name(), install_args).map(|packages| { - Some(TreeResolution { - packages, - audit: None, - }) - }) - } - PackageManager::Npm => { - resolve_npm_tree(manager.binary_name(), install_args, run_audit).map(Some) - } + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), // yarn/pnpm/uv have no safe dry-run for installs. 
_ => Ok(None), } @@ -204,11 +138,7 @@ fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { /// /// `--ignore-scripts` because npm has run lifecycle scripts under /// `--package-lock-only` before (npm/cli#2787). -fn resolve_npm_tree( - binary: &str, - install_args: &[String], - run_audit: bool, -) -> Result { +fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; for manifest in [ @@ -242,144 +172,7 @@ fn resolve_npm_tree( } let lock = std::fs::read_to_string(work.path().join("package-lock.json")) .map_err(|e| format!("read generated package-lock.json: {e}"))?; - let packages = parse_npm_lockfile(&lock)?; - let audit = run_audit.then(|| spawn_audit(work, resolved)); - Ok(TreeResolution { packages, audit }) -} - -/// Kill the audit subprocess if it hasn't finished by then. -const AUDIT_DEADLINE: Duration = Duration::from_secs(5); - -/// Cap on `AuditSummary::top` advisory entries. -const AUDIT_TOP_LIMIT: usize = 5; - -/// Run `npm audit --json` in the dry-run temp dir, concurrent with the -/// verdict pool. The thread owns `work` so the dir outlives the resolver and -/// is cleaned up when the audit finishes. Any failure (spawn error, timeout, -/// unparsable output) drops the sender — the receiver sees a disconnect and -/// the gate silently skips the second opinion. 
-fn spawn_audit(work: tempfile::TempDir, npm: PathBuf) -> AuditHandle { - let (tx, rx) = mpsc::channel(); - let child = Arc::new(Mutex::new(None)); - let slot = Arc::clone(&child); - let thread = std::thread::spawn(move || { - if let Some(summary) = run_audit(work.path(), &npm, &slot) { - let _ = tx.send(summary); - } - drop(work); - }); - AuditHandle { rx, child, thread } -} - -/// `npm audit` exits 1 when it finds advisories — that's the success case, -/// so stdout is parsed regardless of exit code. Stdout goes through a file -/// (not a pipe) so the deadline poll can't deadlock on a full pipe buffer. -/// `--package-lock-only` because the work dir holds only manifests and the -/// generated lockfile — never a `node_modules`. -/// -/// The subprocess lives in `slot`, shared with `AuditHandle::collect`: the -/// poll relocks each iteration, and an empty slot means the collector -/// already reaped the child — stop quietly. -fn run_audit( - work: &std::path::Path, - npm: &std::path::Path, - slot: &Mutex>, -) -> Option { - let stdout_path = work.join("corgea-npm-audit.json"); - let stdout_file = std::fs::File::create(&stdout_path).ok()?; - let child = Command::new(npm) - .args(["audit", "--json", "--package-lock-only"]) - .current_dir(work) - .stdin(Stdio::null()) - .stdout(stdout_file) - .stderr(Stdio::null()) - .spawn() - .ok()?; - *slot.lock().ok()? = Some(child); - let deadline = Instant::now() + AUDIT_DEADLINE; - loop { - let mut guard = slot.lock().ok()?; - let Some(child) = guard.as_mut() else { - return None; // collector reaped the child first - }; - match child.try_wait() { - Ok(Some(_)) => { - // Exited on its own: clear the slot so the collector has - // nothing left to kill. 
- guard.take(); - break; - } - Ok(None) if Instant::now() < deadline => { - drop(guard); - std::thread::sleep(Duration::from_millis(50)); - } - _ => { - let mut child = guard.take().expect("checked above"); - drop(guard); - let _ = child.kill(); - let _ = child.wait(); - return None; - } - } - } - parse_npm_audit(&std::fs::read_to_string(&stdout_path).ok()?) -} - -/// Parse npm audit report v2 (npm 7+): counts from `metadata.vulnerabilities`, -/// `top` from the `vulnerabilities` map, severest first. -fn parse_npm_audit(json: &str) -> Option { - let report: serde_json::Value = serde_json::from_str(json).ok()?; - let counts = report.get("metadata")?.get("vulnerabilities")?; - let count = |k: &str| counts.get(k).and_then(|v| v.as_u64()).unwrap_or(0); - let (critical, high, moderate, low, info) = ( - count("critical"), - count("high"), - count("moderate"), - count("low"), - count("info"), - ); - let total = counts - .get("total") - .and_then(|v| v.as_u64()) - .unwrap_or(critical + high + moderate + low + info); - let mut top: Vec<(String, String)> = report - .get("vulnerabilities") - .and_then(|v| v.as_object()) - .map(|vulns| { - vulns - .values() - .filter_map(|entry| { - Some(( - entry.get("name")?.as_str()?.to_string(), - entry.get("severity")?.as_str()?.to_string(), - )) - }) - .collect() - }) - .unwrap_or_default(); - top.sort_by(|a, b| (severity_rank(&a.1), &a.0).cmp(&(severity_rank(&b.1), &b.0))); - top.truncate(AUDIT_TOP_LIMIT); - Some(AuditSummary { - total, - critical, - high, - moderate, - low, - info, - top, - }) -} - -/// Sort key for npm audit severities, severest first. 
-fn severity_rank(severity: &str) -> u8 { - match severity { - "critical" => 0, - "high" => 1, - "moderate" => 2, - "low" => 3, - "info" => 4, - _ => 5, - } + parse_npm_lockfile(&lock) } fn parse_npm_lockfile(json: &str) -> Result, String> { @@ -515,69 +308,6 @@ mod tests { assert!(err.contains("no packages map"), "got: {err}"); } - // npm audit report v2 shape: per-package `vulnerabilities` map plus - // `metadata.vulnerabilities` counts. - const AUDIT_REPORT: &str = r#"{ - "auditReportVersion": 2, - "vulnerabilities": { - "minimist": {"name": "minimist", "severity": "critical", "via": []}, - "lodash": {"name": "lodash", "severity": "high", "via": []}, - "ms": {"name": "ms", "severity": "moderate", "via": []} - }, - "metadata": {"vulnerabilities": - {"info": 0, "low": 0, "moderate": 1, "high": 1, "critical": 1, "total": 3}} - }"#; - - #[test] - fn parse_npm_audit_counts_and_top() { - let summary = parse_npm_audit(AUDIT_REPORT).expect("parse audit report"); - assert_eq!(summary.total, 3); - assert_eq!(summary.critical, 1); - assert_eq!(summary.high, 1); - assert_eq!(summary.moderate, 1); - assert_eq!(summary.low, 0); - assert_eq!(summary.info, 0); - // Severest first: critical, high, moderate. 
- assert_eq!( - summary.top, - vec![ - ("minimist".to_string(), "critical".to_string()), - ("lodash".to_string(), "high".to_string()), - ("ms".to_string(), "moderate".to_string()), - ] - ); - } - - #[test] - fn parse_npm_audit_caps_top_entries() { - let entries: Vec = (0..8) - .map(|i| format!(r#""p{i}": {{"name": "p{i}", "severity": "low"}}"#)) - .collect(); - let json = format!( - r#"{{"vulnerabilities": {{{}}}, - "metadata": {{"vulnerabilities": {{"low": 8, "total": 8}}}}}}"#, - entries.join(",") - ); - let summary = parse_npm_audit(&json).expect("parse audit report"); - assert_eq!(summary.total, 8); - assert_eq!(summary.top.len(), AUDIT_TOP_LIMIT); - } - - #[test] - fn parse_npm_audit_missing_total_sums_levels() { - let json = r#"{"vulnerabilities": {}, - "metadata": {"vulnerabilities": {"high": 2, "low": 1}}}"#; - let summary = parse_npm_audit(json).expect("parse audit report"); - assert_eq!(summary.total, 3); - } - - #[test] - fn parse_npm_audit_rejects_garbage() { - assert_eq!(parse_npm_audit("not json"), None); - assert_eq!(parse_npm_audit("{}"), None); - assert_eq!(parse_npm_audit(r#"{"metadata": {}}"#), None); - } - #[test] fn name_from_lock_path_handles_nested_and_scoped() { assert_eq!( diff --git a/tests/cli_npm_audit.rs b/tests/cli_npm_audit.rs deleted file mode 100644 index 41a313d..0000000 --- a/tests/cli_npm_audit.rs +++ /dev/null @@ -1,313 +0,0 @@ -//! Hermetic e2e tests for the warn-only `npm audit` second opinion -//! (`corgea npm install …` with a token + vuln-api stub). -//! -//! Extends the `cli_tree.rs` harness pattern with an audit-aware fake npm: -//! a `--package-lock-only` invocation writes a canned lockfile (the tree -//! pass), an `audit` invocation emits a canned audit report on stdout (real -//! `npm audit` exits 1 when it finds advisories — that's the success case), -//! and any other invocation records its argv to a marker. The audit is a -//! supplementary signal only: it must never block, never unblock, and never -//! 
change exit codes. - -#![cfg(unix)] - -mod common; - -use common::{ - corgea_isolated, emit, key, spawn_oldpkg_registry_stub, vulnerable_body, write_script, NPM_LOCK, -}; -use corgea::vuln_api_stub::{self, PackageKey}; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::process::Command; -use tempfile::TempDir; - -/// npm audit report v2 with two advisories: 1 critical + 1 high. -const AUDIT_ADVISORIES: &str = r#"{"auditReportVersion":2, - "vulnerabilities":{ - "minimist":{"name":"minimist","severity":"critical","via":[]}, - "lodash":{"name":"lodash","severity":"high","via":[]}}, - "metadata":{"vulnerabilities": - {"info":0,"low":0,"moderate":0,"high":1,"critical":1,"total":2}}}"#; - -/// npm audit report v2 with no advisories. -const AUDIT_CLEAN: &str = r#"{"auditReportVersion":2,"vulnerabilities":{}, - "metadata":{"vulnerabilities": - {"info":0,"low":0,"moderate":0,"high":0,"critical":0,"total":0}}}"#; - -/// How the fake npm behaves on its `audit --json` invocation. -#[derive(Clone, Copy)] -enum AuditScenario { - /// Emits `AUDIT_ADVISORIES` and exits 1 — real npm audit's - /// advisories-found behaviour. - Advisories, - /// Emits `AUDIT_CLEAN` and exits 0. - Clean, - /// Emits nothing and exits 1 — unparsable output must be a silent skip. - Broken, - /// Never answers — the gate's `recv_timeout` must move on without it. - Hang, -} - -/// Write an executable fake npm into `dir`: -/// * `audit` (checked first — the audit argv also carries -/// `--package-lock-only`) → records argv to `audit_marker`, then acts out -/// `scenario`; -/// * `--package-lock-only` → writes `NPM_LOCK` to `./package-lock.json` -/// (cwd is the resolver's throwaway temp dir), exits 0 — the tree pass; -/// * anything else → records argv to `marker`, exits 0 — the real install. 
-fn write_fake_npm( - dir: &Path, - marker: &Path, - audit_marker: &Path, - audit_pid: &Path, - scenario: AuditScenario, -) { - let lock_payload = dir.join("npm-lock-payload.json"); - std::fs::write(&lock_payload, NPM_LOCK).expect("write lock payload"); - let audit_branch = match scenario { - AuditScenario::Advisories | AuditScenario::Clean => { - let (body, code) = match scenario { - AuditScenario::Advisories => (AUDIT_ADVISORIES, 1), - _ => (AUDIT_CLEAN, 0), - }; - let audit_payload = dir.join("npm-audit-payload.json"); - std::fs::write(&audit_payload, body).expect("write audit payload"); - format!("{}; exit {code}", emit(&audit_payload)) - } - AuditScenario::Broken => "exit 1".to_string(), - // Record the PID, then `exec` so the sleep IS the audit child (a - // plain `/bin/sleep 10` would be a grandchild the gate's kill never - // reaches). - AuditScenario::Hang => format!( - "printf '%s' $$ > '{}'; exec /bin/sleep 10", - audit_pid.display() - ), - }; - let script = format!( - "#!/bin/sh\ncase \" $* \" in\n\ - *\" audit \"*) printf '%s' \"$*\" > '{audit_marker}'; {audit_branch};;\n\ - *\" --package-lock-only \"*) {lock} > package-lock.json; exit 0;;\n\ - esac\nprintf '%s' \"$*\" > '{marker}'\nexit 0\n", - lock = emit(&lock_payload), - audit_marker = audit_marker.display(), - marker = marker.display(), - ); - write_script(dir, "npm", &script); -} - -/// `corgea` wired to the registry stub, an audit-aware fake npm, and a -/// vuln-api stub. 
-struct AuditHarness { - cmd: Command, - marker: PathBuf, - audit_marker: PathBuf, - audit_pid: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl AuditHarness { - fn new(checks: HashMap<PackageKey, String>, scenario: AuditScenario) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - let audit_marker = bin.path().join("audit-argv.txt"); - let audit_pid = bin.path().join("audit-pid.txt"); - write_fake_npm(bin.path(), &marker, &audit_marker, &audit_pid, scenario); - let registry = spawn_oldpkg_registry_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); - cmd.env("PATH", bin.path()) - .env("CORGEA_NPM_REGISTRY", &registry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token") - .env_remove("CORGEA_NO_NPM_AUDIT"); - Self { - cmd, - marker, - audit_marker, - audit_pid, - _home: home, - _bin: bin, - } - } - - fn recorded_argv(&self) -> Option<String> { - std::fs::read_to_string(&self.marker).ok() - } -} - -#[test] -fn audit_advisories_warn_on_stderr_without_blocking() { - // Verdicts all clean; only npm audit complains → note on stderr, the - // install still runs, exit code stays 0. 
- let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0), "audit findings must not block"); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - let stderr = String::from_utf8_lossy(&out.stderr); - assert!( - stderr.contains( - "note: npm audit reports 2 advisories (2 high/critical) — supplementary signal, not blocking" - ), - "stderr: {stderr}" - ); - assert_eq!( - std::fs::read_to_string(&h.audit_marker).as_deref().ok(), - Some("audit --json --package-lock-only"), - "audit must run as `npm audit --json --package-lock-only`" - ); -} - -#[test] -fn audit_clean_report_prints_no_note() { - let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Clean); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - let stderr = String::from_utf8_lossy(&out.stderr); - assert!( - !stderr.contains("npm audit reports"), - "zero advisories must stay silent: {stderr}" - ); -} - -#[test] -fn audit_json_object_in_tree_arm() { - let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); - let out = h - .cmd - .args(["npm", "--json", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - let parsed: serde_json::Value = - serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); - let audit = &parsed["tree"]["npm_audit"]; - assert_eq!(audit["total"], 2); - assert_eq!(audit["critical"], 1); - assert_eq!(audit["high"], 1); - assert_eq!(audit["moderate"], 0); - // `top` is sorted severest first. 
- assert_eq!(audit["top"][0]["name"], "minimist"); - assert_eq!(audit["top"][0]["severity"], "critical"); - assert_eq!(audit["top"][1]["name"], "lodash"); - assert_eq!(audit["top"][1]["severity"], "high"); -} - -#[test] -fn audit_disabled_by_env_var() { - let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Advisories); - let out = h - .cmd - .env("CORGEA_NO_NPM_AUDIT", "1") - .args(["npm", "--json", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - let stderr = String::from_utf8_lossy(&out.stderr); - assert!(!stderr.contains("npm audit reports"), "stderr: {stderr}"); - assert!( - !h.audit_marker.exists(), - "CORGEA_NO_NPM_AUDIT=1 must skip the audit subprocess entirely" - ); - let parsed: serde_json::Value = - serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); - assert_eq!(parsed["tree"]["mode"], "full"); - assert!(parsed["tree"]["npm_audit"].is_null()); -} - -#[test] -fn audit_failure_is_a_silent_skip() { - // Audit exits 1 with no output (unparsable) → no note, null in JSON, - // gate result untouched. - let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Broken); - let out = h - .cmd - .args(["npm", "--json", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - assert!( - !String::from_utf8_lossy(&out.stderr).contains("npm audit"), - "a failed audit must stay silent" - ); - let parsed: serde_json::Value = - serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); - assert!(parsed["tree"]["npm_audit"].is_null()); -} - -#[test] -fn audit_hang_is_skipped_within_the_collect_window() { - // The fake audit sleeps 10s; the gate's 1s collect window must move on — - // and must kill the audit child on its way out, not orphan it past the - // CLI's exit. 
- let started = std::time::Instant::now(); - let mut h = AuditHarness::new(HashMap::new(), AuditScenario::Hang); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(0)); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); - assert!( - !String::from_utf8_lossy(&out.stderr).contains("npm audit"), - "a timed-out audit must stay silent" - ); - assert!( - started.elapsed() < std::time::Duration::from_secs(8), - "gate must not wait out the hung audit (took {:?})", - started.elapsed() - ); - let pid = std::fs::read_to_string(&h.audit_pid).expect("audit must have started"); - let alive = Command::new("kill") - .args(["-0", pid.trim()]) - .status() - .expect("run kill -0") - .success(); - assert!( - !alive, - "hung audit child (pid {}) must be dead after the CLI exits", - pid.trim() - ); -} - -#[test] -fn audit_never_unblocks_a_vulnerable_verdict() { - // Transitive `evildep` is flagged by the verdict; the audit also has - // findings. Block behaviour and exit code are the verdict's alone — the - // audit note still prints as a supplementary signal. 
- let mut checks = HashMap::new(); - checks.insert( - key("npm", "evildep", "0.4.2"), - vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), - ); - let mut h = AuditHarness::new(checks, AuditScenario::Advisories); - let out = h - .cmd - .args(["npm", "install", "oldpkg@1.0.0"]) - .output() - .expect("run corgea"); - assert_eq!(out.status.code(), Some(1), "verdict block must stand"); - assert_eq!( - h.recorded_argv(), - None, - "npm must not run on a vulnerable verdict regardless of audit" - ); - let stderr = String::from_utf8_lossy(&out.stderr); - assert!( - stderr.contains("npm audit reports 2 advisories"), - "stderr: {stderr}" - ); -} From 204fb470b8246bc58ae83902b02dd885f381b3a2 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 12:40:43 +0200 Subject: [PATCH 32/59] Drop the persisted vuln_api_url config override; env var only --- src/config.rs | 61 +++++++-------------------------------------------- 1 file changed, 8 insertions(+), 53 deletions(-) diff --git a/src/config.rs b/src/config.rs index 805b426..f508bb8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,10 +7,6 @@ pub struct Config { pub(crate) url: String, pub(crate) debug: i8, pub(crate) token: String, - /// Override for the vuln-api host (install-gate package checks). - /// `#[serde(default)]` keeps pre-existing config files loading. - #[serde(default)] - pub(crate) vuln_api_url: Option, } impl Config { @@ -38,7 +34,6 @@ impl Config { url: "https://www.corgea.app".to_string(), debug: 0, token: "".to_string(), - vuln_api_url: None, }; let toml = toml::to_string(&config).expect("Failed to serialize config"); @@ -107,10 +102,9 @@ impl Config { } /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, - /// then the config file's `vuln_api_url`, then the public default. + /// then the public default. 
pub fn get_vuln_api_url(&self) -> String { crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") - .or_else(|| self.vuln_api_url.clone()) .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()) .trim() .trim_end_matches('/') @@ -122,12 +116,11 @@ impl Config { mod tests { use super::*; - fn config_with(vuln_api_url: Option<&str>) -> Config { + fn test_config() -> Config { Config { url: "https://www.corgea.app".to_string(), debug: 0, token: "".to_string(), - vuln_api_url: vuln_api_url.map(str::to_string), } } @@ -138,60 +131,22 @@ mod tests { fn get_vuln_api_url_resolution_order() { env::remove_var("CORGEA_VULN_API_URL"); - // Default when neither env nor config is set. + // Default when the env var is unset. assert_eq!( - config_with(None).get_vuln_api_url(), + test_config().get_vuln_api_url(), "https://vuln-api.corgea.app" ); - // Config value wins over the default; trailing slash trimmed. - assert_eq!( - config_with(Some("https://custom.example.com/")).get_vuln_api_url(), - "https://custom.example.com" - ); - - // Surrounding whitespace trimmed. - assert_eq!( - config_with(Some(" https://ws.example.com ")).get_vuln_api_url(), - "https://ws.example.com" - ); - - // Env var wins over the config value (and gets the same trims). + // Env var wins; whitespace and trailing slash trimmed. env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); - assert_eq!( - config_with(Some("https://custom.example.com")).get_vuln_api_url(), - "https://env.example.com" - ); + assert_eq!(test_config().get_vuln_api_url(), "https://env.example.com"); // Empty / whitespace-only env var is treated as unset. 
env::set_var("CORGEA_VULN_API_URL", " "); assert_eq!( - config_with(Some("https://custom.example.com")).get_vuln_api_url(), - "https://custom.example.com" + test_config().get_vuln_api_url(), + "https://vuln-api.corgea.app" ); env::remove_var("CORGEA_VULN_API_URL"); } - - /// `Config::load()` writes the default file with `vuln_api_url: None` - /// and `save()` reserializes every config — both must round-trip. - #[test] - fn config_toml_round_trips_with_and_without_vuln_api_url() { - let without = toml::to_string(&config_with(None)).expect("serialize None field"); - let parsed: Config = toml::from_str(&without).expect("deserialize"); - assert_eq!(parsed.vuln_api_url, None); - - let with = toml::to_string(&config_with(Some("https://custom.example.com"))) - .expect("serialize Some field"); - let parsed: Config = toml::from_str(&with).expect("deserialize"); - assert_eq!( - parsed.vuln_api_url.as_deref(), - Some("https://custom.example.com") - ); - - // Pre-existing config files (no vuln_api_url key) must still load. - let legacy: Config = - toml::from_str("url = \"https://www.corgea.app\"\ndebug = 0\ntoken = \"\"\n") - .expect("legacy config without vuln_api_url"); - assert_eq!(legacy.vuln_api_url, None); - } } From a928bcf6182087349621ce9546fc3408a007703d Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 13:39:38 +0200 Subject: [PATCH 33/59] Skip yanked PyPI releases for non-exact resolution; fail closed on unsupported specifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "skip yanked-only releases" check only tested for missing upload timestamps, which yanked files keep — range/latest resolution could pick a version pip would never choose. Deserialize the PEP 592 yanked flag (bool, or a mirror-style reason string) and exclude all-yanked releases from Latest/Specifier candidates; exact pins still resolve them, like pip. 
A PEP 440 expression the resolver can't represent (==1.*, unknown operators) used to fall back to latest stable, gating a different version than the one pip installs. It is now a resolution error instead. --- src/verify_deps/registry.rs | 181 ++++++++++++++++++++++++++++++------ 1 file changed, 150 insertions(+), 31 deletions(-) diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index 9ce6b16..a7d32c6 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -49,6 +49,20 @@ pub(crate) fn encode_npm_name(name: &str) -> String { struct PypiUrl { upload_time_iso_8601: Option, upload_time: Option, + /// PEP 592. PyPI's JSON API emits a bool; some mirrors emit the + /// yank reason string instead. Either form means yanked. + #[serde(default)] + yanked: Option, +} + +impl PypiUrl { + fn is_yanked(&self) -> bool { + match &self.yanked { + Some(serde_json::Value::Bool(b)) => *b, + Some(serde_json::Value::String(_)) => true, + _ => false, + } + } } /// Parse an ISO-8601 timestamp from npm or PyPI. PyPI sometimes emits @@ -315,17 +329,21 @@ pub fn pypi_resolve( .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?; let candidates = collect_pypi_candidates(&meta); + // A yanked release resolves only via an exact pin (PEP 592), matching + // pip — otherwise we'd gate a version pip would never choose. 
+ let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); let chosen = match spec { - PypiSpec::Latest => pick_latest_stable(&candidates).map(|c| c.0.clone()), + PypiSpec::Latest => pick_latest_stable(&installable).map(|c| c.version.clone()), PypiSpec::Exact(v) => { - if candidates.iter().any(|(ver, _)| ver == v) { + if candidates.iter().any(|c| &c.version == v) { Some(v.clone()) } else { None } } - PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&candidates, spec_str) - .or_else(|| pick_latest_stable(&candidates).map(|c| c.0.clone())), + PypiSpec::Specifier(spec_str) => pypi_resolve_specifier(&installable, spec_str) + .map_err(|e| format!("{} for '{}'", e, name))?, }; let chosen = chosen.ok_or_else(|| match spec { @@ -340,8 +358,8 @@ pub fn pypi_resolve( let published_at = candidates .iter() - .find(|(ver, _)| ver == &chosen) - .map(|(_, dt)| *dt) + .find(|c| c.version == chosen) + .map(|c| c.uploaded) .ok_or_else(|| { format!( "no upload timestamp for '{}' version '{}' on PyPI", @@ -356,23 +374,26 @@ pub fn pypi_resolve( }) } -/// Returns `(version, earliest_upload_time)` for every non-yanked -/// release that has at least one uploaded artifact. Empty release -/// entries (which PyPI sometimes keeps around for yanked / private -/// versions) are filtered out so we never pick them. -fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec<(String, DateTime)> { +/// One published release a `PypiSpec` can resolve to. +#[derive(Debug, Clone)] +struct PypiCandidate { + version: String, + uploaded: DateTime, + /// Every artifact of this release is yanked (PEP 592) — pip skips + /// it for anything but an exact pin, so non-exact resolution must too. + yanked: bool, +} + +/// Returns a candidate for every release that has at least one uploaded, +/// timestamped artifact. Empty or timestampless release entries (which +/// PyPI sometimes keeps around for deleted / private versions) are +/// filtered out so we never pick them. 
+fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { let mut out = Vec::new(); for (ver, files) in &meta.releases { if files.is_empty() { continue; } - // Skip yanked-only releases. - if files - .iter() - .all(|f| f.upload_time_iso_8601.is_none() && f.upload_time.is_none()) - { - continue; - } let mut earliest: Option> = None; for f in files { let raw = f @@ -389,7 +410,11 @@ fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec<(String, DateTime Vec<(String, DateTime)]) -> Option<&(String, DateTime)> { - let mut best_semver: Option<(semver::Version, &(String, DateTime))> = None; +fn pick_latest_stable(candidates: &[PypiCandidate]) -> Option<&PypiCandidate> { + let mut best_semver: Option<(semver::Version, &PypiCandidate)> = None; for c in candidates { - let normalized = normalize_for_semver(&c.0); + let normalized = normalize_for_semver(&c.version); if let Ok(v) = semver::Version::parse(&normalized) { if !v.pre.is_empty() { continue; @@ -415,7 +440,7 @@ fn pick_latest_stable(candidates: &[(String, DateTime)]) -> Option<&(String if let Some((_, picked)) = best_semver { return Some(picked); } - candidates.iter().max_by_key(|c| c.1) + candidates.iter().max_by_key(|c| c.uploaded) } /// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or @@ -443,10 +468,15 @@ pub(crate) fn normalize_for_semver(v: &str) -> String { } /// Apply a PEP 440-style specifier expression to the candidate list -/// and return the highest match. Supported operators: `==`, `>=`, `>`, -/// `<=`, `<`, `~=`, `!=`. Unknown operators cause us to give up and -/// return `None` (the caller falls back to "latest stable"). -fn pypi_resolve_specifier(candidates: &[(String, DateTime)], spec: &str) -> Option { +/// and return the highest match (`Ok(None)` when nothing satisfies it). +/// Supported operators: `==`, `>=`, `>`, `<=`, `<`, `~=`, `!=`. 
An +/// expression we can't parse (unknown operator, wildcard like `==1.*`) +/// is `Err` — resolving anything else would gate a different version +/// than the package manager installs. +fn pypi_resolve_specifier( + candidates: &[PypiCandidate], + spec: &str, +) -> Result, String> { let parts: Vec<&str> = spec.split(',').map(|s| s.trim()).collect(); let mut requirements: Vec<(&'static str, semver::Version)> = Vec::new(); @@ -462,15 +492,18 @@ fn pypi_resolve_specifier(candidates: &[(String, DateTime)], spec: &str) -> ("<", "<"), ]; for p in &parts { + let unsupported = || format!("unsupported version specifier '{}'", spec); let (op, val) = OPERATORS .iter() - .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim())))?; - let v = semver::Version::parse(&normalize_for_semver(val)).ok()?; + .find_map(|(prefix, op)| p.strip_prefix(prefix).map(|v| (*op, v.trim()))) + .ok_or_else(unsupported)?; + let v = semver::Version::parse(&normalize_for_semver(val)).map_err(|_| unsupported())?; requirements.push((op, v)); } let mut best: Option<(semver::Version, String)> = None; - for (raw, _) in candidates { + for c in candidates { + let raw = &c.version; let v = match semver::Version::parse(&normalize_for_semver(raw)) { Ok(v) => v, Err(_) => continue, @@ -502,7 +535,7 @@ fn pypi_resolve_specifier(candidates: &[(String, DateTime)], spec: &str) -> _ => best = Some((v, raw.clone())), } } - best.map(|(_, raw)| raw) + Ok(best.map(|(_, raw)| raw)) } #[cfg(test)] @@ -516,6 +549,92 @@ mod tests { assert_eq!(encode_npm_name("@types/node"), "@types%2fnode"); } + fn candidates(versions: &[&str]) -> Vec { + versions + .iter() + .map(|v| PypiCandidate { + version: v.to_string(), + uploaded: Utc::now(), + yanked: false, + }) + .collect() + } + + #[test] + fn specifier_resolves_highest_match() { + let c = candidates(&["1.0.0", "2.5.0", "3.0.0"]); + assert_eq!( + pypi_resolve_specifier(&c, ">=1.0,<3").expect("parse"), + Some("2.5.0".to_string()) + ); + } + + #[test] + fn 
specifier_with_no_match_is_ok_none() { + let c = candidates(&["1.0.0"]); + assert_eq!(pypi_resolve_specifier(&c, ">=9.0").expect("parse"), None); + } + + #[test] + fn unparseable_specifier_errors_instead_of_falling_back() { + // `==1.*` is valid PEP 440 but not representable here; resolving + // "latest stable" instead would gate the wrong version. + let c = candidates(&["1.0.0", "2.0.0"]); + for spec in ["==1.*", "@weird", ">= not-a-version"] { + let err = pypi_resolve_specifier(&c, spec).expect_err(spec); + assert!( + err.contains("unsupported version specifier"), + "{spec}: {err}" + ); + } + } + + #[test] + fn yanked_only_releases_are_flagged() { + // 2.0.0 has every file yanked (one bool, one mirror-style reason + // string); 1.0.0 has a non-yanked file. Timestamps alone must not + // decide yanked status — yanked files keep theirs. + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{ + "1.0.0":[{"upload_time_iso_8601":"2020-01-01T00:00:00Z","yanked":false}], + "2.0.0":[{"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2021-01-01T00:00:00Z","yanked":"broken build"}] + }}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + let yanked_of = |v: &str| candidates.iter().find(|c| c.version == v).unwrap().yanked; + assert!(!yanked_of("1.0.0")); + assert!(yanked_of("2.0.0")); + + // Latest/specifier resolution must skip the yanked release… + let installable: Vec = + candidates.iter().filter(|c| !c.yanked).cloned().collect(); + assert_eq!( + pick_latest_stable(&installable).map(|c| c.version.as_str()), + Some("1.0.0") + ); + assert_eq!( + pypi_resolve_specifier(&installable, ">=1.0").expect("parse"), + Some("1.0.0".to_string()) + ); + // …while an exact pin still finds it (pip installs it with a warning). 
+ assert!(candidates.iter().any(|c| c.version == "2.0.0")); + } + + #[test] + fn release_with_partially_yanked_files_stays_installable() { + let meta: PypiInfoResponse = serde_json::from_str( + r#"{"releases":{"1.5.0":[ + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":true}, + {"upload_time_iso_8601":"2020-06-01T00:00:00Z","yanked":false} + ]}}"#, + ) + .expect("parse pypi json"); + let candidates = collect_pypi_candidates(&meta); + assert!(!candidates[0].yanked); + } + #[test] fn parses_iso8601_variants() { assert!(parse_iso8601("2024-01-02T03:04:05Z").is_ok()); From 5e28b1ea8d5faa17b88f9d98ba67403055565d7b Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 13:40:17 +0200 Subject: [PATCH 34/59] Close install-gate bypasses: normalize vuln-api names, fail closed on errors, gate uv installs and sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the cursor[bot] review on PR #99: - Query the vuln-api with PEP 503-normalized names. Advisories are keyed canonically and the server only lowercases, so an alternate spelling (Flask_Cors vs flask-cors) read as clean and bypassed the gate. - Treat registry resolution errors as blocking in tokened mode (unless --force) — a registry outage was a silent fail-open. - Share the exec path's pip -> pip3 fallback with the tree pass so pip3-only systems keep transitive coverage. - Recognize per-manager value-taking install flags (npm -w/--workspace, pnpm --filter, ...) so workspace names are not verified or blocked as package specs; pnpm -w and yarn -W stay boolean. - Resolve uv installs through `uv pip compile --only-binary :all:`: `uv pip install -r requirements.txt` previously exec'd with zero checks, and named uv targets got no transitive coverage. - Gate `uv sync` from uv.lock (index-sourced pins, "(locked)" origin); missing lock gets an explicit ungated note, malformed lock fails closed. `uv lock` stays passthrough since it installs nothing. 
--- src/precheck/mod.rs | 169 +++++++++++++++++++++++++++++++++-- src/precheck/parse.rs | 134 ++++++++++++++++++++++++---- src/precheck/tree.rs | 203 ++++++++++++++++++++++++++++++++++++++++-- tests/cli_install.rs | 4 +- tests/cli_tree.rs | 75 +++++++++++++++- tests/cli_uv_sync.rs | 191 +++++++++++++++++++++++++++++++++++++++ tests/cli_verdict.rs | 50 +++++++++++ tests/common/mod.rs | 18 ++-- 8 files changed, 801 insertions(+), 43 deletions(-) create mode 100644 tests/cli_uv_sync.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 3bbc15b..ef76a64 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -179,6 +179,8 @@ pub enum TreeOrigin { Requested, /// Already a direct dependency in the project's `package.json`. PreExisting, + /// Pinned by the project's lockfile (`uv sync` from `uv.lock`). + Locked, } impl TreeOrigin { @@ -187,6 +189,7 @@ impl TreeOrigin { TreeOrigin::Transitive => "(transitive)", TreeOrigin::Requested => "(from requirements)", TreeOrigin::PreExisting => "(already in package.json)", + TreeOrigin::Locked => "(locked)", } } @@ -195,6 +198,7 @@ impl TreeOrigin { TreeOrigin::Transitive => "transitive", TreeOrigin::Requested => "requested", TreeOrigin::PreExisting => "pre-existing", + TreeOrigin::Locked => "locked", } } } @@ -367,9 +371,114 @@ fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { exec, opts, ), + parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), } } +/// Gate `uv sync` from the project's `uv.lock`. The lockfile is the full +/// locked universe (all groups/extras) — a superset of what sync installs, +/// conservative in the blocking direction; a stale lock that sync would +/// re-resolve is gated as written. Recency isn't checked (locked versions +/// aren't newly chosen by this command); the verdict pass is the gate. We +/// never run `uv lock` ourselves — locking can build sdists, which would +/// execute package code before any verdict. 
+fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { + let Some(cfg) = &opts.verdict else { + // Tokenless mode has no verdict to gate with. + return exec(); + }; + let lock = match std::fs::read_to_string("uv.lock") { + Ok(content) => content, + Err(_) => { + eprintln!( + "note: no uv.lock here — 'uv sync' is not gated; dependencies install unchecked (run 'uv lock' first to enable the gate)" + ); + return exec(); + } + }; + let jobs = match parse_uv_lock(&lock) { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!("warning: cannot verify 'uv sync' ({e}); proceeding under --force"); + return exec(); + } + Err(e) => { + eprintln!("error: cannot verify 'uv sync': {e} (pass --force to proceed unchecked)"); + return 1; + } + }; + + let resolved_count = jobs.len(); + let results = verdict_pool(jobs, cfg, PackageManager::Uv, VERDICT_CONCURRENCY); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager: PackageManager::Uv, + subcommand: "sync".to_string(), + original_args: cmd[1..].to_vec(), + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + if opts.json { + print_json(&report, &opts); + } else { + print_text(&report); + } + if should_block_install(&report, &opts) { + if !opts.json { + print_refusal(&report); + } + return 1; + } + exec() +} + +/// Packages from `uv.lock` that `uv sync` installs from an index. Local +/// stanzas (the project itself and path deps: editable / virtual / +/// directory / path sources) carry no registry identity and are skipped. 
+fn parse_uv_lock(content: &str) -> Result, String> { + #[derive(serde::Deserialize)] + struct Lock { + #[serde(default)] + package: Vec, + } + #[derive(serde::Deserialize)] + struct Pkg { + name: String, + version: Option, + #[serde(default)] + source: std::collections::BTreeMap, + } + const LOCAL_SOURCES: [&str; 4] = ["editable", "virtual", "directory", "path"]; + + let lock: Lock = toml::from_str(content).map_err(|e| format!("parse uv.lock: {e}"))?; + Ok(lock + .package + .into_iter() + .filter(|p| !LOCAL_SOURCES.iter().any(|k| p.source.contains_key(*k))) + .filter_map(|p| { + Some(tree::TreePackage { + name: p.name, + version: p.version?, + requested: false, + }) + }) + .collect()) +} + /// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. fn run_parsed_install( manager: PackageManager, @@ -385,7 +494,11 @@ fn run_parsed_install( let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); if parsed.targets.is_empty() && !tree_eligible { - bare_install_note(manager, subcommand_label); + // Only a truly bare install gets the bare note — a tokenless + // `-r requirements.txt` install is covered by `requirements_note`. + if bare_install { + bare_install_note(manager, subcommand_label); + } requirements_note(&parsed); return exec(); } @@ -398,7 +511,7 @@ fn run_parsed_install( .collect(); let tree = if tree_eligible { - Some(run_tree_pass(manager, rest, &mut outcomes, &opts)) + Some(run_tree_pass(manager, rest, &parsed, &mut outcomes, &opts)) } else { run_verdict_pass(manager, &mut outcomes, &opts); // no-op tokenless None @@ -474,7 +587,10 @@ fn print_refusal(report: &PrecheckReport) { eprintln!( "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." 
); - } else if report.vulnerable_count() > 0 || report.unverifiable_count() > 0 { + } else if report.vulnerable_count() > 0 + || report.unverifiable_count() > 0 + || report.error_count() > 0 + { eprintln!("Refusing to run install. Pass --force to proceed despite findings."); } else { eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); @@ -507,7 +623,8 @@ fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { ) }) .all(|t| match t.origin { - TreeOrigin::PreExisting => true, + // A locked pin predates the sync command that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, TreeOrigin::Requested => false, TreeOrigin::Transitive => report.bare_install, }) @@ -537,10 +654,11 @@ fn requirements_note(parsed: &parse::ParsedInstall) { fn run_tree_pass( manager: PackageManager, rest: &[String], + parsed: &parse::ParsedInstall, outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let set = match tree::resolve_tree(manager, rest) { + let set = match tree::resolve_tree(manager, rest, parsed) { Ok(Some(set)) => set, Ok(None) => { run_verdict_pass(manager, outcomes, opts); @@ -644,12 +762,15 @@ fn verdict_pool( let Some(job) = queue.lock().unwrap().pop_front() else { break; }; + // vuln-api advisories are keyed by canonical names; an + // alternate spelling (PEP 503: `Flask_Cors` ≡ `flask-cors`) + // would miss and read as clean. let verdict = match crate::vuln_api::check_package_version( &client, &cfg.base_url, &cfg.token, ecosystem, - &job.name, + &manager.normalize_name(&job.name), &job.version, ) { Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), @@ -748,8 +869,12 @@ fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool if opts.force { return false; } + // A resolution error means no verdict was obtained for that target, so + // in tokened mode it fails closed like `Unverifiable` — otherwise a + // registry outage silently bypasses the gate. 
report.vulnerable_count() > 0 || report.unverifiable_count() > 0 + || (opts.verdict.is_some() && report.error_count() > 0) || (!opts.no_fail && report.recent_count() > 0) } @@ -1282,6 +1407,38 @@ mod tests { assert!(!PackageManager::Pip.is_install_subcommand("freeze")); } + #[test] + fn parse_uv_lock_keeps_index_packages_and_skips_local_sources() { + let lock = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." } + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "gitdep" +version = "1.2.3" +source = { git = "https://example.com/repo?rev=abc#abc" } +"#; + let pkgs = parse_uv_lock(lock).expect("parse uv.lock"); + let names: Vec<&str> = pkgs.iter().map(|p| p.name.as_str()).collect(); + assert_eq!(names, vec!["evildep", "gitdep"]); + assert_eq!(pkgs[0].version, "0.4.2"); + } + + #[test] + fn parse_uv_lock_rejects_invalid_toml() { + let err = parse_uv_lock("not = [valid").expect_err("invalid toml"); + assert!(err.contains("parse uv.lock"), "got: {err}"); + } + /// Baseline options: pypi registry at a dead address (a port that /// refuses connections — these tests never dial it), no verdict config. /// Override fields per test via struct update. diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index b1b952a..30c31d5 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -28,7 +28,10 @@ pub fn parse_pip_install_args(args: &[String]) -> Result /// `uv add` argument list (everything after `add`). 
pub fn parse_pypi_positionals_args(args: &[String]) -> ParsedInstall { - build_parsed_install(extract_node_positionals(args), parse_pypi_spec) + build_parsed_install( + extract_node_positionals(PackageManager::Uv, args), + parse_pypi_spec, + ) } fn build_parsed_install( @@ -52,7 +55,7 @@ pub fn parse_install_args( match manager { PackageManager::Pip => parse_pip_install_args(args), PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => Ok( - build_parsed_install(extract_node_positionals(args), parse_npm_spec), + build_parsed_install(extract_node_positionals(manager, args), parse_npm_spec), ), PackageManager::Uv => unreachable!("uv uses classify_uv_command"), } @@ -62,8 +65,15 @@ pub fn parse_install_args( #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum UvCommand<'a> { Passthrough, - PipInstall { install_args: &'a [String] }, - Add { add_args: &'a [String] }, + PipInstall { + install_args: &'a [String], + }, + Add { + add_args: &'a [String], + }, + /// `uv sync` — installs the locked project environment; gated from + /// `uv.lock`. (`uv lock` stays passthrough: it installs nothing.) + Sync, } pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> { @@ -76,6 +86,7 @@ pub fn classify_uv_command(cmd: &[String]) -> UvCommand<'_> { Some("add") => UvCommand::Add { add_args: &cmd[1..], }, + Some("sync") => UvCommand::Sync, _ => UvCommand::Passthrough, } } @@ -86,22 +97,51 @@ struct PositionalSplit { requirements_files: Vec, } -/// Strip flags from a npm/yarn/pnpm install argument list, returning -/// only the positional package specs. +/// Known install flags that take a separate value argument, per manager. +/// The fallback heuristic in [`skip_unknown_flag`] only skips URL/path-like +/// values, so a bare-word value (`-w my-workspace`) would otherwise parse — +/// and get verified or blocked — as a package spec. Not exhaustive; the +/// heuristic still backstops anything unlisted. 
The same letter can differ +/// by manager: npm's `-w ` takes a value, while pnpm's `-w` +/// (workspace-root) and yarn's `-W` are boolean. +fn takes_value(manager: PackageManager, flag: &str) -> bool { + match manager { + PackageManager::Npm => matches!( + flag, + "-w" | "--workspace" + | "--prefix" + | "--registry" + | "--tag" + | "--omit" + | "--include" + | "--loglevel" + ), + PackageManager::Pnpm => matches!( + flag, + "-C" | "--dir" | "--filter" | "--registry" | "--reporter" | "--loglevel" + ), + PackageManager::Yarn => matches!( + flag, + "--registry" | "--modules-folder" | "--cache-folder" | "--mutex" | "--network-timeout" + ), + PackageManager::Uv => matches!( + flag, + "--group" | "--extra" | "--index" | "--tag" | "--branch" | "--rev" | "--package" + ), + PackageManager::Pip => false, + } +} + +/// Strip flags from a npm/yarn/pnpm (or `uv add`) install argument list, +/// returning only the positional package specs. /// /// We treat anything starting with `-` as a flag. Boolean flags (`-D`, /// `--save-dev`, `--no-save`, ...) are dropped on their own. Flags /// that take a value can be written as either `--flag=value` or -/// `--flag value`; we handle both by skipping the next token if it -/// looks like a value (doesn't start with `-` and contains `:` or `/` -/// or starts with a digit, suggesting a URL / path / port / version). -/// -/// We deliberately avoid maintaining an exhaustive flag whitelist — -/// real-world install commands are too varied. The heuristic above -/// is correct for the common cases (`--registry url`, `--prefix path`, -/// `-w pkgname`, etc.) and conservatively skips occasional ambiguous -/// values (no spec we'd want to verify ever starts with `:` or `/`). -fn extract_node_positionals(args: &[String]) -> PositionalSplit { +/// `--flag value`; known value-taking flags ([`takes_value`]) skip the +/// next token outright, anything else skips it only if it looks like a +/// value (a URL / path), never like a package spec. 
+fn extract_node_positionals(manager: PackageManager, args: &[String]) -> PositionalSplit { let mut out = PositionalSplit::default(); let mut i = 0; while i < args.len() { @@ -114,6 +154,10 @@ fn extract_node_positionals(args: &[String]) -> PositionalSplit { break; } if a.starts_with('-') { + if !a.contains('=') && takes_value(manager, a) { + i += 2; + continue; + } i = skip_unknown_flag(args, i); continue; } @@ -403,7 +447,7 @@ mod tests { "https://example.com/registry".to_string(), "lodash@^4.0.0".to_string(), ]; - let p = extract_node_positionals(&args); + let p = extract_node_positionals(PackageManager::Npm, &args); assert_eq!( p.specs, vec![ @@ -414,6 +458,58 @@ mod tests { ); } + #[test] + fn npm_workspace_flag_value_is_not_a_spec() { + // npm's `-w ` / `--workspace ` take a bare-word value; + // it must never be verified (or blocked) as a package spec. + for flag in ["-w", "--workspace"] { + let args = vec![ + flag.to_string(), + "my-workspace".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()], "flag {flag}"); + } + // `--workspace=name` is self-contained. + let args = vec!["--workspace=my-workspace".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Npm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn pnpm_and_yarn_boolean_workspace_flags_keep_the_spec() { + // pnpm's `-w` (--workspace-root) and yarn's `-W` are boolean — + // the next token is the package being installed. + let args = vec!["-w".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Pnpm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + + let args = vec!["-W".to_string(), "lodash".to_string()]; + let p = extract_node_positionals(PackageManager::Yarn, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + + // pnpm's `--filter ` does take a value. 
+ let args = vec![ + "--filter".to_string(), + "my-app".to_string(), + "lodash".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Pnpm, &args); + assert_eq!(p.specs, vec!["lodash".to_string()]); + } + + #[test] + fn uv_add_group_flag_value_is_not_a_spec() { + let args = vec![ + "--group".to_string(), + "dev".to_string(), + "requests".to_string(), + ]; + let p = extract_node_positionals(PackageManager::Uv, &args); + assert_eq!(p.specs, vec!["requests".to_string()]); + } + #[test] fn extracts_npm_positionals_after_double_dash() { let args = vec![ @@ -422,7 +518,7 @@ mod tests { "axios".to_string(), "--this-is-positional-now".to_string(), ]; - let p = extract_node_positionals(&args); + let p = extract_node_positionals(PackageManager::Npm, &args); assert_eq!( p.specs, vec!["axios".to_string(), "--this-is-positional-now".to_string()] @@ -570,7 +666,7 @@ mod tests { )); assert_eq!( classify_uv_command(&["sync".to_string(), "--extra".to_string(), "dev".to_string()]), - UvCommand::Passthrough + UvCommand::Sync ); assert_eq!( classify_uv_command(&["run".to_string(), "pytest".to_string()]), diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index d155a39..7a0444c 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -19,13 +19,14 @@ pub struct TreePackage { } /// Whether this manager's resolver has anything to resolve for the parsed -/// install. pip's dry-run also reads `-r` requirements files, so those make -/// a pip install eligible even with no named targets. npm's lockfile -/// resolution reads `package.json`, so a bare `npm install` is eligible -/// whenever the working directory has one. +/// install. pip's dry-run and uv's compile also read `-r` requirements +/// files, so those make an install eligible even with no named targets. +/// npm's lockfile resolution reads `package.json`, so a bare `npm install` +/// is eligible whenever the working directory has one. 
pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstall) -> bool { !parsed.targets.is_empty() - || (manager == PackageManager::Pip && !parsed.requirements_files.is_empty()) + || (matches!(manager, PackageManager::Pip | PackageManager::Uv) + && !parsed.requirements_files.is_empty()) || (manager == PackageManager::Npm && std::path::Path::new("package.json").exists()) } @@ -34,12 +35,14 @@ pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstal pub fn resolve_tree( manager: PackageManager, install_args: &[String], + parsed: &super::parse::ParsedInstall, ) -> Result>, String> { match manager { PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), - // yarn/pnpm/uv have no safe dry-run for installs. - _ => Ok(None), + PackageManager::Uv => resolve_uv_tree(parsed).map(Some), + // yarn/pnpm have no safe dry-run for installs. + PackageManager::Yarn | PackageManager::Pnpm => Ok(None), } } @@ -54,7 +57,9 @@ fn stderr_tail(output: &std::process::Output) -> String { } fn resolve_pip_tree(binary: &str, install_args: &[String]) -> Result, String> { - let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; + // Same binary resolution as the exec path (pip → pip3 fallback) — the + // tree pass must not silently degrade on pip3-only systems. + let resolved = super::resolve_binary(binary)?; let output = Command::new(resolved) .arg("install") .args([ @@ -104,6 +109,124 @@ fn parse_pip_report(json: &str) -> Result, String> { .collect() } +/// Resolve uv's would-install set with `uv pip compile` — uv's own +/// resolver, run without executing package code (`--only-binary :all:` +/// blocks sdist builds, mirroring the pip dry-run guard). 
Compile takes +/// requirements files rather than bare specs, so named registry specs and +/// absolutized `-r` includes are written to a temp `.in` file. +/// Unverifiable targets (URL / git / editable / path) are excluded — they +/// are already surfaced as skipped warnings. Index selection comes from +/// uv's env/config; index flags on the wrapped command don't carry over. +fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result, String> { + let uv = super::resolve_binary("uv")?; + let mut input = String::new(); + for t in &parsed.targets { + if !matches!(t.kind, super::TargetKind::Unverifiable { .. }) { + input.push_str(&t.display); + input.push('\n'); + } + } + for f in &parsed.requirements_files { + let abs = std::fs::canonicalize(f).map_err(|e| format!("read {}: {e}", f.display()))?; + input.push_str(&format!("-r {}\n", abs.display())); + } + if input.is_empty() { + return Err("nothing uv pip compile can resolve (all targets are URL/path refs)".into()); + } + + let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; + let in_file = work.path().join("corgea-gate.in"); + std::fs::write(&in_file, &input).map_err(|e| format!("write compile input: {e}"))?; + let output = Command::new(&uv) + .args([ + "pip", + "compile", + "--only-binary", + ":all:", + "--no-header", + "--no-annotate", + "--quiet", + ]) + .arg(&in_file) + .output() + .map_err(|e| format!("run uv pip compile: {e}"))?; + if !output.status.success() { + return Err(format!("uv pip compile failed: {}", stderr_tail(&output))); + } + parse_compiled_requirements( + &String::from_utf8_lossy(&output.stdout), + &requested_names(parsed), + ) +} + +/// Normalized names the user asked for — named CLI targets plus entries of +/// `-r` files — so tree findings label "(from requirements)" like pip's +/// `requested` report flag. Best-effort line parse; anything unparsed just +/// labels "(transitive)". 
+fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::HashSet { + let norm = |n: &str| PackageManager::Uv.normalize_name(n); + let mut out: std::collections::HashSet = parsed + .targets + .iter() + .filter(|t| !matches!(t.kind, super::TargetKind::Unverifiable { .. })) + .map(|t| norm(&t.name)) + .collect(); + for f in &parsed.requirements_files { + let Ok(content) = std::fs::read_to_string(f) else { + continue; + }; + for line in content.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) || line.contains("://") { + continue; + } + let name: String = line + .chars() + .take_while(|c| !matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ')) + .collect(); + if !name.is_empty() { + out.insert(norm(&name)); + } + } + } + out +} + +/// Parse `uv pip compile` stdout (requirements.txt-format `name==version` +/// pins) into the would-install set. Any line that isn't a pin is an error — +/// silently skipping could hide part of the tree. +fn parse_compiled_requirements( + out: &str, + requested: &std::collections::HashSet, +) -> Result, String> { + let mut pkgs = Vec::new(); + for line in out.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with(['#', '-']) { + continue; + } + // Strip env markers and trailing comments: `pkg==1.0 ; marker # via`. + let line = line.split(';').next().unwrap_or(line).trim(); + let line = line.split(" #").next().unwrap_or(line).trim(); + let Some((name, version)) = line.split_once("==") else { + return Err(format!( + "unexpected line in uv pip compile output: '{line}'" + )); + }; + // Strip extras: `celery[redis]==5.3.4`. 
+ let name = name.split('[').next().unwrap_or(name).trim().to_string(); + pkgs.push(TreePackage { + requested: requested.contains(&PackageManager::Uv.normalize_name(&name)), + name, + version: version.trim().to_string(), + }); + } + if pkgs.is_empty() { + return Err("uv pip compile produced no packages".to_string()); + } + Ok(pkgs) +} + /// Direct dependency names declared by the project's `package.json` in the /// current directory (the manifest `resolve_npm_tree` copies). Empty when /// the manifest is absent or unparsable — origin labeling then degrades to @@ -139,7 +262,7 @@ fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { /// `--ignore-scripts` because npm has run lifecycle scripts under /// `--package-lock-only` before (npm/cli#2787). fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { - let resolved = which::which(binary).map_err(|e| format!("{binary} not found on PATH: {e}"))?; + let resolved = super::resolve_binary(binary)?; let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; for manifest in [ "package.json", @@ -266,6 +389,68 @@ mod tests { assert!(err.contains("parse pip report"), "got: {err}"); } + #[test] + fn parse_compiled_requirements_pins_extras_and_markers() { + let requested = std::collections::HashSet::from(["flask-cors".to_string()]); + let out = "Flask_Cors==4.0.0\ncelery[redis]==5.3.4\nwerkzeug==3.1.8 ; python_version >= \"3.9\"\n\n# comment\n--index-url https://example.com\n"; + let pkgs = parse_compiled_requirements(out, &requested).expect("parse pins"); + assert_eq!( + pkgs, + vec![ + TreePackage { + name: "Flask_Cors".to_string(), + version: "4.0.0".to_string(), + requested: true, + }, + TreePackage { + name: "celery".to_string(), + version: "5.3.4".to_string(), + requested: false, + }, + TreePackage { + name: "werkzeug".to_string(), + version: "3.1.8".to_string(), + requested: false, + }, + ] + ); + } + + #[test] + fn 
parse_compiled_requirements_rejects_non_pins() { + let none = std::collections::HashSet::new(); + let err = parse_compiled_requirements("flask>=2.0\n", &none).expect_err("not a pin"); + assert!(err.contains("unexpected line"), "got: {err}"); + let err = parse_compiled_requirements("", &none).expect_err("empty"); + assert!(err.contains("no packages"), "got: {err}"); + } + + #[test] + fn requested_names_unions_targets_and_requirements_files() { + let dir = tempfile::tempdir().expect("temp dir"); + let req = dir.path().join("requirements.txt"); + std::fs::write( + &req, + "# comment\nFlask_Cors==4.0.0\nrequests[security]>=2.0 ; python_version >= \"3.9\"\n-r other.txt\nhttps://example.com/pkg.whl\n", + ) + .expect("write requirements"); + let parsed = super::super::parse::ParsedInstall { + targets: vec![super::super::InstallTarget { + name: "celery".to_string(), + display: "celery==5.3.4".to_string(), + kind: super::super::TargetKind::Pypi( + crate::verify_deps::registry::PypiSpec::Exact("5.3.4".to_string()), + ), + }], + requirements_files: vec![req], + }; + let names = requested_names(&parsed); + for name in ["celery", "flask-cors", "requests"] { + assert!(names.contains(name), "missing {name}: {names:?}"); + } + assert_eq!(names.len(), 3); + } + // lockfile-v3 with: root entry (skipped), a plain dep, a nested dep, // a scoped dep, and a workspace `link: true` entry (skipped). const NPM_LOCK: &str = r#"{ diff --git a/tests/cli_install.rs b/tests/cli_install.rs index e51f9f0..ad51c7d 100644 --- a/tests/cli_install.rs +++ b/tests/cli_install.rs @@ -196,8 +196,8 @@ fn pip_json_reports_fresh_pin_as_recent() { #[test] fn pip_resolution_error_prints_error_but_install_proceeds() { // `nosuchpkg` hits the stub's 404 route → an error outcome, which - // warns but never blocks in the baseline (fail-closed is a later - // chunk) — the install must still run. 
+ // warns but does not block in tokenless mode (tokened mode fails + // closed — see cli_verdict.rs) — the install must still run. let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index a63a5d2..0da51d7 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -12,7 +12,9 @@ mod common; -use common::{key, vulnerable_body, TreeHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT}; +use common::{ + key, vulnerable_body, TreeHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, UV_COMPILED, +}; use std::collections::HashMap; use tempfile::TempDir; @@ -36,6 +38,12 @@ fn transitive_vulnerable_blocks_install() { NPM_LOCK, &["npm", "install", "oldpkg@1.0.0"][..], ), + ( + "uv", + "pypi", + UV_COMPILED, + &["uv", "pip", "install", "oldpkg==1.0.0"][..], + ), ]; for (binary, eco, payload, args) in cases { let mut checks = HashMap::new(); @@ -59,6 +67,66 @@ fn transitive_vulnerable_blocks_install() { } } +#[test] +fn uv_requirements_file_install_is_tree_gated() { + // `uv pip install -r requirements.txt` names no targets — the gate must + // still resolve the full set via `uv pip compile` and block on the + // vulnerable pin instead of exec'ing unchecked. 
+ let cwd = TempDir::new().expect("temp cwd"); + std::fs::write(cwd.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements.txt"); + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("uv", checks, HashMap::new(), UV_COMPILED); + let out = h + .cmd + .current_dir(cwd.path()) + .args(["uv", "pip", "install", "-r", "requirements.txt"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None, "uv must not run when blocked"); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(transitive)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("not gated"), + "gated uv requirements install must not print the bare note: {stderr}" + ); +} + +#[test] +fn tree_pass_runs_via_pip3_when_pip_is_absent() { + // Only `pip3` exists on PATH (common Linux/macOS). The tree pass must + // use the same pip → pip3 fallback as the exec path instead of silently + // degrading to named-only — the transitive `evildep` must still block. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_evildep_body("pypi"), + ); + let mut h = TreeHarness::new("pip3", checks, HashMap::new(), TREE_REPORT); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "transitive vuln must block"); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("transitive dependencies not checked"), + "tree pass must not degrade with only pip3 on PATH: {stderr}" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("evildep"), "stdout: {stdout}"); +} + #[test] fn resolution_failure_falls_back_with_loud_warning() { // The fake manager fails its tree invocation (pip: exits 2 on `--dry-run`, @@ -76,6 +144,11 @@ fn resolution_failure_falls_back_with_loud_warning() { &["npm", "install", "oldpkg@1.0.0"][..], "install oldpkg@1.0.0", ), + ( + "uv", + &["uv", "pip", "install", "oldpkg==1.0.0"][..], + "pip install oldpkg==1.0.0", + ), ]; for (binary, args, forwarded_argv) in cases { let mut h = TreeHarness::new(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); diff --git a/tests/cli_uv_sync.rs b/tests/cli_uv_sync.rs new file mode 100644 index 0000000..96cf170 --- /dev/null +++ b/tests/cli_uv_sync.rs @@ -0,0 +1,191 @@ +//! Hermetic e2e tests for the `corgea uv sync` gate. +//! +//! With a token, `uv sync` is gated from the project's `uv.lock`: every +//! index-sourced pin is verdicted against the vuln-api stub before uv runs. +//! Without a lockfile (or without a token) it execs behind an honest note. +//! Harness: fake `uv` argv recorder on a private PATH + in-crate vuln-api +//! stub + throwaway project dir as cwd. No registry stub — the sync gate +//! does no recency resolution. 
+ +#![cfg(unix)] + +mod common; + +use common::{corgea_isolated, key, vulnerable_body, write_fake_recorder}; +use corgea::vuln_api_stub::{self, PackageKey}; +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Command; +use tempfile::TempDir; + +/// `proj` is the project itself (editable — skipped); `evildep` is the one +/// index-sourced pin the gate must verdict. +const UV_LOCK: &str = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." } + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +"#; + +struct SyncHarness { + cmd: Command, + marker: PathBuf, + project: TempDir, + _home: TempDir, + _bin: TempDir, +} + +impl SyncHarness { + fn new(checks: HashMap) -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let project = TempDir::new().expect("project dir"); + let marker = bin.path().join("pm-argv.txt"); + write_fake_recorder(bin.path(), "uv", &marker, 0); + let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + cmd.env("PATH", bin.path()) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) + .env("CORGEA_TOKEN", "test-token") + .current_dir(project.path()); + Self { + cmd, + marker, + project, + _home: home, + _bin: bin, + } + } + + fn with_uv_lock(self, content: &str) -> Self { + std::fs::write(self.project.path().join("uv.lock"), content).expect("write uv.lock"); + self + } + + fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +fn vulnerable_evildep_checks() -> HashMap { + let mut checks = HashMap::new(); + checks.insert( + key("pypi", "evildep", "0.4.2"), + vulnerable_body("pypi", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + checks +} + +#[test] +fn uv_sync_vulnerable_lockfile_blocks() { + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h.cmd.args(["uv", 
"sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "vulnerable lock must block"); + assert_eq!( + h.recorded_argv(), + None, + "uv must not run on a vulnerable verdict" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + for needle in ["evildep", "MAL-2024-0002", "(locked)"] { + assert!(stdout.contains(needle), "stdout: {stdout}"); + } + // Nothing was named by this command — the refusal blames the lock, not + // the user's input. + assert!( + String::from_utf8_lossy(&out.stderr) + .contains("your existing dependency tree has known-vulnerable packages"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn uv_sync_clean_lockfile_proceeds() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + let out = h + .cmd + .args(["uv", "sync", "--frozen"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "clean lock must proceed"); + assert_eq!( + h.recorded_argv().as_deref(), + Some("sync --frozen"), + "uv's own args must be forwarded untouched" + ); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("tree: 1 packages resolved"), + "the project's own editable stanza must be skipped: {stdout}" + ); +} + +#[test] +fn uv_sync_force_overrides_block() { + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h + .cmd + .args(["uv", "--force", "sync"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0), "--force must run the sync"); + assert_eq!(h.recorded_argv().as_deref(), Some("sync")); + assert!( + String::from_utf8_lossy(&out.stdout).contains("evildep"), + "findings still printed under --force" + ); +} + +#[test] +fn uv_sync_without_lockfile_execs_with_note() { + let mut h = SyncHarness::new(HashMap::new()); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), 
Some("sync")); + assert!( + String::from_utf8_lossy(&out.stderr).contains("'uv sync' is not gated"), + "stderr must carry the explicit ungated note: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn uv_sync_malformed_lockfile_fails_closed() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock("not = [valid"); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(1), "unparseable lock must block"); + assert_eq!(h.recorded_argv(), None); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("cannot verify 'uv sync'"), + "stderr: {stderr}" + ); + assert!(stderr.contains("--force"), "stderr: {stderr}"); +} + +#[test] +fn uv_sync_tokenless_passes_through() { + let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + h.cmd.env_remove("CORGEA_TOKEN"); + let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("sync")); + assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); +} + +#[test] +fn uv_lock_stays_passthrough() { + // `uv lock` installs nothing; the gate applies to the sync that follows. + let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let out = h.cmd.args(["uv", "lock"]).output().expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + assert_eq!(h.recorded_argv().as_deref(), Some("lock")); + assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); +} diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index e11b904..5c0392c 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -43,6 +43,31 @@ fn vulnerable_pin_blocks_without_running_install() { ); } +#[test] +fn alternate_pypi_spelling_hits_canonical_verdict() { + // Advisories are keyed by the PEP 503 canonical name; `Flask_Cors` + // must query (and block on) the `flask-cors` verdict. 
+ let mut checks = HashMap::new(); + checks.insert( + key("pypi", "flask-cors", "1.0.0"), + vulnerable_body("pypi", "flask-cors", "1.0.0", "GHSA-TEST-0001", None), + ); + let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "Flask_Cors==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "alternate spelling must not bypass the gate" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("GHSA-TEST-0001"), "stdout: {stdout}"); +} + #[test] fn force_overrides_vulnerable_block_and_propagates_exit_code() { let mut checks = HashMap::new(); @@ -66,6 +91,31 @@ fn force_overrides_vulnerable_block_and_propagates_exit_code() { ); } +#[test] +fn resolution_error_fails_closed_with_token() { + // The wildcard registry stub only knows version 1.0.0, so `==2.0.0` + // is a resolution error: no verdict was obtained, and with a token + // that must block — otherwise a registry outage bypasses the gate. 
+ let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let out = h + .cmd + .args(["pip", "install", "nosuchpkg==2.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(1), + "a resolution error must fail closed in tokened mode" + ); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("1 errors"), "stdout: {stdout}"); + assert!( + String::from_utf8_lossy(&out.stderr).contains("--force"), + "block message must name --force" + ); +} + #[test] fn verdict_503_fails_closed() { let mut statuses = HashMap::new(); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 043f79a..505ca85 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -89,6 +89,11 @@ pub const NPM_LOCK: &str = r#"{"name":"proj","lockfileVersion":3,"packages":{ "node_modules/oldpkg":{"version":"1.0.0"}, "node_modules/evildep":{"version":"0.4.2"}}}"#; +/// `uv pip compile` stdout: `oldpkg` + transitive `evildep`, same shape as +/// `TREE_REPORT` / `NPM_LOCK`. +#[allow(dead_code)] +pub const UV_COMPILED: &str = "oldpkg==1.0.0\nevildep==0.4.2\n"; + /// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 /// port; `route` maps a request path to `(status line, body)`. Returns the /// base URL. `Connection: close` is load-bearing — without it reqwest pools @@ -226,11 +231,11 @@ pub const RESOLUTION_FAILS: &str = "RESOLUTION_FAILS"; /// Write an executable tree-aware fake package manager into `dir`. An /// invocation carrying the manager's tree flag emits `payload` (stdout for -/// pip's `--dry-run --report -`, `./package-lock.json` for npm's -/// `--package-lock-only`, whose cwd is the resolver's throwaway temp dir) -/// and exits 0 — the tree pass; if `payload` is `RESOLUTION_FAILS` it exits -/// non-zero instead, emitting nothing. Any other invocation records its -/// argv to `marker` and exits `exit_code`. 
+/// pip's `--dry-run --report -` and uv's `pip compile`, +/// `./package-lock.json` for npm's `--package-lock-only`, whose cwd is the +/// resolver's throwaway temp dir) and exits 0 — the tree pass; if `payload` +/// is `RESOLUTION_FAILS` it exits non-zero instead, emitting nothing. Any +/// other invocation records its argv to `marker` and exits `exit_code`. #[cfg(unix)] #[allow(dead_code)] pub fn write_fake_tree_pm( @@ -241,8 +246,9 @@ pub fn write_fake_tree_pm( exit_code: i32, ) { let (tree_flag, redirect, fail_exit) = match binary { - "pip" => ("--dry-run", "", 2), + "pip" | "pip3" => ("--dry-run", "", 2), "npm" => ("--package-lock-only", " > package-lock.json", 1), + "uv" => ("compile", "", 1), other => panic!("unsupported fake manager {other}"), }; let tree_branch = if payload == RESOLUTION_FAILS { From 345b1a2f80639ffd8fd7c2db37b26582eaf77d61 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 14:44:33 +0200 Subject: [PATCH 35/59] Point default vuln-api at staging worker --- src/config.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/config.rs b/src/config.rs index f508bb8..dcd0217 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,8 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::{env, fs, io}; +pub const DEFAULT_VULN_API_URL: &str = "https://cve-worker-staging.corgea.workers.dev"; + #[derive(Serialize, Deserialize, Clone)] pub struct Config { pub(crate) url: String, @@ -105,7 +107,7 @@ impl Config { /// then the public default. pub fn get_vuln_api_url(&self) -> String { crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") - .unwrap_or_else(|| "https://vuln-api.corgea.app".to_string()) + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) .trim() .trim_end_matches('/') .to_string() @@ -134,7 +136,7 @@ mod tests { // Default when the env var is unset. 
assert_eq!( test_config().get_vuln_api_url(), - "https://vuln-api.corgea.app" + "https://cve-worker-staging.corgea.workers.dev" ); // Env var wins; whitespace and trailing slash trimmed. @@ -145,7 +147,7 @@ mod tests { env::set_var("CORGEA_VULN_API_URL", " "); assert_eq!( test_config().get_vuln_api_url(), - "https://vuln-api.corgea.app" + "https://cve-worker-staging.corgea.workers.dev" ); env::remove_var("CORGEA_VULN_API_URL"); } From bb2a043c9659df0bc7a5b4dc7af3c3822ab21421 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 15:22:37 +0200 Subject: [PATCH 36/59] Improve install wrapper command guidance --- README.md | 16 +- skills/corgea/SKILL.md | 63 ++-- src/main.rs | 69 ++++- src/precheck/mod.rs | 603 ++++++++++++++++++++++++++++++++----- src/vuln_api/mod.rs | 118 ++++++-- tests/cli_bare_install.rs | 14 +- tests/cli_exec_fallback.rs | 26 ++ tests/cli_install.rs | 351 ++++++++++++++++++++- tests/cli_provenance.rs | 1 + tests/cli_tree.rs | 2 +- tests/cli_uv_sync.rs | 13 +- tests/cli_verdict.rs | 102 +++++-- tests/common/mod.rs | 59 +++- 13 files changed, 1260 insertions(+), 177 deletions(-) diff --git a/README.md b/README.md index 03b116d..e3aec87 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ pip install corgea-cli You can get the latest binaries for your OS from https://github.com/Corgea/cli/releases. ### Setup -Once the binary is installed, login with your token from the Corgea app. +Once the binary is installed, login with your token from the Corgea app to enable +authenticated enforcement and private Corgea intelligence. ``` corgea login ``` @@ -50,6 +51,19 @@ corgea deps policy init --exist-ok # write starter policy, or keep existing See [Dependency Scanning (CLI)](https://docs.corgea.app/cli/deps) for the full flag and exit-code reference. +## Install Wrappers + +`corgea pip|npm|yarn|pnpm|uv ` runs package-manager install commands +through Corgea's install gate. 
Baseline CVE checks need no token: known vulnerable +or malicious package versions block, while vuln-api lookup outages warn and +continue in public fail-open mode. + +Logging in enables authenticated enforcement against the default Corgea vuln-api, +including fail-closed behavior for lookup failures and any private Corgea +intelligence. A custom `CORGEA_VULN_API_URL` is public by default, even when +`CORGEA_TOKEN` exists. Set `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` to send +the token to a custom URL and make lookup failures fail closed. + ## Development Setup ### Prerequisites diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index 53bccec..dc6d023 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -113,28 +113,30 @@ Installs a pre-commit hook running `corgea scan blast --only-uncommitted`. Bypas Run a package manager through Corgea's install gate. Install commands with named targets are resolved against the public registry first, then gated twice: a version -published within `--threshold` (default `2d`) blocks (exit 1), and — when a Corgea -token is configured — each resolved version is checked against Corgea's vuln-api; -known-vulnerable or malicious versions block, and a verdict that cannot be obtained -(network/5xx/auth errors) also blocks (fail-closed). Without a token the vuln check -is skipped (recency-only) and stderr suggests `corgea login`. Everything else passes -through with the package manager's own exit code. Git/URL/path specs are noted, never -blocked. With a token, bare `npm install` (zero specs, `package.json` present) is gated -too: the full lockfile-resolved tree is verdicted, so a vulnerable lockfile blocks. Bare -`yarn`/`pnpm`/`uv` installs have no safe dry-run; they run unchecked after a stderr note -(`note: bare ' ' is not gated …`). `-r requirements.txt` files get a printed -note when the tree pass doesn't cover them. 
+published within `--threshold` (default `2d`) blocks (exit 1), and each resolved +version is checked against Corgea's vuln-api. Baseline public CVE checks need no +token: known-vulnerable or malicious versions block, but vuln-api lookup outages +warn and continue because public mode is fail-open. A Corgea token on the default +vuln-api enables authenticated enforcement and private Corgea intelligence; in +that mode, verdict lookup failures also block (fail-closed). Everything else +passes through with the package manager's own exit code. Git/URL/path specs are +noted, never blocked. Bare `npm install` (zero specs, `package.json` present) is +gated too: the full lockfile-resolved tree is verdicted, so a vulnerable lockfile +blocks. Bare `yarn`/`pnpm`/`uv` installs have no safe dry-run; they run unchecked +after a stderr note (`note: bare ' ' is not gated …`). `-r requirements.txt` +files get a printed note when the tree pass doesn't cover them. Blocked findings steer to the fix: each advisory line shows `fixed in ` (or `no fixed version known`). When every advisory on a package has a fix, the gate prints `→ safe version: @` — the highest fix covering every advisory. -With a token, the vuln check covers the **full would-install set**, not just the -named targets: `pip` and `npm` resolve the complete tree (named + transitive) via a -safe dry-run (`pip install --dry-run …`; an isolated `npm install --package-lock-only` -in a temp dir, never touching your lockfile) and verdict every package, so a flagged -**transitive** dependency blocks the install too. 
`yarn`, `pnpm`, and `uv` have no safe -dry-run, so they verify the named targets only and print +The vuln check covers the **full would-install set** where the manager has a safe +resolver, not just the named targets: `pip` and `npm` resolve the complete tree +(named + transitive) via a safe dry-run (`pip install --dry-run …`; an isolated +`npm install --package-lock-only` in a temp dir, never touching your lockfile) and +verdict every package, so a flagged **transitive** dependency blocks the install +too. `yarn`, `pnpm`, and `uv` have no safe dry-run, so they verify the named targets +only and print `warning: transitive dependencies not checked (…); only named packages were verified.` The same warning is emitted (and the gate falls back to named-only) whenever a pip/npm dry-run fails. Verdict requests run in a bounded pool (8 parallel). @@ -151,25 +153,29 @@ corgea pip list # non-install subcommands pass straight th | Flag | Short | Description | |------|-------|-------------| | `--threshold` | `-t` | Recency threshold (`2d`, `12h`). Younger resolved versions block. | -| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable/unverifiable blocks. | +| `--no-fail` | | Demote a recency block to a warning. Does NOT bypass vulnerable blocks or authenticated unverifiable blocks. | | `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | | `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. | -`--json` adds a `tree` object: `null` in recency-only mode; otherwise `mode` is `"full"` -(transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a -`transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. -Vulnerable `verdict` objects carry a `remediation` field: the safe version covering -every advisory, or `null` when any advisory has no known fix. 
+`--json` adds `verdict_mode` (`"public"`, `"authenticated"`, or `"recency-only"`) and a +`tree` object: `null` when no tree pass ran; otherwise `mode` is `"full"` (transitive +checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a `transitive[]` +array of `{name, version, verdict}` for packages beyond the named targets. Vulnerable +`verdict` objects carry a `remediation` field: the safe version covering every advisory, +or `null` when any advisory has no known fix. -Recency gating needs no token; the vuln verdict uses the configured Corgea token when -present. Overrides for testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, -`CORGEA_VULN_API_URL`. +Recency gating and baseline CVE checks need no token. The default vuln-api uses +`CORGEA_TOKEN` when present. A custom `CORGEA_VULN_API_URL` is public by default, even +when `CORGEA_TOKEN` exists; set `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` to send +the token to that custom URL and make lookup failures fail closed. Overrides for +testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, `CORGEA_VULN_API_URL`. #### Testing the gate Staging vuln-api (`CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev`) -serves deterministic verdicts for dogfooding. It ignores auth — any non-empty -`CORGEA_TOKEN` value enables full-gate mode. Known-vulnerable targets: +serves deterministic verdicts for dogfooding. It runs in public mode by default; +add `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` if you need to exercise authenticated +custom-URL behavior. 
Known-vulnerable targets: | Ecosystem | Target | Verdict | |-----------|--------|---------| @@ -183,6 +189,7 @@ Verify the gate end-to-end: ```bash CORGEA_TOKEN=dogfood-dummy \ CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev \ +CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1 \ corgea npm install axios@0.21.0 ``` diff --git a/src/main.rs b/src/main.rs index e127f85..1bcc922 100644 --- a/src/main.rs +++ b/src/main.rs @@ -272,14 +272,22 @@ fn install_wrap_options( ) -> corgea::precheck::PrecheckOptions { let token = config.get_token(); let token = token.trim(); - let verdict = if token.is_empty() { - None - } else { - Some(corgea::precheck::VerdictConfig { - base_url: config.get_vuln_api_url(), - token: token.to_string(), + let custom_vuln_api_url = utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .map(|url| { + url.trim() + .trim_end_matches('/') + .ne(config::DEFAULT_VULN_API_URL) }) - }; + .unwrap_or(false); + let send_token_to_custom = + utils::generic::get_env_var_if_exists("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") + .is_some_and(|v| v.trim() == "1"); + let mode = select_verdict_mode(token, custom_vuln_api_url, send_token_to_custom); + let verdict = Some(corgea::precheck::VerdictConfig { + base_url: config.get_vuln_api_url(), + mode, + public_login_hint: token.is_empty(), + }); corgea::precheck::PrecheckOptions { threshold: args.threshold, no_fail: args.no_fail, @@ -291,6 +299,20 @@ fn install_wrap_options( } } +fn select_verdict_mode( + token: &str, + custom_vuln_api_url: bool, + send_token_to_custom: bool, +) -> corgea::precheck::VerdictMode { + if !token.is_empty() && (!custom_vuln_api_url || send_token_to_custom) { + corgea::precheck::VerdictMode::Authenticated { + token: token.to_string(), + } + } else { + corgea::precheck::VerdictMode::Public + } +} + fn run_install_wrap_command( manager: corgea::precheck::PackageManager, args: &InstallWrapArgs, @@ -584,9 +606,9 @@ fn main() { // Offline: no token / network. 
Exit code propagates fail-on policy. std::process::exit(i32::from(corgea::deps::run::run(command.clone()))); } - // Install wrappers: no hard auth gate — the recency check is offline, - // and a token (when present) additionally enables the vuln-api verdict. - // Tokenless degrades to recency-only with a login prompt. + // Install wrappers: no hard auth gate. Public CVE checks run without a + // token; a token on the default service enables authenticated fail-closed + // enforcement. Some(Commands::Npm(args)) => { run_install_wrap_command(corgea::precheck::PackageManager::Npm, args, &corgea_config) } @@ -603,6 +625,10 @@ fn main() { run_install_wrap_command(corgea::precheck::PackageManager::Uv, args, &corgea_config) } None => { + if let Some(message) = corgea::precheck::pip3_alias_message(&cli.args) { + eprintln!("{message}"); + std::process::exit(1); + } utils::terminal::show_welcome_message(); let _ = Cli::command().print_help(); println!(); @@ -621,4 +647,27 @@ mod tests { assert_eq!(default_log_level(2), "info"); // only ==1 means debug assert_eq!(default_log_level(-1), "info"); } + + #[test] + fn verdict_mode_selection_matrix() { + use corgea::precheck::VerdictMode; + + assert_eq!( + select_verdict_mode("token", false, false), + VerdictMode::Authenticated { + token: "token".to_string() + } + ); + assert_eq!(select_verdict_mode("", false, false), VerdictMode::Public); + assert_eq!( + select_verdict_mode("token", true, false), + VerdictMode::Public + ); + assert_eq!( + select_verdict_mode("token", true, true), + VerdictMode::Authenticated { + token: "token".to_string() + } + ); + } } diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index ef76a64..98e8074 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -17,6 +17,7 @@ pub mod parse; pub mod tree; use std::ffi::OsString; +use std::path::Path; use std::process::Command; use std::time::Duration; @@ -78,13 +79,41 @@ impl PackageManager { } } +/// Auth and failure policy for the vuln-api verdict 
pass. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VerdictMode { + /// No auth header; vuln-api lookup errors warn and fail open. + Public, + /// Auth header sent; vuln-api lookup errors fail closed. + Authenticated { token: String }, +} + +impl VerdictMode { + fn auth_token(&self) -> Option<&str> { + match self { + VerdictMode::Public => None, + VerdictMode::Authenticated { token } => Some(token.as_str()), + } + } + + fn is_authenticated(&self) -> bool { + matches!(self, VerdictMode::Authenticated { .. }) + } + + fn is_public(&self) -> bool { + matches!(self, VerdictMode::Public) + } +} + /// Connection details for the vuln-api verdict pass. -/// `None` in `PrecheckOptions.verdict` ⇒ tokenless mode: verdicts are -/// skipped and the gate degrades to recency-only cover. +/// Public mode is still a verdict pass: known vulnerable/malicious verdicts +/// block, while lookup errors warn and continue. #[derive(Debug, Clone)] pub struct VerdictConfig { pub base_url: String, - pub token: String, + pub mode: VerdictMode, + /// Print the tokenless public-mode hint after a check is attempted. + pub public_login_hint: bool, } /// Threat verdict for one resolved target. @@ -95,15 +124,15 @@ pub enum VerdictStatus { /// vuln-api answered: known vulnerable or malicious — blocks. Vulnerable(Vec), /// The verdict could not be obtained (network/5xx/auth/integrity). - /// Blocks fail-closed. + /// Blocks only in authenticated mode. Unverifiable(String), - /// Verdict never attempted (no token). Recency-only cover; the - /// constant reason (`NO_TOKEN_REASON`) is attached at render time. + /// Verdict never attempted. The constant reason (`NO_VERDICT_REASON`) + /// is attached at render time. NotChecked, } -/// Reason recorded on resolved targets when no token is configured. -const NO_TOKEN_REASON: &str = "no Corgea token; vulnerability verdict skipped"; +/// Reason recorded on resolved targets when no verdict pass ran. 
+const NO_VERDICT_REASON: &str = "vulnerability verdict not checked"; #[derive(Debug, Clone)] pub struct PrecheckOptions { @@ -114,8 +143,9 @@ pub struct PrecheckOptions { /// unverifiable) and run the install anyway. pub force: bool, pub json: bool, - /// `Some` ⇒ run the vuln-api verdict pass against this endpoint; - /// `None` ⇒ tokenless recency-only mode. + /// `Some` ⇒ run the vuln-api verdict pass against this endpoint. + /// `None` is retained for tests and direct library callers that want + /// recency-only behavior. pub verdict: Option, /// Optional registry overrides, used by tests. pub npm_registry: Option, @@ -213,7 +243,7 @@ pub struct TreeOutcome { } /// Result of the tree pass. `PrecheckReport.tree` is `None` when the pass -/// never ran (recency-only / tokenless mode). +/// never ran (named-only managers, or verdicts disabled). #[derive(Debug)] pub enum TreeReport { /// The full would-install set was resolved and verdicted. @@ -234,7 +264,7 @@ pub struct PrecheckReport { pub original_args: Vec, pub outcomes: Vec, pub threshold: Duration, - /// `None` ⇒ recency-only mode, the tree pass never ran. + /// `None` ⇒ no tree pass ran. 
pub tree: Option, /// True when the command named nothing — no CLI targets and no /// requirements files — so everything the tree pass resolved predates @@ -319,6 +349,11 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption let subcommand = &cmd[0]; let rest = &cmd[1..]; + if manager == PackageManager::Pip && subcommand == "add" { + eprintln!("{}", unsupported_pip_add_message(rest)); + return 1; + } + if !manager.is_install_subcommand(subcommand) { return exec_install_with_args(manager, subcommand, rest); } @@ -331,6 +366,16 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption } }; + if let Some(message) = wrong_package_manager_message(manager, rest, &parsed) { + eprintln!("{message}"); + return 1; + } + + if let Some(message) = externally_managed_pip_message(manager, rest, &parsed) { + eprintln!("{message}"); + return 1; + } + run_parsed_install( manager, subcommand, @@ -341,9 +386,291 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption ) } +pub fn pip3_alias_message(args: &[String]) -> Option { + let rest = args.strip_prefix(&["pip3".to_string()])?; + let mut parts = vec!["corgea".to_string(), "pip".to_string()]; + parts.extend(rest.iter().cloned()); + Some(format!( + "error: unknown package manager `pip3`.\nDid you mean `{}`?", + parts.join(" ") + )) +} + +fn unsupported_pip_add_message(rest: &[String]) -> String { + let mut parts = vec![ + "corgea".to_string(), + "pip".to_string(), + "install".to_string(), + ]; + parts.extend(rest.iter().cloned()); + format!( + "error: pip does not support `add`.\nDid you mean `{}`?", + parts.join(" ") + ) +} + +fn wrong_package_manager_message( + manager: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + let cwd = std::env::current_dir().ok()?; + wrong_package_manager_message_from(&cwd, manager, rest, parsed) +} + +fn wrong_package_manager_message_from( + cwd: &Path, + manager: PackageManager, + 
rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + let expected = match manager { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + let expected = detect_node_manager_from(cwd)?; + (expected != manager).then_some(expected)? + } + PackageManager::Pip if detect_uv_project_from(cwd) => PackageManager::Uv, + PackageManager::Uv if detect_pip_project_from(cwd) => PackageManager::Pip, + _ => return None, + }; + + let suggestion = suggested_install_command(expected, rest, parsed); + Some(format!( + "error: this project appears to use {}, but you ran {}.\nDid you mean `{suggestion}`?", + expected.binary_name(), + manager.binary_name() + )) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ProjectManagerDetection { + None, + Ambiguous, + Found(PackageManager), +} + +fn detect_node_manager_from(start: &Path) -> Option { + for dir in start.ancestors() { + match detect_node_manager_in_dir(dir) { + ProjectManagerDetection::Found(manager) => return Some(manager), + ProjectManagerDetection::Ambiguous => return None, + ProjectManagerDetection::None => {} + } + } + None +} + +fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { + match package_json_manager(dir) { + Some(ProjectManagerDetection::Found(manager)) => { + return ProjectManagerDetection::Found(manager); + } + Some(ProjectManagerDetection::Ambiguous) => return ProjectManagerDetection::Ambiguous, + Some(ProjectManagerDetection::None) | None => {} + } + + let mut found = Vec::new(); + if dir.join("pnpm-lock.yaml").is_file() { + found.push(PackageManager::Pnpm); + } + if dir.join("yarn.lock").is_file() { + found.push(PackageManager::Yarn); + } + if dir.join("package-lock.json").is_file() || dir.join("npm-shrinkwrap.json").is_file() { + found.push(PackageManager::Npm); + } + + found.sort_by_key(|manager| manager.binary_name()); + found.dedup(); + match found.as_slice() { + [] => ProjectManagerDetection::None, + [manager] => 
ProjectManagerDetection::Found(*manager), + _ => ProjectManagerDetection::Ambiguous, + } +} + +fn package_json_manager(dir: &Path) -> Option { + let raw = std::fs::read_to_string(dir.join("package.json")).ok()?; + let json: serde_json::Value = serde_json::from_str(&raw).ok()?; + let Some(package_manager) = json.get("packageManager").and_then(|v| v.as_str()) else { + return Some(ProjectManagerDetection::None); + }; + Some( + parse_node_package_manager(package_manager) + .map(ProjectManagerDetection::Found) + .unwrap_or(ProjectManagerDetection::Ambiguous), + ) +} + +fn parse_node_package_manager(raw: &str) -> Option { + let name = raw.trim().split('@').next().unwrap_or("").trim(); + match name { + "npm" => Some(PackageManager::Npm), + "yarn" => Some(PackageManager::Yarn), + "pnpm" => Some(PackageManager::Pnpm), + _ => None, + } +} + +fn detect_uv_project_from(start: &Path) -> bool { + start.ancestors().any(|dir| dir.join("uv.lock").is_file()) +} + +fn detect_pip_project_from(start: &Path) -> bool { + start + .ancestors() + .take_while(|dir| !dir.join("pyproject.toml").is_file() && !dir.join("uv.lock").is_file()) + .any(has_requirements_file) +} + +fn has_requirements_file(dir: &Path) -> bool { + let Ok(entries) = std::fs::read_dir(dir) else { + return false; + }; + entries.filter_map(Result::ok).any(|entry| { + let name = entry.file_name(); + let name = name.to_string_lossy(); + entry.path().is_file() + && ((name.starts_with("requirements") + && (name.ends_with(".txt") || name.ends_with(".in"))) + || name.ends_with("-requirements.txt")) + }) +} + +fn suggested_install_command( + expected: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> String { + let mut parts = vec!["corgea".to_string(), expected.binary_name().to_string()]; + match expected { + PackageManager::Npm => parts.push("install".to_string()), + PackageManager::Yarn | PackageManager::Pnpm => { + if parsed.targets.is_empty() && parsed.requirements_files.is_empty() { + 
parts.push("install".to_string()); + } else { + parts.push("add".to_string()); + } + } + PackageManager::Uv => { + if is_plain_pip_target_install(rest, parsed) { + parts.push("add".to_string()); + parts.extend(parsed.targets.iter().map(|target| target.display.clone())); + return parts.join(" "); + } + parts.push("pip".to_string()); + parts.push("install".to_string()); + } + PackageManager::Pip => parts.push("install".to_string()), + } + parts.extend(rest.iter().cloned()); + parts.join(" ") +} + +fn is_plain_pip_target_install(rest: &[String], parsed: &parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + && parsed.requirements_files.is_empty() + && rest.len() == parsed.targets.len() + && rest + .iter() + .zip(&parsed.targets) + .all(|(arg, target)| arg == &target.display) +} + +fn externally_managed_pip_message( + manager: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + if manager != PackageManager::Pip + || (parsed.targets.is_empty() && parsed.requirements_files.is_empty()) + || pip_install_overrides_external_management(rest) + || !pip_environment_is_externally_managed() + { + return None; + } + + let mut retry = vec![ + "corgea".to_string(), + "pip".to_string(), + "install".to_string(), + ]; + retry.extend(rest.iter().cloned()); + Some(format!( + "error: this Python environment is externally managed (PEP 668).\nCreate and activate a virtualenv, then retry `{}`.", + retry.join(" ") + )) +} + +fn pip_install_overrides_external_management(args: &[String]) -> bool { + const VALUE_FLAGS: [&str; 3] = ["--target", "--prefix", "--root"]; + args.iter().enumerate().any(|(i, arg)| { + arg == "--break-system-packages" + || VALUE_FLAGS + .iter() + .any(|flag| arg == flag || arg.starts_with(&format!("{flag}="))) + || matches!(arg.as_str(), "-t" | "--target" | "--prefix" | "--root") + && args.get(i + 1).is_some() + }) +} + +fn pip_environment_is_externally_managed() -> bool { + let Ok(pip) = resolve_binary("pip") else { + return 
false; + }; + let Some(interpreter) = python_interpreter_from_shebang(&pip) else { + return false; + }; + + let mut command = Command::new(&interpreter[0]); + command.args(&interpreter[1..]); + let Ok(output) = command.arg("-c").arg(EXTERNALLY_MANAGED_PYTHON).output() else { + return false; + }; + output.status.success() && String::from_utf8_lossy(&output.stdout).trim() == "1" +} + +const EXTERNALLY_MANAGED_PYTHON: &str = r#" +import pathlib +import sysconfig + +paths = [] +for key in ("stdlib", "platstdlib"): + path = sysconfig.get_path(key) + if path and path not in paths: + paths.append(path) + +print("1" if any((pathlib.Path(path) / "EXTERNALLY-MANAGED").is_file() for path in paths) else "0") +"#; + +fn python_interpreter_from_shebang(path: &Path) -> Option> { + let content = std::fs::read_to_string(path).ok()?; + let first = content.lines().next()?.strip_prefix("#!")?.trim(); + let mut parts: Vec<&str> = first.split_whitespace().collect(); + if parts.is_empty() { + return None; + } + if parts[0].ends_with("/env") || parts[0] == "env" { + parts.remove(0); + if parts.first() == Some(&"-S") { + parts.remove(0); + } + } + let executable = parts.first()?; + if !executable.contains("python") { + return None; + } + Some(parts.iter().map(OsString::from).collect()) +} + fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { let exec = || exec_command("uv", cmd); + if matches!(cmd.first().map(String::as_str), Some("install" | "i")) { + eprintln!("{}", unsupported_uv_install_message(&cmd[1..])); + return 1; + } + match parse::classify_uv_command(cmd) { parse::UvCommand::Passthrough => exec(), parse::UvCommand::PipInstall { install_args } => { @@ -363,18 +690,34 @@ fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { opts, ) } - parse::UvCommand::Add { add_args } => run_parsed_install( - PackageManager::Uv, - "add", - add_args, - parse::parse_pypi_positionals_args(add_args), - exec, - opts, - ), + parse::UvCommand::Add { add_args } => { + let parsed = 
parse::parse_pypi_positionals_args(add_args); + if let Some(message) = + wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) + { + eprintln!("{message}"); + return 1; + } + run_parsed_install(PackageManager::Uv, "add", add_args, parsed, exec, opts) + } parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), } } +fn unsupported_uv_install_message(rest: &[String]) -> String { + let mut parts = vec![ + "corgea".to_string(), + "uv".to_string(), + "pip".to_string(), + "install".to_string(), + ]; + parts.extend(rest.iter().cloned()); + format!( + "error: uv does not support top-level `install`.\nDid you mean `{}`?", + parts.join(" ") + ) +} + /// Gate `uv sync` from the project's `uv.lock`. The lockfile is the full /// locked universe (all groups/extras) — a superset of what sync installs, /// conservative in the blocking direction; a stale lock that sync would @@ -384,7 +727,7 @@ fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { /// execute package code before any verdict. fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { let Some(cfg) = &opts.verdict else { - // Tokenless mode has no verdict to gate with. + // Direct callers may still disable verdicts completely. return exec(); }; let lock = match std::fs::read_to_string("uv.lock") { @@ -437,9 +780,10 @@ fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32 } else { print_text(&report); } + warn_public_lookup_failures(&report, &opts); if should_block_install(&report, &opts) { if !opts.json { - print_refusal(&report); + print_refusal(&report, &opts); } return 1; } @@ -494,8 +838,8 @@ fn run_parsed_install( let bare_install = parsed.targets.is_empty() && parsed.requirements_files.is_empty(); if parsed.targets.is_empty() && !tree_eligible { - // Only a truly bare install gets the bare note — a tokenless - // `-r requirements.txt` install is covered by `requirements_note`. + // Only a truly bare install gets the bare note. 
A `-r requirements.txt` + // install is covered by `requirements_note`. if bare_install { bare_install_note(manager, subcommand_label); } @@ -513,7 +857,7 @@ fn run_parsed_install( let tree = if tree_eligible { Some(run_tree_pass(manager, rest, &parsed, &mut outcomes, &opts)) } else { - run_verdict_pass(manager, &mut outcomes, &opts); // no-op tokenless + run_verdict_pass(manager, &mut outcomes, &opts); None }; @@ -524,13 +868,17 @@ fn run_parsed_install( ); } // The requirements note only matters when the tree pass did *not* cover - // those files (fallback to named-only, or recency-only mode). + // those files (fallback to named-only, or verdicts disabled). if !matches!(&tree, Some(TreeReport::Full { .. })) { requirements_note(&parsed); } - if opts.verdict.is_none() { + if opts + .verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.is_public() && cfg.public_login_hint) + { eprintln!( - "warning: no Corgea token — known-vulnerable packages will NOT be blocked (recency-only). Run 'corgea login' for the full gate." + "warning: using public CVE checks; login enables authenticated enforcement and private Corgea intelligence." ); } @@ -549,10 +897,11 @@ fn run_parsed_install( } else { print_text(&report); } + warn_public_lookup_failures(&report, &opts); if should_block_install(&report, &opts) { if !opts.json { - print_refusal(&report); + print_refusal(&report, &opts); } return 1; } @@ -582,14 +931,14 @@ fn bare_install_note(manager: PackageManager, subcommand_label: &str) { /// entirely the existing tree's doing, so say that instead of implying the /// package the user typed is at fault. Messaging only; the block decision /// stays with `should_block_install`. 
-fn print_refusal(report: &PrecheckReport) { - if refusal_blames_existing_tree(report) { +fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { + if refusal_blames_existing_tree(report, opts) { eprintln!( "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." ); } else if report.vulnerable_count() > 0 - || report.unverifiable_count() > 0 - || report.error_count() > 0 + || (authenticated_verdict(opts) && report.unverifiable_count() > 0) + || (authenticated_verdict(opts) && report.error_count() > 0) { eprintln!("Refusing to run install. Pass --force to proceed despite findings."); } else { @@ -606,8 +955,14 @@ fn print_refusal(report: &PrecheckReport) { /// that names targets or requirements files is being pulled in by them /// right now. Only a truly bare install (`report.bare_install`) or /// manifest-declared `PreExisting` findings may blame the existing tree. -fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { - let named_findings = report.named_vulnerable_count() + report.named_unverifiable_count(); +fn refusal_blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + let fail_closed = authenticated_verdict(opts); + let named_findings = report.named_vulnerable_count() + + if fail_closed { + report.named_unverifiable_count() + } else { + 0 + }; if report.vulnerable_count() == 0 || named_findings > 0 { return false; } @@ -617,10 +972,8 @@ fn refusal_blames_existing_tree(report: &PrecheckReport) -> bool { transitive .iter() .filter(|t| { - matches!( - t.verdict, - VerdictStatus::Vulnerable(_) | VerdictStatus::Unverifiable(_) - ) + matches!(t.verdict, VerdictStatus::Vulnerable(_)) + || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) }) .all(|t| match t.origin { // A locked pin predates the sync command that installs it. 
@@ -724,8 +1077,9 @@ const VERDICT_PROGRESS_THRESHOLD: usize = 8; /// Max parallel vuln-api verdict requests. const VERDICT_CONCURRENCY: usize = 8; -/// Bounded worker pool over the verdict jobs — owns client creation and the -/// fail-closed policy: on client failure every job comes back `Unverifiable`. +/// Bounded worker pool over the verdict jobs. On client/request failure every +/// job comes back `Unverifiable`; `should_block_install` decides whether that +/// fails closed for the selected mode. /// Plain work queue, no new crates; `reqwest::blocking::Client` is /// `Send + Sync`. Result order is not preserved; callers match results back /// by `(name, version)`. @@ -768,7 +1122,7 @@ fn verdict_pool( let verdict = match crate::vuln_api::check_package_version( &client, &cfg.base_url, - &cfg.token, + cfg.mode.auth_token(), ecosystem, &manager.normalize_name(&job.name), &job.version, @@ -836,10 +1190,9 @@ fn apply_verdicts( } /// Vuln-api verdict pass over resolved targets, run through the bounded -/// worker pool. No-op without a `VerdictConfig` (tokenless mode — `verify_one` -/// already marked every resolved target `NotChecked`). Any client/call failure -/// is fail-closed: the target becomes `Unverifiable`, which blocks unless -/// `--force`. +/// worker pool. No-op without a `VerdictConfig` (direct recency-only callers). +/// Any client/call failure becomes `Unverifiable`; authenticated mode blocks +/// on that and public mode warns but continues. 
fn run_verdict_pass( manager: PackageManager, outcomes: &mut [TargetOutcome], @@ -865,16 +1218,35 @@ fn run_verdict_pass( apply_verdicts(manager, results, outcomes, &Default::default()); } +fn authenticated_verdict(opts: &PrecheckOptions) -> bool { + opts.verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.is_authenticated()) +} + +fn public_verdict(opts: &PrecheckOptions) -> bool { + opts.verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.is_public()) +} + +fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { + if public_verdict(opts) && report.unverifiable_count() > 0 { + eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); + } +} + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { if opts.force { return false; } // A resolution error means no verdict was obtained for that target, so - // in tokened mode it fails closed like `Unverifiable` — otherwise a + // in authenticated mode it fails closed like `Unverifiable` — otherwise a // registry outage silently bypasses the gate. + let fail_closed = authenticated_verdict(opts); report.vulnerable_count() > 0 - || report.unverifiable_count() > 0 - || (opts.verdict.is_some() && report.error_count() > 0) + || (fail_closed && report.unverifiable_count() > 0) + || (fail_closed && report.error_count() > 0) || (!opts.no_fail && report.recent_count() > 0) } @@ -1303,13 +1675,18 @@ fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { json!({ "status": "unverifiable", "error": error }) } VerdictStatus::NotChecked => { - json!({ "status": "not_checked", "reason": NO_TOKEN_REASON }) + json!({ "status": "not_checked", "reason": NO_VERDICT_REASON }) } } } fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { use serde_json::json; + let verdict_mode = match opts.verdict.as_ref().map(|cfg| &cfg.mode) { + Some(VerdictMode::Public) => "public", + Some(VerdictMode::Authenticated { .. 
}) => "authenticated", + None => "recency-only", + }; let outcomes: Vec<_> = report .outcomes .iter() @@ -1359,7 +1736,7 @@ fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { "skipped": report.skipped_count(), "errors": report.error_count(), }, - "verdict_mode": if opts.verdict.is_some() { "full" } else { "recency-only" }, + "verdict_mode": verdict_mode, "results": outcomes, "tree": report.tree.as_ref().map(|t| match t { TreeReport::Full { resolved_count, transitive } => json!({ @@ -1459,7 +1836,10 @@ source = { git = "https://example.com/repo?rev=abc#abc" } PrecheckOptions { verdict: Some(VerdictConfig { base_url: base_url.to_string(), - token: "test-token".to_string(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, }), ..stub_opts() } @@ -1584,17 +1964,39 @@ source = { git = "https://example.com/repo?rev=abc#abc" } assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); } - /// Full predicate matrix: force ⇒ never block; vulnerable and - /// unverifiable block regardless of --no-fail; recency keeps its - /// task-2 --no-fail demotion. 
- #[test] - fn block_predicate_matrix() { - let opts = |no_fail: bool, force: bool| PrecheckOptions { + fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { no_fail, force, + verdict: Some(VerdictConfig { + base_url: "http://127.0.0.1:9".to_string(), + mode: VerdictMode::Public, + public_login_hint: true, + }), ..stub_opts() - }; + } + } + + fn authenticated_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { + no_fail, + force, + verdict: Some(VerdictConfig { + base_url: "http://127.0.0.1:9".to_string(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, + }), + ..stub_opts() + } + } + /// Predicate matrix: force ⇒ never block; vulnerable blocks in every + /// verdict mode; unverifiable/error findings block only in authenticated + /// mode; recency keeps its task-2 --no-fail demotion. + #[test] + fn block_predicate_matrix() { let clean = { let mut o = resolved_outcome("pkg", "1.0.0", false); set_verdict(&mut o, VerdictStatus::Clean); @@ -1611,25 +2013,59 @@ source = { git = "https://example.com/repo?rev=abc#abc" } set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); report_with(vec![o]) }; - - assert!(!should_block_install(&clean, &opts(false, false))); - assert!(should_block_install(&recent, &opts(false, false))); - assert!(!should_block_install(&recent, &opts(true, false))); - assert!(should_block_install(&vulnerable, &opts(false, false))); + let resolution_error = report_with(vec![TargetOutcome::Error { + target: InstallTarget { + name: "pkg".to_string(), + display: "pkg==1.0.0".to_string(), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + error: "registry unavailable".to_string(), + }]); + + assert!(!should_block_install(&clean, &public_opts(false, false))); + assert!(should_block_install(&recent, &public_opts(false, false))); + assert!(!should_block_install(&recent, &public_opts(true, false))); + 
assert!(should_block_install( + &vulnerable, + &public_opts(false, false) + )); assert!( - should_block_install(&vulnerable, &opts(true, false)), + should_block_install(&vulnerable, &public_opts(true, false)), "--no-fail must not waive a vulnerable block" ); assert!( - should_block_install(&unverifiable, &opts(true, false)), - "--no-fail must not waive an unverifiable block" + !should_block_install(&unverifiable, &public_opts(false, false)), + "public mode must fail open on lookup errors" + ); + assert!( + should_block_install(&unverifiable, &authenticated_opts(true, false)), + "authenticated mode must fail closed on lookup errors" ); - for report in [&clean, &recent, &vulnerable, &unverifiable] { + assert!( + !should_block_install(&resolution_error, &public_opts(false, false)), + "public mode must fail open when no verdict can be obtained" + ); + assert!( + should_block_install(&resolution_error, &authenticated_opts(false, false)), + "authenticated mode must fail closed when no verdict can be obtained" + ); + for report in [ + &clean, + &recent, + &vulnerable, + &unverifiable, + &resolution_error, + ] { assert!( - !should_block_install(report, &opts(false, true)), + !should_block_install(report, &public_opts(false, true)), "--force must never block" ); - assert!(!should_block_install(report, &opts(true, true))); + assert!(!should_block_install( + report, + &authenticated_opts(true, true) + )); } } @@ -1736,7 +2172,10 @@ source = { git = "https://example.com/repo?rev=abc#abc" } let cfg = VerdictConfig { base_url: stub.base_url.clone(), - token: "test-token".to_string(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, }; let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] @@ -1840,12 +2279,6 @@ source = { git = "https://example.com/repo?rev=abc#abc" } assert_eq!(safe_version(&[]), None); } - fn vulnerable_outcome(name: &str, version: &str, fixed: Option<&str>) -> TargetOutcome { - let mut o = 
resolved_outcome(name, version, false); - set_verdict(&mut o, VerdictStatus::Vulnerable(vec![vm("A-1", fixed)])); - o - } - #[test] fn error_prefix_strips_parenthesized_detail() { // The reqwest network-failure shape: per-package URL in parens. @@ -1990,7 +2423,7 @@ source = { git = "https://example.com/repo?rev=abc#abc" } transitive: vec![tree_vulnerable(origin)], }); assert_eq!( - refusal_blames_existing_tree(&report), + refusal_blames_existing_tree(&report, &authenticated_opts(false, false)), blames_tree, "origin {origin:?}, with_named {with_named}, bare {bare_install}" ); @@ -2033,13 +2466,23 @@ source = { git = "https://example.com/repo?rev=abc#abc" } // command, so "none were added by this command" would lie. let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); report.tree = mixed_tree(); - assert!(!refusal_blames_existing_tree(&report)); + assert!(!refusal_blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + assert!(refusal_blames_existing_tree( + &report, + &public_opts(false, false) + )); // Bare install: nothing named, everything resolved predates the // command — the mixed findings still blame the existing tree. let mut report = report_with(vec![]); report.bare_install = true; report.tree = mixed_tree(); - assert!(refusal_blames_existing_tree(&report)); + assert!(refusal_blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); } } diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index 155c4fb..5c49721 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -86,33 +86,33 @@ fn encode_package_name(ecosystem: &str, name: &str) -> String { } } -/// Build an authed JSON GET: the standard `Accept` / `CORGEA-SOURCE` headers -/// plus the per-call auth header (JWT → `Authorization: Bearer`, otherwise -/// `CORGEA-TOKEN`). The single place auth is attached, shared by every route. 
-fn build_authed_get( +/// Build a JSON GET: the standard `Accept` / `CORGEA-SOURCE` headers plus, +/// when present, the per-call auth header (JWT → `Authorization: Bearer`, +/// otherwise `CORGEA-TOKEN`). The single place auth is attached, shared by +/// every route. +fn build_json_get( client: &reqwest::blocking::Client, url: &str, - token: &str, + token: Option<&str>, ) -> reqwest::blocking::RequestBuilder { let mut req = client .get(url) .header("Accept", "application/json") .header("CORGEA-SOURCE", "cli"); - if is_jwt(token) { - req = req.header("Authorization", format!("Bearer {}", token)); - } else { - req = req.header("CORGEA-TOKEN", token); + if let Some(token) = token { + if is_jwt(token) { + req = req.header("Authorization", format!("Bearer {}", token)); + } else { + req = req.header("CORGEA-TOKEN", token); + } } req } /// Validate the per-call preconditions shared by every vuln-api request: -/// a non-empty token and a non-empty (trailing-slash-normalized) base URL. -/// Returns the normalized base so callers don't re-derive it. -fn validated_base(token: &str, base_url: &str) -> Result> { - if token.is_empty() { - return Err("missing Corgea token for vuln-api request".into()); - } +/// a non-empty (trailing-slash-normalized) base URL. Returns the normalized +/// base so callers don't re-derive it. 
+fn validated_base(base_url: &str) -> Result> { let base = normalize_base_url(base_url); if base.is_empty() { return Err("vuln-api base URL is empty".into()); @@ -163,16 +163,16 @@ fn retry_after_seconds(response: &reqwest::blocking::Response) -> u64 { fn send_package_check_with_429_retry( client: &reqwest::blocking::Client, url: &str, - token: &str, + token: Option<&str>, ) -> Result> { - let response = build_authed_get(client, url, token) + let response = build_json_get(client, url, token) .send() .map_err(|e| format!("Failed to send vuln-api request: {}", e))?; if response.status().as_u16() == 429 { let wait = retry_after_seconds(&response); std::thread::sleep(Duration::from_secs(wait)); - return build_authed_get(client, url, token) + return build_json_get(client, url, token) .send() .map_err(|e| format!("Failed to send vuln-api request: {}", e).into()); } @@ -182,12 +182,12 @@ fn send_package_check_with_429_retry( pub fn check_package_version( client: &reqwest::blocking::Client, base_url: &str, - token: &str, + token: Option<&str>, ecosystem: &str, name: &str, version: &str, ) -> Result> { - let base = validated_base(token, base_url)?; + let base = validated_base(base_url)?; let encoded_name = encode_package_name(ecosystem, name); let encoded_version = urlencoding::encode(version); let url = format!( @@ -202,9 +202,12 @@ pub fn check_package_version( let status = response.status(); match status.as_u16() { 401 => { - return Err( - "vuln-api rejected the Corgea token (run `corgea login` to refresh)".into(), - ); + if token.is_some() { + return Err( + "vuln-api rejected the Corgea token (run `corgea login` to refresh)".into(), + ); + } + return Err("vuln-api requires authentication".into()); } 403 => { return Err("vuln-api access denied (check your Corgea plan/permissions)".into()); @@ -299,13 +302,78 @@ mod tests { check_package_version( &client, &stub.base_url, - "test-token", + Some("test-token"), "npm", "lodash", "4.17.20", ) } + fn header_value(request: &str, 
name: &str) -> Option { + request + .lines() + .skip(1) + .take_while(|line| !line.trim().is_empty()) + .filter_map(|line| line.split_once(':')) + .find(|(key, _)| key.eq_ignore_ascii_case(name)) + .map(|(_, value)| value.trim().to_string()) + } + + fn captured_request(auth_token: Option<&str>) -> String { + use std::io::Write; + use std::net::TcpListener; + use std::sync::mpsc; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind capture stub"); + let base_url = format!("http://{}", listener.local_addr().unwrap()); + let (tx, rx) = mpsc::channel(); + std::thread::spawn(move || { + let (mut stream, _) = listener.accept().expect("accept request"); + let buf = crate::vuln_api_stub::read_http_request(&mut stream); + let request = String::from_utf8_lossy(&buf).into_owned(); + tx.send(request).expect("send captured request"); + let body = r#"{"ecosystem":"npm","package_name":"lodash","version":"4.17.20","is_vulnerable":false,"matches":[]}"#; + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), + body + ); + stream.write_all(response.as_bytes()).unwrap(); + }); + + let client = http_client().expect("test client"); + check_package_version(&client, &base_url, auth_token, "npm", "lodash", "4.17.20") + .expect("captured request should succeed"); + rx.recv().expect("captured request") + } + + #[test] + fn public_check_sends_no_auth_headers() { + let request = captured_request(None); + assert!(header_value(&request, "Authorization").is_none()); + assert!(header_value(&request, "CORGEA-TOKEN").is_none()); + } + + #[test] + fn jwt_auth_sends_authorization_bearer() { + let request = captured_request(Some("aaa.bbb.ccc")); + assert_eq!( + header_value(&request, "Authorization").as_deref(), + Some("Bearer aaa.bbb.ccc") + ); + assert!(header_value(&request, "CORGEA-TOKEN").is_none()); + } + + #[test] + fn opaque_auth_sends_corgea_token() { + let request = 
captured_request(Some("opaque-token")); + assert_eq!( + header_value(&request, "CORGEA-TOKEN").as_deref(), + Some("opaque-token") + ); + assert!(header_value(&request, "Authorization").is_none()); + } + #[test] fn check_package_version_401_returns_actionable_error() { let err = check_with_stub_status(401, r#"{"error":"unauthorized"}"#) @@ -361,7 +429,7 @@ mod tests { let resp = check_package_version( &client, &stub.base_url, - "test-token", + Some("test-token"), "npm", "lodash", "4.17.20", diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index df2065a..c607a43 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -1,6 +1,6 @@ //! Hermetic e2e tests for zero-spec ("bare") installs. //! -//! With a token and a `package.json`, bare `npm install` is gated like any +//! With a `package.json`, bare `npm install` is gated like any //! other install: the tree pass resolves the full lockfile set and verdicts //! every package, so a vulnerable lockfile blocks (exit 1, `--force` escape). //! Bare yarn/pnpm/uv installs have no safe dry-run — they exec unchecked @@ -202,15 +202,19 @@ fn bare_npm_without_package_json_passes_through() { } #[test] -fn bare_npm_tokenless_passes_through() { - // package.json present but no token → recency-only mode has no tree pass; - // bare install execs untouched. +fn bare_npm_tokenless_runs_public_tree_check() { + // package.json present but no token → public mode still verdicts the tree. 
let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("install")); - assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + stdout.contains("tree: 2 packages resolved"), + "stdout: {stdout}" + ); } #[test] diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs index 4b29a5c..01ec582 100644 --- a/tests/cli_exec_fallback.rs +++ b/tests/cli_exec_fallback.rs @@ -96,6 +96,32 @@ fn pip_missing_both_pip_and_pip3_exits_127_with_message() { ); } +#[test] +fn pip3_top_level_command_prints_pip_wrapper_suggestion() { + let mut h = FallbackHarness::new(&["pip3"]); + let out = h + .cmd + .args(["pip3", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip3 must not run"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: unknown package manager `pip3`."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pip install oldpkg==1.0.0`?"), + "stderr: {stderr}" + ); + assert!( + String::from_utf8_lossy(&out.stdout).is_empty(), + "stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); +} + #[test] fn npm_missing_binary_error_names_binary_without_fallback() { let mut h = FallbackHarness::new(&[]); diff --git a/tests/cli_install.rs b/tests/cli_install.rs index ad51c7d..55f3485 100644 --- a/tests/cli_install.rs +++ b/tests/cli_install.rs @@ -14,9 +14,10 @@ mod common; use common::{ - corgea_isolated, spawn_http_stub, write_fake_recorder, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, - OLDPKG_PYPI_JSON, + corgea_isolated, spawn_http_stub, 
write_fake_recorder, write_fake_tree_pm, write_script, + NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, OLDPKG_PYPI_JSON, RESOLUTION_FAILS, }; +use std::collections::HashMap; use std::path::PathBuf; use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -70,6 +71,7 @@ struct WrapperHarness { registry_hits: Arc, _home: TempDir, _bin: TempDir, + _vuln_stub: corgea::vuln_api_stub::VulnApiStub, } impl WrapperHarness { @@ -79,15 +81,50 @@ impl WrapperHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_recorder(bin.path(), binary, &marker, pm_exit_code); + match binary { + "npm" | "pip" => { + write_fake_tree_pm(bin.path(), binary, &marker, RESOLUTION_FAILS, pm_exit_code) + } + _ => write_fake_recorder(bin.path(), binary, &marker, pm_exit_code), + } let (base_url, registry_hits) = spawn_registry_stub(); - cmd.env("PATH", bin.path()).env(registry_env, &base_url); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); + cmd.env("PATH", bin.path()) + .env(registry_env, &base_url) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); Self { cmd, marker, registry_hits, _home: home, _bin: bin, + _vuln_stub: vuln_stub, + } + } + + fn new_externally_managed_pip() -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + let fake_python = bin.path().join("python-managed"); + let python_script = format!( + "#!/bin/sh\nif [ \"$1\" = \"-c\" ]; then printf '1\\n'; exit 0; fi\nprintf '%s' \"$*\" > '{}'\nexit 0\n", + marker.display() + ); + write_script(bin.path(), "python-managed", &python_script); + write_script(bin.path(), "pip", &format!("#!{}\n", fake_python.display())); + let (base_url, registry_hits) = spawn_registry_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); + 
cmd.env("PATH", bin.path()) + .env("CORGEA_PYPI_REGISTRY", &base_url) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); + Self { + cmd, + marker, + registry_hits, + _home: home, + _bin: bin, + _vuln_stub: vuln_stub, } } @@ -176,6 +213,67 @@ fn pip_non_install_subcommand_passes_through_without_registry_hit() { ); } +#[test] +fn pip_add_blocks_with_install_suggestion_without_running_pip() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["pip", "add", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "invalid pip command must not touch the registry" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: pip does not support `add`."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pip install oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn externally_managed_pip_blocks_before_registry_checks() { + let mut h = WrapperHarness::new_externally_managed_pip(); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "externally-managed preflight must run before registry checks" + ); + assert!( + String::from_utf8_lossy(&out.stdout).is_empty(), + "stdout: {}", + String::from_utf8_lossy(&out.stdout) + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: this Python environment is externally managed (PEP 668)."), + "stderr: {stderr}" + ); + assert!( + stderr.contains( + "Create and activate a virtualenv, then retry `corgea pip install oldpkg==1.0.0`." 
+ ), + "stderr: {stderr}" + ); +} + #[test] fn pip_json_reports_fresh_pin_as_recent() { let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); @@ -196,7 +294,7 @@ fn pip_json_reports_fresh_pin_as_recent() { #[test] fn pip_resolution_error_prints_error_but_install_proceeds() { // `nosuchpkg` hits the stub's 404 route → an error outcome, which - // warns but does not block in tokenless mode (tokened mode fails + // warns but does not block in public mode (authenticated mode fails // closed — see cli_verdict.rs) — the install must still run. let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h @@ -279,6 +377,249 @@ fn npm_old_pin_runs_install_with_forwarded_args() { assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); } +#[test] +fn npm_in_pnpm_lock_project_blocks_with_pnpm_add_suggestion() { + let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("package.json"), r#"{"name":"proj"}"#) + .expect("write package.json"); + std::fs::write( + project.path().join("pnpm-lock.yaml"), + "lockfileVersion: '9.0'\n", + ) + .expect("write pnpm lock"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "i", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "wrong-manager guard must run before registry checks" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: this project appears to use pnpm, but you ran npm."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pnpm add oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn package_manager_field_beats_missing_lockfile_for_node_guard() { + let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let 
project = TempDir::new().expect("project dir"); + std::fs::write( + project.path().join("package.json"), + r#"{"name":"proj","packageManager":"pnpm@9.12.0"}"#, + ) + .expect("write package.json"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "npm must not run"); + assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + assert!( + String::from_utf8_lossy(&out.stderr).contains("Did you mean `corgea pnpm add oldpkg`?"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn conflicting_node_lockfiles_do_not_block_as_wrong_manager() { + let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("package.json"), r#"{"name":"proj"}"#) + .expect("write package.json"); + std::fs::write(project.path().join("package-lock.json"), "{}").expect("write npm lock"); + std::fs::write( + project.path().join("pnpm-lock.yaml"), + "lockfileVersion: '9.0'\n", + ) + .expect("write pnpm lock"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["npm", "install", "oldpkg@1.0.0"]) + .output() + .expect("failed to run corgea"); + + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); + assert!( + h.registry_hits.load(Ordering::SeqCst) >= 1, + "the normal install gate should still run" + ); +} + +#[test] +fn pip_in_uv_lock_project_blocks_with_uv_add_suggestion() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("uv.lock"), "version = 1\n").expect("write uv lock"); + + let out = h + .cmd + 
.current_dir(project.path()) + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: this project appears to use uv, but you ran pip."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea uv add oldpkg==1.0.0`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn pip_requirements_in_uv_project_suggests_uv_pip_install() { + let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("uv.lock"), "version = 1\n").expect("write uv lock"); + std::fs::write(project.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["pip", "install", "-r", "requirements.txt"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "pip must not run"); + assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + assert!( + String::from_utf8_lossy(&out.stderr) + .contains("Did you mean `corgea uv pip install -r requirements.txt`?"), + "stderr: {}", + String::from_utf8_lossy(&out.stderr) + ); +} + +#[test] +fn uv_add_in_requirements_project_blocks_with_pip_install_suggestion() { + let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write(project.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["uv", "add", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "uv must 
not run"); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "wrong-manager guard must run before registry checks" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: this project appears to use pip, but you ran uv."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea pip install oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn uv_install_blocks_with_uv_pip_install_suggestion_without_running_uv() { + let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let out = h + .cmd + .args(["uv", "install", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!(out.status.code(), Some(1)); + assert_eq!(h.recorded_argv(), None, "uv must not run"); + assert_eq!( + h.registry_hits.load(Ordering::SeqCst), + 0, + "invalid uv command must not touch the registry" + ); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + stderr.contains("error: uv does not support top-level `install`."), + "stderr: {stderr}" + ); + assert!( + stderr.contains("Did you mean `corgea uv pip install oldpkg`?"), + "stderr: {stderr}" + ); +} + +#[test] +fn uv_add_in_pyproject_with_requirements_does_not_guess_pip() { + let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let project = TempDir::new().expect("project dir"); + std::fs::write( + project.path().join("pyproject.toml"), + "[project]\nname = \"proj\"\nversion = \"0.1.0\"\n", + ) + .expect("write pyproject"); + std::fs::write(project.path().join("requirements.txt"), "oldpkg==1.0.0\n") + .expect("write requirements"); + + let out = h + .cmd + .current_dir(project.path()) + .args(["uv", "add", "oldpkg"]) + .output() + .expect("failed to run corgea"); + + assert_eq!( + out.status.code(), + Some(0), + "stdout: {}\nstderr: {}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + assert_eq!(h.recorded_argv().as_deref(), Some("add oldpkg")); + assert!( + 
h.registry_hits.load(Ordering::SeqCst) >= 1, + "the normal uv add gate should still run" + ); +} + #[test] fn wrapper_forwards_package_manager_exit_code() { let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 7); diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index 2851fcf..6b2ecc1 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -158,6 +158,7 @@ fn preexisting_vulnerable_with_unverifiable_transitive_keeps_generic_refusal() { let mut statuses = HashMap::new(); statuses.insert(key("npm", "newdep", "2.0.0"), 503u16); let mut h = TreeHarness::new("npm", checks, statuses, LOCK_WITH_NEWDEP); + h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd .current_dir(project.path()) diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 0da51d7..13d7704 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -1,5 +1,5 @@ //! Hermetic e2e tests for the full-tree resolution pass -//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! (`corgea pip install …` with a `CORGEA_VULN_API_URL` stub). //! //! Composes the `cli_verdict.rs` harness pattern (fake pip on a private PATH + //! local pypi registry stub + in-crate vuln-api stub) with a dry-run-aware diff --git a/tests/cli_uv_sync.rs b/tests/cli_uv_sync.rs index 96cf170..c8c54b2 100644 --- a/tests/cli_uv_sync.rs +++ b/tests/cli_uv_sync.rs @@ -1,8 +1,8 @@ //! Hermetic e2e tests for the `corgea uv sync` gate. //! -//! With a token, `uv sync` is gated from the project's `uv.lock`: every +//! `uv sync` is gated from the project's `uv.lock`: every //! index-sourced pin is verdicted against the vuln-api stub before uv runs. -//! Without a lockfile (or without a token) it execs behind an honest note. +//! Without a lockfile it execs behind an honest note. //! Harness: fake `uv` argv recorder on a private PATH + in-crate vuln-api //! stub + throwaway project dir as cwd. No registry stub — the sync gate //! does no recency resolution. 
@@ -171,13 +171,18 @@ fn uv_sync_malformed_lockfile_fails_closed() { } #[test] -fn uv_sync_tokenless_passes_through() { +fn uv_sync_tokenless_runs_public_lock_check() { let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("sync")); - assert!(!String::from_utf8_lossy(&out.stdout).contains("Pre-checking")); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("Pre-checking"), "stdout: {stdout}"); + assert!( + stdout.contains("tree: 1 packages resolved"), + "stdout: {stdout}" + ); } #[test] diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 5c0392c..2941d7c 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -1,5 +1,5 @@ //! Hermetic e2e tests for the install-gate vuln-api verdict -//! (`corgea pip install …` with a token + `CORGEA_VULN_API_URL` stub). +//! (`corgea pip install …` with public/authenticated `CORGEA_VULN_API_URL` stubs). //! //! Composes the `cli_install.rs` harness pattern (fake package manager on a //! private PATH + local pypi registry stub) with the in-crate vuln-api stub — @@ -11,7 +11,7 @@ mod common; -use common::{key, vulnerable_body, PipHarness}; +use common::{header_value, key, spawn_capturing_vuln_api_stub, vulnerable_body, PipHarness}; use std::collections::HashMap; fn vulnerable_oldpkg_body() -> String { @@ -92,11 +92,12 @@ fn force_overrides_vulnerable_block_and_propagates_exit_code() { } #[test] -fn resolution_error_fails_closed_with_token() { +fn resolution_error_fails_closed_when_authenticated() { // The wildcard registry stub only knows version 1.0.0, so `==2.0.0` - // is a resolution error: no verdict was obtained, and with a token - // that must block — otherwise a registry outage bypasses the gate. 
+ // is a resolution error: no verdict was obtained, and authenticated + // mode must block — otherwise a registry outage bypasses the gate. let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd .args(["pip", "install", "nosuchpkg==2.0.0"]) @@ -105,7 +106,7 @@ fn resolution_error_fails_closed_with_token() { assert_eq!( out.status.code(), Some(1), - "a resolution error must fail closed in tokened mode" + "a resolution error must fail closed in authenticated mode" ); assert_eq!(h.recorded_argv(), None); let stdout = String::from_utf8_lossy(&out.stdout); @@ -121,6 +122,7 @@ fn verdict_503_fails_closed() { let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); let mut h = PipHarness::new(HashMap::new(), statuses, Some("test-token"), 0); + h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -129,7 +131,7 @@ fn verdict_503_fails_closed() { assert_eq!( out.status.code(), Some(1), - "unverifiable must block (fail-closed)" + "authenticated unverifiable must block (fail-closed)" ); assert_eq!(h.recorded_argv(), None); let stdout = String::from_utf8_lossy(&out.stdout); @@ -137,8 +139,8 @@ fn verdict_503_fails_closed() { } #[test] -fn tokenless_degrades_to_recency_only_with_login_prompt() { - // Stub would flag oldpkg, but with no token it must never be consulted. +fn tokenless_public_check_blocks_vulnerable_pin() { + // No token still runs public CVE checks and blocks a vulnerable verdict. 
let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); let mut h = PipHarness::new(checks, HashMap::new(), None, 0); @@ -149,18 +151,43 @@ fn tokenless_degrades_to_recency_only_with_login_prompt() { .expect("run corgea"); assert_eq!( out.status.code(), - Some(0), - "old + unchecked package must install" + Some(1), + "public CVE checks must block vulnerable packages" ); - assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + assert_eq!(h.recorded_argv(), None); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!(stdout.contains("MAL-2024-0001"), "stdout: {stdout}"); let stderr = String::from_utf8_lossy(&out.stderr); assert!( - stderr.contains("corgea login"), - "tokenless mode must prompt for login: {stderr}" + stderr.contains("using public CVE checks"), + "tokenless mode must disclose public CVE checks: {stderr}" + ); + assert!( + stderr.contains("authenticated enforcement") + && stderr.contains("private Corgea intelligence"), + "tokenless warning must name the authenticated benefit: {stderr}" + ); +} + +#[test] +fn tokenless_vuln_api_outage_warns_but_installs() { + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), None, 0); + h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!( + out.status.code(), + Some(0), + "public lookup outage must fail open" ); + assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg==1.0.0")); + let stderr = String::from_utf8_lossy(&out.stderr); assert!( - stderr.contains("warning: no Corgea token") && stderr.contains("will NOT be blocked"), - "tokenless warning must state the consequence: {stderr}" + stderr.contains("CVE check unavailable; continuing because public mode is fail-open"), + "stderr: {stderr}" ); } @@ -200,6 +227,7 @@ fn outage_noise_collapses_above_three_unverifiable() { // one collapsed line; counts and 
fail-closed exit code unchanged. let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd .args([ @@ -231,6 +259,7 @@ fn outage_noise_collapses_above_three_unverifiable() { // Three findings stay per-line — no collapse at the threshold. let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); + h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd .args([ @@ -269,7 +298,7 @@ fn json_carries_verdict_object_and_mode() { assert_eq!(h.recorded_argv(), None); let parsed: serde_json::Value = serde_json::from_slice(&out.stdout).expect("stdout must be valid JSON"); - assert_eq!(parsed["verdict_mode"], "full"); + assert_eq!(parsed["verdict_mode"], "public"); assert_eq!(parsed["results"][0]["verdict"]["status"], "vulnerable"); assert_eq!( parsed["results"][0]["verdict"]["matches"][0]["advisory_id"], @@ -281,3 +310,42 @@ fn json_carries_verdict_object_and_mode() { ); assert_eq!(parsed["summary"]["vulnerable"], 1); } + +#[test] +fn custom_vuln_api_url_with_token_does_not_send_token_by_default() { + let (base_url, requests) = spawn_capturing_vuln_api_stub(); + let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); + h.cmd.env("CORGEA_VULN_API_URL", &base_url); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let captured = requests.lock().unwrap(); + let request = captured.first().expect("one vuln-api request"); + assert!(header_value(request, "Authorization").is_none()); + assert!(header_value(request, "CORGEA-TOKEN").is_none()); +} + +#[test] +fn custom_vuln_api_url_sends_token_only_with_opt_in() { + let (base_url, requests) = spawn_capturing_vuln_api_stub(); + let mut h = 
PipHarness::new(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); + h.cmd + .env("CORGEA_VULN_API_URL", &base_url) + .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); + let out = h + .cmd + .args(["pip", "install", "oldpkg==1.0.0"]) + .output() + .expect("run corgea"); + assert_eq!(out.status.code(), Some(0)); + let captured = requests.lock().unwrap(); + let request = captured.first().expect("one vuln-api request"); + assert_eq!( + header_value(request, "CORGEA-TOKEN").as_deref(), + Some("opaque-token") + ); + assert!(header_value(request, "Authorization").is_none()); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 505ca85..2fd9e43 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -24,6 +24,7 @@ pub fn corgea_isolated() -> (Command, TempDir) { .env_remove("CORGEA_NPM_REGISTRY") .env_remove("CORGEA_PYPI_REGISTRY") .env_remove("CORGEA_VULN_API_URL") + .env_remove("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") .env_remove("AI_AGENT") .env_remove("CODEX_SANDBOX") .env_remove("CLAUDECODE") @@ -131,6 +132,62 @@ where base_url } +/// Vuln-api stub that records raw requests and answers every package check +/// with a clean verdict. Used to assert auth-header behavior from the CLI. 
+#[allow(dead_code)] +pub fn spawn_capturing_vuln_api_stub() -> (String, std::sync::Arc>>) { + use std::io::Write; + use std::net::TcpListener; + use std::sync::{Arc, Mutex}; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind capture stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + let requests = Arc::new(Mutex::new(Vec::new())); + let requests_in_stub = Arc::clone(&requests); + std::thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let buf = corgea::vuln_api_stub::read_http_request(&mut stream); + let req = String::from_utf8_lossy(&buf).into_owned(); + requests_in_stub.lock().unwrap().push(req.clone()); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + let (eco, name, ver) = if parts.len() >= 7 { + (parts[2], parts[3], parts[5]) + } else { + ("pypi", "unknown", "0.0.0") + }; + let name = urlencoding::decode(name).unwrap_or_default(); + let ver = urlencoding::decode(ver).unwrap_or_default(); + let body = format!( + r#"{{"ecosystem":"{eco}","package_name":"{name}","version":"{ver}","is_vulnerable":false,"matches":[]}}"# + ); + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + (base_url, requests) +} + +#[allow(dead_code)] +pub fn header_value(request: &str, name: &str) -> Option { + request + .lines() + .skip(1) + .take_while(|line| !line.trim().is_empty()) + .filter_map(|line| line.split_once(':')) + .find(|(key, _)| key.eq_ignore_ascii_case(name)) + .map(|(_, value)| value.trim().to_string()) +} + /// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm /// packument), both published 2020 → never recent. Everything else 404s. 
#[allow(dead_code)] @@ -279,7 +336,7 @@ pub struct PipHarness { #[cfg(unix)] #[allow(dead_code)] impl PipHarness { - /// `token: None` exercises tokenless mode (no CORGEA_TOKEN set). + /// `token: None` exercises public mode (no CORGEA_TOKEN set). pub fn new( checks: HashMap, statuses: HashMap, From 6905b2fa8c28fa7274a75a6a9b9f96d5bfeb722a Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 15:52:22 +0200 Subject: [PATCH 37/59] Refine install gate verdict handling --- skills/corgea/SKILL.md | 12 +-- src/config.rs | 10 +-- src/deps/ecosystems/npm.rs | 6 +- src/main.rs | 11 +-- src/precheck/mod.rs | 174 +++++++++++++++++------------------- src/precheck/tree.rs | 9 +- src/utils/api.rs | 12 +-- src/verify_deps/registry.rs | 23 ++--- src/vuln_api/mod.rs | 61 ++++--------- src/vuln_api_stub/mod.rs | 52 +++++++++++ tests/cli_exec_fallback.rs | 9 +- tests/cli_verdict.rs | 3 +- tests/common/mod.rs | 79 +--------------- 13 files changed, 199 insertions(+), 262 deletions(-) diff --git a/skills/corgea/SKILL.md b/skills/corgea/SKILL.md index dc6d023..789b077 100644 --- a/skills/corgea/SKILL.md +++ b/skills/corgea/SKILL.md @@ -157,7 +157,7 @@ corgea pip list # non-install subcommands pass straight th | `--force` | | Proceed despite all findings (vulnerable, unverifiable, recent). Findings still print. | | `--json` | | JSON report instead of text. Per-result `verdict` object + `verdict_mode` + `tree`. | -`--json` adds `verdict_mode` (`"public"`, `"authenticated"`, or `"recency-only"`) and a +`--json` adds `verdict_mode` (`"public"` or `"authenticated"`) and a `tree` object: `null` when no tree pass ran; otherwise `mode` is `"full"` (transitive checked) or `"named-only"` (with a `reason`), plus `resolved_count` and a `transitive[]` array of `{name, version, verdict}` for packages beyond the named targets. Vulnerable @@ -172,10 +172,12 @@ testing: `CORGEA_PYPI_REGISTRY`, `CORGEA_NPM_REGISTRY`, `CORGEA_VULN_API_URL`. 
#### Testing the gate -Staging vuln-api (`CORGEA_VULN_API_URL=https://cve-worker-staging.corgea.workers.dev`) -serves deterministic verdicts for dogfooding. It runs in public mode by default; -add `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` if you need to exercise authenticated -custom-URL behavior. Known-vulnerable targets: +The staging vuln-api (`https://cve-worker-staging.corgea.workers.dev`) serves +deterministic verdicts for dogfooding and is currently the default endpoint, so +with `CORGEA_TOKEN` set it runs authenticated with no extra setup. The explicit +`CORGEA_VULN_API_URL` + `CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL=1` below keep +that true even if the default endpoint moves (a custom URL is public-mode unless +the opt-in is set). Known-vulnerable targets: | Ecosystem | Target | Verdict | |-----------|--------|---------| diff --git a/src/config.rs b/src/config.rs index dcd0217..5b6a18f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -134,10 +134,7 @@ mod tests { env::remove_var("CORGEA_VULN_API_URL"); // Default when the env var is unset. - assert_eq!( - test_config().get_vuln_api_url(), - "https://cve-worker-staging.corgea.workers.dev" - ); + assert_eq!(test_config().get_vuln_api_url(), DEFAULT_VULN_API_URL); // Env var wins; whitespace and trailing slash trimmed. env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); @@ -145,10 +142,7 @@ mod tests { // Empty / whitespace-only env var is treated as unset. 
env::set_var("CORGEA_VULN_API_URL", " "); - assert_eq!( - test_config().get_vuln_api_url(), - "https://cve-worker-staging.corgea.workers.dev" - ); + assert_eq!(test_config().get_vuln_api_url(), DEFAULT_VULN_API_URL); env::remove_var("CORGEA_VULN_API_URL"); } } diff --git a/src/deps/ecosystems/npm.rs b/src/deps/ecosystems/npm.rs index edbc7cf..1fd8d91 100644 --- a/src/deps/ecosystems/npm.rs +++ b/src/deps/ecosystems/npm.rs @@ -312,7 +312,11 @@ fn parse_npm_lock(path: &Path) -> Result, DepsError Ok(out) } -fn package_name_from_lock_key(key: &str) -> &str { +/// Package name from a lockfile `packages` key: the path after the last +/// `node_modules/` (or the whole key), truncated to one component — two for +/// scoped names. Also shared with the install gate's lockfile parse +/// (`precheck::tree`). +pub(crate) fn package_name_from_lock_key(key: &str) -> &str { let package_path = key .rsplit_once("node_modules/") .map(|(_, name)| name) diff --git a/src/main.rs b/src/main.rs index 1bcc922..49d5963 100644 --- a/src/main.rs +++ b/src/main.rs @@ -272,19 +272,14 @@ fn install_wrap_options( ) -> corgea::precheck::PrecheckOptions { let token = config.get_token(); let token = token.trim(); - let custom_vuln_api_url = utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") - .map(|url| { - url.trim() - .trim_end_matches('/') - .ne(config::DEFAULT_VULN_API_URL) - }) - .unwrap_or(false); + let base_url = config.get_vuln_api_url(); + let custom_vuln_api_url = base_url != config::DEFAULT_VULN_API_URL; let send_token_to_custom = utils::generic::get_env_var_if_exists("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") .is_some_and(|v| v.trim() == "1"); let mode = select_verdict_mode(token, custom_vuln_api_url, send_token_to_custom); let verdict = Some(corgea::precheck::VerdictConfig { - base_url: config.get_vuln_api_url(), + base_url, mode, public_login_hint: token.is_empty(), }); diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 98e8074..eee169e 100644 --- 
a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -386,26 +386,27 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption ) } +/// `corgea ` — the suggested-command string used by the +/// "Did you mean …" messages. +fn corgea_cmd(words: &[&str], rest: &[String]) -> String { + let mut parts = vec!["corgea".to_string()]; + parts.extend(words.iter().map(|w| w.to_string())); + parts.extend(rest.iter().cloned()); + parts.join(" ") +} + pub fn pip3_alias_message(args: &[String]) -> Option { let rest = args.strip_prefix(&["pip3".to_string()])?; - let mut parts = vec!["corgea".to_string(), "pip".to_string()]; - parts.extend(rest.iter().cloned()); Some(format!( "error: unknown package manager `pip3`.\nDid you mean `{}`?", - parts.join(" ") + corgea_cmd(&["pip"], rest) )) } fn unsupported_pip_add_message(rest: &[String]) -> String { - let mut parts = vec![ - "corgea".to_string(), - "pip".to_string(), - "install".to_string(), - ]; - parts.extend(rest.iter().cloned()); format!( "error: pip does not support `add`.\nDid you mean `{}`?", - parts.join(" ") + corgea_cmd(&["pip", "install"], rest) ) } @@ -414,16 +415,7 @@ fn wrong_package_manager_message( rest: &[String], parsed: &parse::ParsedInstall, ) -> Option { - let cwd = std::env::current_dir().ok()?; - wrong_package_manager_message_from(&cwd, manager, rest, parsed) -} - -fn wrong_package_manager_message_from( - cwd: &Path, - manager: PackageManager, - rest: &[String], - parsed: &parse::ParsedInstall, -) -> Option { + let cwd = &std::env::current_dir().ok()?; let expected = match manager { PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { let expected = detect_node_manager_from(cwd)?; @@ -462,11 +454,8 @@ fn detect_node_manager_from(start: &Path) -> Option { fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { match package_json_manager(dir) { - Some(ProjectManagerDetection::Found(manager)) => { - return ProjectManagerDetection::Found(manager); - } - 
Some(ProjectManagerDetection::Ambiguous) => return ProjectManagerDetection::Ambiguous, - Some(ProjectManagerDetection::None) | None => {} + ProjectManagerDetection::None => {} + found => return found, } let mut found = Vec::new(); @@ -480,8 +469,6 @@ fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { found.push(PackageManager::Npm); } - found.sort_by_key(|manager| manager.binary_name()); - found.dedup(); match found.as_slice() { [] => ProjectManagerDetection::None, [manager] => ProjectManagerDetection::Found(*manager), @@ -489,17 +476,22 @@ fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { } } -fn package_json_manager(dir: &Path) -> Option { - let raw = std::fs::read_to_string(dir.join("package.json")).ok()?; - let json: serde_json::Value = serde_json::from_str(&raw).ok()?; - let Some(package_manager) = json.get("packageManager").and_then(|v| v.as_str()) else { - return Some(ProjectManagerDetection::None); +/// `packageManager`-field detection. Missing/unparsable `package.json` and a +/// missing field both fall through to lockfile detection (`None`). 
+fn package_json_manager(dir: &Path) -> ProjectManagerDetection { + let json: Option = std::fs::read_to_string(dir.join("package.json")) + .ok() + .and_then(|raw| serde_json::from_str(&raw).ok()); + let Some(package_manager) = json + .as_ref() + .and_then(|j| j.get("packageManager")) + .and_then(|v| v.as_str()) + else { + return ProjectManagerDetection::None; }; - Some( - parse_node_package_manager(package_manager) - .map(ProjectManagerDetection::Found) - .unwrap_or(ProjectManagerDetection::Ambiguous), - ) + parse_node_package_manager(package_manager) + .map(ProjectManagerDetection::Found) + .unwrap_or(ProjectManagerDetection::Ambiguous) } fn parse_node_package_manager(raw: &str) -> Option { @@ -590,27 +582,19 @@ fn externally_managed_pip_message( return None; } - let mut retry = vec![ - "corgea".to_string(), - "pip".to_string(), - "install".to_string(), - ]; - retry.extend(rest.iter().cloned()); Some(format!( "error: this Python environment is externally managed (PEP 668).\nCreate and activate a virtualenv, then retry `{}`.", - retry.join(" ") + corgea_cmd(&["pip", "install"], rest) )) } fn pip_install_overrides_external_management(args: &[String]) -> bool { - const VALUE_FLAGS: [&str; 3] = ["--target", "--prefix", "--root"]; - args.iter().enumerate().any(|(i, arg)| { + const VALUE_FLAGS: [&str; 4] = ["-t", "--target", "--prefix", "--root"]; + args.iter().any(|arg| { arg == "--break-system-packages" || VALUE_FLAGS .iter() .any(|flag| arg == flag || arg.starts_with(&format!("{flag}="))) - || matches!(arg.as_str(), "-t" | "--target" | "--prefix" | "--root") - && args.get(i + 1).is_some() }) } @@ -618,6 +602,13 @@ fn pip_environment_is_externally_managed() -> bool { let Ok(pip) = resolve_binary("pip") else { return false; }; + // PEP 668 markers live in a system interpreter's stdlib; pip inside an + // active virtualenv can't be externally managed — skip the spawn. 
+ if let Some(venv) = std::env::var_os("VIRTUAL_ENV") { + if pip.starts_with(&venv) { + return false; + } + } let Some(interpreter) = python_interpreter_from_shebang(&pip) else { return false; }; @@ -705,16 +696,9 @@ fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { } fn unsupported_uv_install_message(rest: &[String]) -> String { - let mut parts = vec![ - "corgea".to_string(), - "uv".to_string(), - "pip".to_string(), - "install".to_string(), - ]; - parts.extend(rest.iter().cloned()); format!( "error: uv does not support top-level `install`.\nDid you mean `{}`?", - parts.join(" ") + corgea_cmd(&["uv", "pip", "install"], rest) ) } @@ -775,15 +759,25 @@ fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32 bare_install: true, }; + report_and_exec(&report, &opts, exec) +} + +/// Shared tail of every gated path: render the report, refuse (exit 1) when +/// the block predicate fires, otherwise run the install. +fn report_and_exec( + report: &PrecheckReport, + opts: &PrecheckOptions, + exec: impl FnOnce() -> i32, +) -> i32 { if opts.json { - print_json(&report, &opts); + print_json(report, opts); } else { - print_text(&report); + print_text(report); } - warn_public_lookup_failures(&report, &opts); - if should_block_install(&report, &opts) { + warn_public_lookup_failures(report, opts); + if should_block_install(report, opts) { if !opts.json { - print_refusal(&report, &opts); + print_refusal(report, opts); } return 1; } @@ -847,15 +841,24 @@ fn run_parsed_install( return exec(); } + // The named-target registry lookups and the tree dry-run are independent + // network/subprocess work — overlap them; verdicts need both. 
let now = Utc::now(); - let mut outcomes: Vec<_> = parsed - .targets - .iter() - .map(|target| verify_one(target, &opts, &now)) - .collect(); + let (mut outcomes, tree_resolution) = std::thread::scope(|s| { + let tree = tree_eligible.then(|| s.spawn(|| tree::resolve_tree(manager, rest, &parsed))); + let outcomes: Vec<_> = parsed + .targets + .iter() + .map(|target| verify_one(target, &opts, &now)) + .collect(); + ( + outcomes, + tree.map(|handle| handle.join().expect("tree resolution thread panicked")), + ) + }); - let tree = if tree_eligible { - Some(run_tree_pass(manager, rest, &parsed, &mut outcomes, &opts)) + let tree = if let Some(resolution) = tree_resolution { + Some(run_tree_pass(manager, resolution, &mut outcomes, &opts)) } else { run_verdict_pass(manager, &mut outcomes, &opts); None @@ -892,21 +895,7 @@ fn run_parsed_install( bare_install, }; - if opts.json { - print_json(&report, &opts); - } else { - print_text(&report); - } - warn_public_lookup_failures(&report, &opts); - - if should_block_install(&report, &opts) { - if !opts.json { - print_refusal(&report, &opts); - } - return 1; - } - - exec() + report_and_exec(&report, &opts, exec) } /// One honest stderr line when a zero-spec install can't be gated: @@ -1000,18 +989,17 @@ fn requirements_note(parsed: &parse::ParsedInstall) { ); } -/// Resolve the full would-install set and verdict it. On any resolution -/// failure, fall back to the named-only verdict pass; the caller renders the -/// loud warning from the returned `NamedOnly` reason. Only called when -/// `opts.verdict.is_some()`. +/// Verdict the resolved would-install set (`tree::resolve_tree`'s result). +/// On any resolution failure, fall back to the named-only verdict pass; the +/// caller renders the loud warning from the returned `NamedOnly` reason. +/// Only called when `opts.verdict.is_some()`. 
fn run_tree_pass( manager: PackageManager, - rest: &[String], - parsed: &parse::ParsedInstall, + resolution: Result>, String>, outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let set = match tree::resolve_tree(manager, rest, parsed) { + let set = match resolution { Ok(Some(set)) => set, Ok(None) => { run_verdict_pass(manager, outcomes, opts); @@ -1215,7 +1203,11 @@ fn run_verdict_pass( .collect(); let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); - apply_verdicts(manager, results, outcomes, &Default::default()); + let leftovers = apply_verdicts(manager, results, outcomes, &Default::default()); + debug_assert!( + leftovers.is_empty(), + "named verdict pass left tree leftovers" + ); } fn authenticated_verdict(opts: &PrecheckOptions) -> bool { diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 7a0444c..a3dd91a 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -328,10 +328,13 @@ fn parse_npm_lockfile(json: &str) -> Result, String> { } /// Derive a package name from a lockfile path key like -/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. +/// `node_modules/a/node_modules/@scope/pkg` → `@scope/pkg`. `None` for keys +/// outside `node_modules/` (workspace stanzas carry an explicit `name`). 
fn name_from_lock_path(path: &str) -> Option { - let idx = path.rfind("node_modules/")?; - let name = &path[idx + "node_modules/".len()..]; + if !path.contains("node_modules/") { + return None; + } + let name = crate::deps::ecosystems::npm::package_name_from_lock_key(path); (!name.is_empty()).then(|| name.to_string()) } diff --git a/src/utils/api.rs b/src/utils/api.rs index c82e38e..32805ca 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,6 +1,6 @@ use crate::log::debug; use crate::utils; -use corgea::vuln_api::is_jwt; +use corgea::vuln_api::auth_header; use reqwest::header::HeaderMap; use reqwest::StatusCode; use reqwest::{ @@ -25,14 +25,8 @@ fn get_source() -> String { fn auth_headers(token: &str) -> HeaderMap { let mut headers = HeaderMap::new(); - if is_jwt(token) { - headers.insert( - "Authorization", - format!("Bearer {}", token).parse().unwrap(), - ); - } else { - headers.insert("CORGEA-TOKEN", token.parse().unwrap()); - } + let (name, value) = auth_header(token); + headers.insert(name, value.parse().unwrap()); headers.insert("CORGEA-SOURCE", get_source().parse().unwrap()); headers } diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index a7d32c6..bcc965b 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -22,15 +22,15 @@ fn user_agent() -> String { format!("corgea-cli/{} (deps)", env!("CARGO_PKG_VERSION")) } -fn http_client() -> Result<&'static reqwest::blocking::Client, String> { +fn http_client() -> &'static reqwest::blocking::Client { static CLIENT: OnceLock = OnceLock::new(); - Ok(CLIENT.get_or_init(|| { + CLIENT.get_or_init(|| { reqwest::blocking::Client::builder() .timeout(REQUEST_TIMEOUT) .user_agent(user_agent()) .build() .expect("registry http client") - })) + }) } /// URL-encode an npm package name. Scoped names contain `@` and `/`, @@ -81,9 +81,6 @@ fn parse_iso8601(raw: &str) -> Result, String> { Err(format!("unrecognised timestamp format: {}", raw)) } -// Resolution helpers (npm + PyPI). 
Inserted before the tests module -// in registry.rs. - /// What the user typed after `pkg@` in an install command. #[derive(Debug, Clone, PartialEq, Eq)] pub enum NpmSpec { @@ -133,7 +130,7 @@ pub fn npm_resolve( .trim_end_matches('/'); let url = format!("{}/{}", base, encode_npm_name(name)); - let client = http_client()?; + let client = http_client(); let resp = client .get(&url) .header("Accept", "application/json") @@ -224,13 +221,10 @@ pub fn npm_resolve( }) } -/// Pick the highest semver-compatible version that satisfies `range`. -/// Pre-releases are excluded unless the range itself references a -/// pre-release (matches npm's behaviour). /// Translate an npm-style version range (`>=1.0.0 <2.0.0`, /// `1.x`, `>=1.0.0`) to a `semver::VersionReq`. The Rust crate uses /// `,` as the AND separator, npm uses whitespace, so we normalise -/// before parsing. +/// before parsing. npm's `||` OR syntax is unsupported — best-effort skipped. fn parse_npm_range(range: &str) -> Option { if let Ok(req) = semver::VersionReq::parse(range) { return Some(req); @@ -239,13 +233,12 @@ fn parse_npm_range(range: &str) -> Option { semver::VersionReq::parse(&normalised).ok() } +/// Pick the highest published version that satisfies `range`. Pre-releases +/// are excluded unless the range itself references one (matches npm). fn npm_pick_highest_matching( versions: &std::collections::BTreeMap, range: &str, ) -> Option { - // npm separates predicates with spaces (`>=1.0.0 <2.0.0`); the - // Rust `semver` crate uses commas. Try both. We don't support - // npm's `||` OR syntax here — those are best-effort skipped. 
let req = parse_npm_range(range)?; let range_has_prerelease = range.contains('-'); @@ -306,7 +299,7 @@ pub fn pypi_resolve( .trim_end_matches('/'); let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); - let client = http_client()?; + let client = http_client(); let resp = client .get(&url) .header("Accept", "application/json") diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index 5c49721..7d13571 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -66,14 +66,20 @@ pub fn http_client() -> Result { } /// Whether `token` looks like a JWT (three non-empty dot-separated parts). -/// Decides the auth header shape here and in the binary crate's `utils/api.rs`. pub fn is_jwt(token: &str) -> bool { let parts: Vec<&str> = token.splitn(4, '.').collect(); parts.len() == 3 && parts.iter().all(|p| !p.is_empty()) } -fn normalize_base_url(base_url: &str) -> String { - base_url.trim_end_matches('/').to_string() +/// The auth header for a Corgea token: JWT → `Authorization: Bearer`, +/// otherwise the opaque `CORGEA-TOKEN` header. The one definition of the +/// header shape, shared with the binary crate's `utils/api.rs`. +pub fn auth_header(token: &str) -> (&'static str, String) { + if is_jwt(token) { + ("Authorization", format!("Bearer {token}")) + } else { + ("CORGEA-TOKEN", token.to_string()) + } } /// Encode package name for the vuln-api path segment. @@ -100,11 +106,8 @@ fn build_json_get( .header("Accept", "application/json") .header("CORGEA-SOURCE", "cli"); if let Some(token) = token { - if is_jwt(token) { - req = req.header("Authorization", format!("Bearer {}", token)); - } else { - req = req.header("CORGEA-TOKEN", token); - } + let (name, value) = auth_header(token); + req = req.header(name, value); } req } @@ -113,7 +116,7 @@ fn build_json_get( /// a non-empty (trailing-slash-normalized) base URL. Returns the normalized /// base so callers don't re-derive it. 
fn validated_base(base_url: &str) -> Result> { - let base = normalize_base_url(base_url); + let base = base_url.trim_end_matches('/').to_string(); if base.is_empty() { return Err("vuln-api base URL is empty".into()); } @@ -309,42 +312,15 @@ mod tests { ) } - fn header_value(request: &str, name: &str) -> Option { - request - .lines() - .skip(1) - .take_while(|line| !line.trim().is_empty()) - .filter_map(|line| line.split_once(':')) - .find(|(key, _)| key.eq_ignore_ascii_case(name)) - .map(|(_, value)| value.trim().to_string()) - } + use crate::vuln_api_stub::{header_value, spawn_capturing_vuln_api_stub}; fn captured_request(auth_token: Option<&str>) -> String { - use std::io::Write; - use std::net::TcpListener; - use std::sync::mpsc; - - let listener = TcpListener::bind("127.0.0.1:0").expect("bind capture stub"); - let base_url = format!("http://{}", listener.local_addr().unwrap()); - let (tx, rx) = mpsc::channel(); - std::thread::spawn(move || { - let (mut stream, _) = listener.accept().expect("accept request"); - let buf = crate::vuln_api_stub::read_http_request(&mut stream); - let request = String::from_utf8_lossy(&buf).into_owned(); - tx.send(request).expect("send captured request"); - let body = r#"{"ecosystem":"npm","package_name":"lodash","version":"4.17.20","is_vulnerable":false,"matches":[]}"#; - let response = format!( - "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - body.len(), - body - ); - stream.write_all(response.as_bytes()).unwrap(); - }); - + let (base_url, requests) = spawn_capturing_vuln_api_stub(); let client = http_client().expect("test client"); check_package_version(&client, &base_url, auth_token, "npm", "lodash", "4.17.20") .expect("captured request should succeed"); - rx.recv().expect("captured request") + let requests = requests.lock().unwrap(); + requests[0].clone() } #[test] @@ -533,11 +509,12 @@ mod tests { } #[test] - fn normalize_base_url_strips_trailing_slash() { + fn 
validated_base_strips_trailing_slash() { assert_eq!( - normalize_base_url("http://localhost:8080/"), + validated_base("http://localhost:8080/").unwrap(), "http://localhost:8080" ); + assert!(validated_base("").is_err()); } #[test] diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index df8a092..421cd0e 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -54,6 +54,58 @@ pub fn spawn_with_retry_once( } } +/// Vuln-api stub that records raw requests and answers every package check +/// with a clean verdict (echoing the eco/name/version from the path). Used +/// to assert auth-header behavior, both in-crate and from the CLI. +pub fn spawn_capturing_vuln_api_stub() -> (String, std::sync::Arc>>) { + use std::sync::{Arc, Mutex}; + + let listener = TcpListener::bind("127.0.0.1:0").expect("bind capture stub"); + let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); + let requests = Arc::new(Mutex::new(Vec::new())); + let requests_in_stub = Arc::clone(&requests); + thread::spawn(move || { + for stream in listener.incoming() { + let Ok(mut stream) = stream else { continue }; + let buf = read_http_request(&mut stream); + let req = String::from_utf8_lossy(&buf).into_owned(); + requests_in_stub.lock().unwrap().push(req.clone()); + let path = req + .lines() + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .unwrap_or(""); + let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); + let (eco, name, ver) = if parts.len() >= 7 { + (parts[2], parts[3], parts[5]) + } else { + ("pypi", "unknown", "0.0.0") + }; + let name = urlencoding::decode(name).unwrap_or_default(); + let ver = urlencoding::decode(ver).unwrap_or_default(); + let body = default_clean_response(eco, &name, &ver); + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body.len(), + body + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + 
(base_url, requests) +} + +/// The value of header `name` in a raw captured HTTP request, if present. +pub fn header_value(request: &str, name: &str) -> Option { + request + .lines() + .skip(1) + .take_while(|line| !line.trim().is_empty()) + .filter_map(|line| line.split_once(':')) + .find(|(key, _)| key.eq_ignore_ascii_case(name)) + .map(|(_, value)| value.trim().to_string()) +} + /// Read one HTTP request's bytes (through the header terminator) off `stream`. pub fn read_http_request(stream: &mut std::net::TcpStream) -> Vec { let mut buf = Vec::with_capacity(4096); diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs index 01ec582..b5946ca 100644 --- a/tests/cli_exec_fallback.rs +++ b/tests/cli_exec_fallback.rs @@ -11,12 +11,13 @@ mod common; use common::{corgea_isolated, spawn_oldpkg_registry_stub, write_fake_recorder}; +use std::collections::HashMap; use std::path::PathBuf; use std::process::Command; use tempfile::TempDir; -/// Isolated `corgea` wired to the PyPI stub, with `PATH` set to a private -/// temp dir containing only the named fake binaries. +/// Isolated `corgea` wired to the PyPI and vuln-api stubs, with `PATH` set +/// to a private temp dir containing only the named fake binaries. 
struct FallbackHarness { cmd: Command, marker: PathBuf, @@ -33,8 +34,10 @@ impl FallbackHarness { write_fake_recorder(bin.path(), binary, &marker, 0); } let registry = spawn_oldpkg_registry_stub(); + let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", &registry); + .env("CORGEA_PYPI_REGISTRY", &registry) + .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); Self { cmd, marker, diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 2941d7c..16e46a0 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -11,7 +11,8 @@ mod common; -use common::{header_value, key, spawn_capturing_vuln_api_stub, vulnerable_body, PipHarness}; +use common::{key, vulnerable_body, PipHarness}; +use corgea::vuln_api_stub::{header_value, spawn_capturing_vuln_api_stub}; use std::collections::HashMap; fn vulnerable_oldpkg_body() -> String { diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 2fd9e43..2ccad96 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -132,62 +132,6 @@ where base_url } -/// Vuln-api stub that records raw requests and answers every package check -/// with a clean verdict. Used to assert auth-header behavior from the CLI. 
-#[allow(dead_code)] -pub fn spawn_capturing_vuln_api_stub() -> (String, std::sync::Arc>>) { - use std::io::Write; - use std::net::TcpListener; - use std::sync::{Arc, Mutex}; - - let listener = TcpListener::bind("127.0.0.1:0").expect("bind capture stub"); - let base_url = format!("http://127.0.0.1:{}", listener.local_addr().unwrap().port()); - let requests = Arc::new(Mutex::new(Vec::new())); - let requests_in_stub = Arc::clone(&requests); - std::thread::spawn(move || { - for stream in listener.incoming() { - let Ok(mut stream) = stream else { continue }; - let buf = corgea::vuln_api_stub::read_http_request(&mut stream); - let req = String::from_utf8_lossy(&buf).into_owned(); - requests_in_stub.lock().unwrap().push(req.clone()); - let path = req - .lines() - .next() - .and_then(|l| l.split_whitespace().nth(1)) - .unwrap_or(""); - let parts: Vec<&str> = path.trim_start_matches('/').split('/').collect(); - let (eco, name, ver) = if parts.len() >= 7 { - (parts[2], parts[3], parts[5]) - } else { - ("pypi", "unknown", "0.0.0") - }; - let name = urlencoding::decode(name).unwrap_or_default(); - let ver = urlencoding::decode(ver).unwrap_or_default(); - let body = format!( - r#"{{"ecosystem":"{eco}","package_name":"{name}","version":"{ver}","is_vulnerable":false,"matches":[]}}"# - ); - let response = format!( - "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - body.len(), - body - ); - let _ = stream.write_all(response.as_bytes()); - } - }); - (base_url, requests) -} - -#[allow(dead_code)] -pub fn header_value(request: &str, name: &str) -> Option { - request - .lines() - .skip(1) - .take_while(|line| !line.trim().is_empty()) - .filter_map(|line| line.split_once(':')) - .find(|(key, _)| key.eq_ignore_ascii_case(name)) - .map(|(_, value)| value.trim().to_string()) -} - /// Registry stub serving `/pypi/oldpkg/json` (pypi) and `/oldpkg` (npm /// packument), both published 2020 → never recent. Everything else 404s. 
#[allow(dead_code)] @@ -262,25 +206,6 @@ pub fn write_fake_recorder( write_script(dir, binary, &script); } -/// Write an executable fake `pip` that simulates an old pip with no -/// `--report`: the tree dry-run exits 2 *without* touching the marker, so -/// tests exercise the named-only fallback path. Any other invocation -/// records its argv to `marker` and exits `exit_code`. -#[cfg(unix)] -#[allow(dead_code)] -pub fn write_fake_pip_without_report( - dir: &std::path::Path, - marker: &std::path::Path, - exit_code: i32, -) { - let script = format!( - "#!/bin/sh\ncase \" $* \" in *\" --dry-run \"*) exit 2;; esac\nprintf '%s' \"$*\" > '{}'\nexit {}\n", - marker.display(), - exit_code - ); - write_script(dir, "pip", &script); -} - /// Sentinel payload that makes a tree-aware fake manager exit non-zero on /// its tree (resolution) invocation, forcing the named-only fallback. #[allow(dead_code)] @@ -346,7 +271,9 @@ impl PipHarness { let (mut cmd, home) = corgea_isolated(); let bin = TempDir::new().expect("temp bin dir"); let marker = bin.path().join("pm-argv.txt"); - write_fake_pip_without_report(bin.path(), &marker, pip_exit_code); + // RESOLUTION_FAILS models an old pip with no `--report`: the tree + // dry-run exits 2, so these tests exercise the named-only fallback. 
+ write_fake_tree_pm(bin.path(), "pip", &marker, RESOLUTION_FAILS, pip_exit_code); let registry = spawn_wildcard_pypi_stub(); let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); cmd.env("PATH", bin.path()) From 928bdc083975bd61fdf72741221f6ad9d4ceeeb7 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 17:29:15 +0200 Subject: [PATCH 38/59] Extract precheck detection module --- src/precheck/detect.rs | 252 ++++++++++++++++++++++++++++++++++++++++ src/precheck/mod.rs | 253 +---------------------------------------- 2 files changed, 257 insertions(+), 248 deletions(-) create mode 100644 src/precheck/detect.rs diff --git a/src/precheck/detect.rs b/src/precheck/detect.rs new file mode 100644 index 0000000..5508eb7 --- /dev/null +++ b/src/precheck/detect.rs @@ -0,0 +1,252 @@ +//! Package-manager/project detection: wrong-manager and +//! externally-managed-pip (PEP 668) guidance messages. + +use std::ffi::OsString; +use std::path::Path; +use std::process::Command; + +use super::{corgea_cmd, parse, PackageManager}; + +pub(super) fn wrong_package_manager_message( + manager: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + let cwd = &std::env::current_dir().ok()?; + let expected = match manager { + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + let expected = detect_node_manager_from(cwd)?; + (expected != manager).then_some(expected)? 
+ } + PackageManager::Pip if detect_uv_project_from(cwd) => PackageManager::Uv, + PackageManager::Uv if detect_pip_project_from(cwd) => PackageManager::Pip, + _ => return None, + }; + + let suggestion = suggested_install_command(expected, rest, parsed); + Some(format!( + "error: this project appears to use {}, but you ran {}.\nDid you mean `{suggestion}`?", + expected.binary_name(), + manager.binary_name() + )) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ProjectManagerDetection { + None, + Ambiguous, + Found(PackageManager), +} + +fn detect_node_manager_from(start: &Path) -> Option { + for dir in start.ancestors() { + match detect_node_manager_in_dir(dir) { + ProjectManagerDetection::Found(manager) => return Some(manager), + ProjectManagerDetection::Ambiguous => return None, + ProjectManagerDetection::None => {} + } + } + None +} + +fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { + match package_json_manager(dir) { + ProjectManagerDetection::None => {} + found => return found, + } + + let mut found = Vec::new(); + if dir.join("pnpm-lock.yaml").is_file() { + found.push(PackageManager::Pnpm); + } + if dir.join("yarn.lock").is_file() { + found.push(PackageManager::Yarn); + } + if dir.join("package-lock.json").is_file() || dir.join("npm-shrinkwrap.json").is_file() { + found.push(PackageManager::Npm); + } + + match found.as_slice() { + [] => ProjectManagerDetection::None, + [manager] => ProjectManagerDetection::Found(*manager), + _ => ProjectManagerDetection::Ambiguous, + } +} + +/// `packageManager`-field detection. Missing/unparsable `package.json` and a +/// missing field both fall through to lockfile detection (`None`). 
+fn package_json_manager(dir: &Path) -> ProjectManagerDetection { + let json: Option = std::fs::read_to_string(dir.join("package.json")) + .ok() + .and_then(|raw| serde_json::from_str(&raw).ok()); + let Some(package_manager) = json + .as_ref() + .and_then(|j| j.get("packageManager")) + .and_then(|v| v.as_str()) + else { + return ProjectManagerDetection::None; + }; + parse_node_package_manager(package_manager) + .map(ProjectManagerDetection::Found) + .unwrap_or(ProjectManagerDetection::Ambiguous) +} + +fn parse_node_package_manager(raw: &str) -> Option { + let name = raw.trim().split('@').next().unwrap_or("").trim(); + match name { + "npm" => Some(PackageManager::Npm), + "yarn" => Some(PackageManager::Yarn), + "pnpm" => Some(PackageManager::Pnpm), + _ => None, + } +} + +fn detect_uv_project_from(start: &Path) -> bool { + start.ancestors().any(|dir| dir.join("uv.lock").is_file()) +} + +fn detect_pip_project_from(start: &Path) -> bool { + start + .ancestors() + .take_while(|dir| !dir.join("pyproject.toml").is_file() && !dir.join("uv.lock").is_file()) + .any(has_requirements_file) +} + +fn has_requirements_file(dir: &Path) -> bool { + let Ok(entries) = std::fs::read_dir(dir) else { + return false; + }; + entries.filter_map(Result::ok).any(|entry| { + let name = entry.file_name(); + let name = name.to_string_lossy(); + entry.path().is_file() + && ((name.starts_with("requirements") + && (name.ends_with(".txt") || name.ends_with(".in"))) + || name.ends_with("-requirements.txt")) + }) +} + +fn suggested_install_command( + expected: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> String { + let mut parts = vec!["corgea".to_string(), expected.binary_name().to_string()]; + match expected { + PackageManager::Npm => parts.push("install".to_string()), + PackageManager::Yarn | PackageManager::Pnpm => { + if parsed.targets.is_empty() && parsed.requirements_files.is_empty() { + parts.push("install".to_string()); + } else { + parts.push("add".to_string()); 
+ } + } + PackageManager::Uv => { + if is_plain_pip_target_install(rest, parsed) { + parts.push("add".to_string()); + parts.extend(parsed.targets.iter().map(|target| target.display.clone())); + return parts.join(" "); + } + parts.push("pip".to_string()); + parts.push("install".to_string()); + } + PackageManager::Pip => parts.push("install".to_string()), + } + parts.extend(rest.iter().cloned()); + parts.join(" ") +} + +fn is_plain_pip_target_install(rest: &[String], parsed: &parse::ParsedInstall) -> bool { + !parsed.targets.is_empty() + && parsed.requirements_files.is_empty() + && rest.len() == parsed.targets.len() + && rest + .iter() + .zip(&parsed.targets) + .all(|(arg, target)| arg == &target.display) +} + +pub(super) fn externally_managed_pip_message( + manager: PackageManager, + rest: &[String], + parsed: &parse::ParsedInstall, +) -> Option { + if manager != PackageManager::Pip + || (parsed.targets.is_empty() && parsed.requirements_files.is_empty()) + || pip_install_overrides_external_management(rest) + || !pip_environment_is_externally_managed() + { + return None; + } + + Some(format!( + "error: this Python environment is externally managed (PEP 668).\nCreate and activate a virtualenv, then retry `{}`.", + corgea_cmd(&["pip", "install"], rest) + )) +} + +fn pip_install_overrides_external_management(args: &[String]) -> bool { + const VALUE_FLAGS: [&str; 4] = ["-t", "--target", "--prefix", "--root"]; + args.iter().any(|arg| { + arg == "--break-system-packages" + || VALUE_FLAGS + .iter() + .any(|flag| arg == flag || arg.starts_with(&format!("{flag}="))) + }) +} + +fn pip_environment_is_externally_managed() -> bool { + let Ok(pip) = super::resolve_binary("pip") else { + return false; + }; + // PEP 668 markers live in a system interpreter's stdlib; pip inside an + // active virtualenv can't be externally managed - skip the spawn. 
+ if let Some(venv) = std::env::var_os("VIRTUAL_ENV") { + if pip.starts_with(&venv) { + return false; + } + } + let Some(interpreter) = python_interpreter_from_shebang(&pip) else { + return false; + }; + + let mut command = Command::new(&interpreter[0]); + command.args(&interpreter[1..]); + let Ok(output) = command.arg("-c").arg(EXTERNALLY_MANAGED_PYTHON).output() else { + return false; + }; + output.status.success() && String::from_utf8_lossy(&output.stdout).trim() == "1" +} + +const EXTERNALLY_MANAGED_PYTHON: &str = r#" +import pathlib +import sysconfig + +paths = [] +for key in ("stdlib", "platstdlib"): + path = sysconfig.get_path(key) + if path and path not in paths: + paths.append(path) + +print("1" if any((pathlib.Path(path) / "EXTERNALLY-MANAGED").is_file() for path in paths) else "0") +"#; + +fn python_interpreter_from_shebang(path: &Path) -> Option<Vec<OsString>> { + let content = std::fs::read_to_string(path).ok()?; + let first = content.lines().next()?.strip_prefix("#!")?.trim(); + let mut parts: Vec<&str> = first.split_whitespace().collect(); + if parts.is_empty() { + return None; + } + if parts[0].ends_with("/env") || parts[0] == "env" { + parts.remove(0); + if parts.first() == Some(&"-S") { + parts.remove(0); + } + } + let executable = parts.first()?; + if !executable.contains("python") { + return None; + } + Some(parts.iter().map(OsString::from).collect()) +} diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index eee169e..64b3e38 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -16,8 +16,9 @@ pub mod parse; pub mod tree; +mod detect; + use std::ffi::OsString; -use std::path::Path; use std::process::Command; use std::time::Duration; @@ -366,12 +367,12 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption } }; - if let Some(message) = wrong_package_manager_message(manager, rest, &parsed) { + if let Some(message) = detect::wrong_package_manager_message(manager, rest, &parsed) { eprintln!("{message}"); return 1; } - if 
let Some(message) = externally_managed_pip_message(manager, rest, &parsed) { + if let Some(message) = detect::externally_managed_pip_message(manager, rest, &parsed) { eprintln!("{message}"); return 1; } @@ -410,250 +411,6 @@ fn unsupported_pip_add_message(rest: &[String]) -> String { ) } -fn wrong_package_manager_message( - manager: PackageManager, - rest: &[String], - parsed: &parse::ParsedInstall, -) -> Option { - let cwd = &std::env::current_dir().ok()?; - let expected = match manager { - PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { - let expected = detect_node_manager_from(cwd)?; - (expected != manager).then_some(expected)? - } - PackageManager::Pip if detect_uv_project_from(cwd) => PackageManager::Uv, - PackageManager::Uv if detect_pip_project_from(cwd) => PackageManager::Pip, - _ => return None, - }; - - let suggestion = suggested_install_command(expected, rest, parsed); - Some(format!( - "error: this project appears to use {}, but you ran {}.\nDid you mean `{suggestion}`?", - expected.binary_name(), - manager.binary_name() - )) -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ProjectManagerDetection { - None, - Ambiguous, - Found(PackageManager), -} - -fn detect_node_manager_from(start: &Path) -> Option { - for dir in start.ancestors() { - match detect_node_manager_in_dir(dir) { - ProjectManagerDetection::Found(manager) => return Some(manager), - ProjectManagerDetection::Ambiguous => return None, - ProjectManagerDetection::None => {} - } - } - None -} - -fn detect_node_manager_in_dir(dir: &Path) -> ProjectManagerDetection { - match package_json_manager(dir) { - ProjectManagerDetection::None => {} - found => return found, - } - - let mut found = Vec::new(); - if dir.join("pnpm-lock.yaml").is_file() { - found.push(PackageManager::Pnpm); - } - if dir.join("yarn.lock").is_file() { - found.push(PackageManager::Yarn); - } - if dir.join("package-lock.json").is_file() || dir.join("npm-shrinkwrap.json").is_file() { - 
found.push(PackageManager::Npm); - } - - match found.as_slice() { - [] => ProjectManagerDetection::None, - [manager] => ProjectManagerDetection::Found(*manager), - _ => ProjectManagerDetection::Ambiguous, - } -} - -/// `packageManager`-field detection. Missing/unparsable `package.json` and a -/// missing field both fall through to lockfile detection (`None`). -fn package_json_manager(dir: &Path) -> ProjectManagerDetection { - let json: Option = std::fs::read_to_string(dir.join("package.json")) - .ok() - .and_then(|raw| serde_json::from_str(&raw).ok()); - let Some(package_manager) = json - .as_ref() - .and_then(|j| j.get("packageManager")) - .and_then(|v| v.as_str()) - else { - return ProjectManagerDetection::None; - }; - parse_node_package_manager(package_manager) - .map(ProjectManagerDetection::Found) - .unwrap_or(ProjectManagerDetection::Ambiguous) -} - -fn parse_node_package_manager(raw: &str) -> Option { - let name = raw.trim().split('@').next().unwrap_or("").trim(); - match name { - "npm" => Some(PackageManager::Npm), - "yarn" => Some(PackageManager::Yarn), - "pnpm" => Some(PackageManager::Pnpm), - _ => None, - } -} - -fn detect_uv_project_from(start: &Path) -> bool { - start.ancestors().any(|dir| dir.join("uv.lock").is_file()) -} - -fn detect_pip_project_from(start: &Path) -> bool { - start - .ancestors() - .take_while(|dir| !dir.join("pyproject.toml").is_file() && !dir.join("uv.lock").is_file()) - .any(has_requirements_file) -} - -fn has_requirements_file(dir: &Path) -> bool { - let Ok(entries) = std::fs::read_dir(dir) else { - return false; - }; - entries.filter_map(Result::ok).any(|entry| { - let name = entry.file_name(); - let name = name.to_string_lossy(); - entry.path().is_file() - && ((name.starts_with("requirements") - && (name.ends_with(".txt") || name.ends_with(".in"))) - || name.ends_with("-requirements.txt")) - }) -} - -fn suggested_install_command( - expected: PackageManager, - rest: &[String], - parsed: &parse::ParsedInstall, -) -> String { - 
let mut parts = vec!["corgea".to_string(), expected.binary_name().to_string()]; - match expected { - PackageManager::Npm => parts.push("install".to_string()), - PackageManager::Yarn | PackageManager::Pnpm => { - if parsed.targets.is_empty() && parsed.requirements_files.is_empty() { - parts.push("install".to_string()); - } else { - parts.push("add".to_string()); - } - } - PackageManager::Uv => { - if is_plain_pip_target_install(rest, parsed) { - parts.push("add".to_string()); - parts.extend(parsed.targets.iter().map(|target| target.display.clone())); - return parts.join(" "); - } - parts.push("pip".to_string()); - parts.push("install".to_string()); - } - PackageManager::Pip => parts.push("install".to_string()), - } - parts.extend(rest.iter().cloned()); - parts.join(" ") -} - -fn is_plain_pip_target_install(rest: &[String], parsed: &parse::ParsedInstall) -> bool { - !parsed.targets.is_empty() - && parsed.requirements_files.is_empty() - && rest.len() == parsed.targets.len() - && rest - .iter() - .zip(&parsed.targets) - .all(|(arg, target)| arg == &target.display) -} - -fn externally_managed_pip_message( - manager: PackageManager, - rest: &[String], - parsed: &parse::ParsedInstall, -) -> Option { - if manager != PackageManager::Pip - || (parsed.targets.is_empty() && parsed.requirements_files.is_empty()) - || pip_install_overrides_external_management(rest) - || !pip_environment_is_externally_managed() - { - return None; - } - - Some(format!( - "error: this Python environment is externally managed (PEP 668).\nCreate and activate a virtualenv, then retry `{}`.", - corgea_cmd(&["pip", "install"], rest) - )) -} - -fn pip_install_overrides_external_management(args: &[String]) -> bool { - const VALUE_FLAGS: [&str; 4] = ["-t", "--target", "--prefix", "--root"]; - args.iter().any(|arg| { - arg == "--break-system-packages" - || VALUE_FLAGS - .iter() - .any(|flag| arg == flag || arg.starts_with(&format!("{flag}="))) - }) -} - -fn pip_environment_is_externally_managed() -> bool { 
- let Ok(pip) = resolve_binary("pip") else { - return false; - }; - // PEP 668 markers live in a system interpreter's stdlib; pip inside an - // active virtualenv can't be externally managed — skip the spawn. - if let Some(venv) = std::env::var_os("VIRTUAL_ENV") { - if pip.starts_with(&venv) { - return false; - } - } - let Some(interpreter) = python_interpreter_from_shebang(&pip) else { - return false; - }; - - let mut command = Command::new(&interpreter[0]); - command.args(&interpreter[1..]); - let Ok(output) = command.arg("-c").arg(EXTERNALLY_MANAGED_PYTHON).output() else { - return false; - }; - output.status.success() && String::from_utf8_lossy(&output.stdout).trim() == "1" -} - -const EXTERNALLY_MANAGED_PYTHON: &str = r#" -import pathlib -import sysconfig - -paths = [] -for key in ("stdlib", "platstdlib"): - path = sysconfig.get_path(key) - if path and path not in paths: - paths.append(path) - -print("1" if any((pathlib.Path(path) / "EXTERNALLY-MANAGED").is_file() for path in paths) else "0") -"#; - -fn python_interpreter_from_shebang(path: &Path) -> Option> { - let content = std::fs::read_to_string(path).ok()?; - let first = content.lines().next()?.strip_prefix("#!")?.trim(); - let mut parts: Vec<&str> = first.split_whitespace().collect(); - if parts.is_empty() { - return None; - } - if parts[0].ends_with("/env") || parts[0] == "env" { - parts.remove(0); - if parts.first() == Some(&"-S") { - parts.remove(0); - } - } - let executable = parts.first()?; - if !executable.contains("python") { - return None; - } - Some(parts.iter().map(OsString::from).collect()) -} - fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { let exec = || exec_command("uv", cmd); @@ -684,7 +441,7 @@ fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { parse::UvCommand::Add { add_args } => { let parsed = parse::parse_pypi_positionals_args(add_args); if let Some(message) = - wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) + 
detect::wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) { eprintln!("{message}"); return 1; From 1041bf5ccb6d4484125979e6b6a568bc27a56dbf Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 17:40:49 +0200 Subject: [PATCH 39/59] Extract precheck exec module --- src/precheck/detect.rs | 2 +- src/precheck/exec.rs | 63 +++++++++++++++++++++++++++++++++++++++++ src/precheck/mod.rs | 64 ++++-------------------------------------- src/precheck/tree.rs | 6 ++-- 4 files changed, 72 insertions(+), 63 deletions(-) create mode 100644 src/precheck/exec.rs diff --git a/src/precheck/detect.rs b/src/precheck/detect.rs index 5508eb7..cf055af 100644 --- a/src/precheck/detect.rs +++ b/src/precheck/detect.rs @@ -196,7 +196,7 @@ fn pip_install_overrides_external_management(args: &[String]) -> bool { } fn pip_environment_is_externally_managed() -> bool { - let Ok(pip) = super::resolve_binary("pip") else { + let Ok(pip) = super::exec::resolve_binary("pip") else { return false; }; // PEP 668 markers live in a system interpreter's stdlib; pip inside an diff --git a/src/precheck/exec.rs b/src/precheck/exec.rs new file mode 100644 index 0000000..2e491e9 --- /dev/null +++ b/src/precheck/exec.rs @@ -0,0 +1,63 @@ +//! Resolve and exec the real package manager, forwarding args and exit codes. + +use std::ffi::OsString; +use std::process::Command; + +use super::PackageManager; + +pub(super) fn exec_install_with_args( + manager: PackageManager, + subcommand: &str, + rest: &[String], +) -> i32 { + let mut full = Vec::with_capacity(rest.len() + 1); + full.push(subcommand.to_string()); + full.extend(rest.iter().cloned()); + exec_command(manager.binary_name(), &full) +} + +/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the +/// one manager with a conventional alias, so a missing `pip` retries `pip3`. +/// The error names the binary and any fallback tried. 
+pub(super) fn resolve_binary(binary: &str) -> Result<std::path::PathBuf, String> { + if let Ok(p) = which::which(binary) { + return Ok(p); + } + if binary == "pip" { + if let Ok(p) = which::which("pip3") { + return Ok(p); + } + return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); + } + Err(format!("error: '{binary}' not found on PATH")) +} + +pub(super) fn exec_command(binary: &str, args: &[String]) -> i32 { + let resolved = match resolve_binary(binary) { + Ok(p) => p, + Err(msg) => { + eprintln!("{msg}"); + return 127; + } + }; + + let os_args: Vec<OsString> = args.iter().map(OsString::from).collect(); + + match Command::new(&resolved).args(&os_args).status() { + Ok(status) => status.code().unwrap_or_else(|| { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(sig) = status.signal() { + return 128 + sig; + } + } + 1 + }), + Err(e) => { + // Name the resolved path: it may be the pip3 fallback, not `binary`. + eprintln!("failed to exec {}: {}", resolved.display(), e); + 1 + } + } +} diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 64b3e38..0a6244d 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -17,9 +17,8 @@ pub mod parse; pub mod tree; mod detect; +mod exec; -use std::ffi::OsString; -use std::process::Command; use std::time::Duration; use chrono::Utc; @@ -344,7 +343,7 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption } if cmd.is_empty() { - return exec_command(manager.binary_name(), &[]); + return exec::exec_command(manager.binary_name(), &[]); } let subcommand = &cmd[0]; @@ -356,7 +355,7 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOption } if !manager.is_install_subcommand(subcommand) { - return exec_install_with_args(manager, subcommand, rest); + return exec::exec_install_with_args(manager, subcommand, rest); } let parsed = match parse::parse_install_args(manager, rest) { @@ -382,7 +381,7 @@ pub fn run_install(manager: PackageManager, cmd: &[String], opts: 
PrecheckOption subcommand, rest, parsed, - || exec_install_with_args(manager, subcommand, rest), + || exec::exec_install_with_args(manager, subcommand, rest), opts, ) } @@ -412,7 +411,7 @@ fn unsupported_pip_add_message(rest: &[String]) -> String { } fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { - let exec = || exec_command("uv", cmd); + let exec = || exec::exec_command("uv", cmd); if matches!(cmd.first().map(String::as_str), Some("install" | "i")) { eprintln!("{}", unsupported_uv_install_message(&cmd[1..])); @@ -1042,59 +1041,6 @@ fn verify_one( } } -fn exec_install_with_args(manager: PackageManager, subcommand: &str, rest: &[String]) -> i32 { - let mut full = Vec::with_capacity(rest.len() + 1); - full.push(subcommand.to_string()); - full.extend(rest.iter().cloned()); - exec_command(manager.binary_name(), &full) -} - -/// Resolve `binary` on PATH. On Windows this finds `.cmd` shims. pip is the -/// one manager with a conventional alias, so a missing `pip` retries `pip3`. -/// The error names the binary and any fallback tried. -fn resolve_binary(binary: &str) -> Result { - if let Ok(p) = which::which(binary) { - return Ok(p); - } - if binary == "pip" { - if let Ok(p) = which::which("pip3") { - return Ok(p); - } - return Err("error: 'pip' not found on PATH (also tried 'pip3')".to_string()); - } - Err(format!("error: '{binary}' not found on PATH")) -} - -fn exec_command(binary: &str, args: &[String]) -> i32 { - let resolved = match resolve_binary(binary) { - Ok(p) => p, - Err(msg) => { - eprintln!("{msg}"); - return 127; - } - }; - - let os_args: Vec = args.iter().map(OsString::from).collect(); - - match Command::new(&resolved).args(&os_args).status() { - Ok(status) => status.code().unwrap_or_else(|| { - #[cfg(unix)] - { - use std::os::unix::process::ExitStatusExt; - if let Some(sig) = status.signal() { - return 128 + sig; - } - } - 1 - }), - Err(e) => { - // Name the resolved path: it may be the pip3 fallback, not `binary`. 
- eprintln!("failed to exec {}: {}", resolved.display(), e); - 1 - } - } -} - /// Suffix for a vulnerable match line: the advisory's fix, if known. fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { match &m.fixed_version { diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index a3dd91a..03af40d 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -59,7 +59,7 @@ fn stderr_tail(output: &std::process::Output) -> String { fn resolve_pip_tree(binary: &str, install_args: &[String]) -> Result, String> { // Same binary resolution as the exec path (pip → pip3 fallback) — the // tree pass must not silently degrade on pip3-only systems. - let resolved = super::resolve_binary(binary)?; + let resolved = super::exec::resolve_binary(binary)?; let output = Command::new(resolved) .arg("install") .args([ @@ -118,7 +118,7 @@ fn parse_pip_report(json: &str) -> Result, String> { /// are already surfaced as skipped warnings. Index selection comes from /// uv's env/config; index flags on the wrapped command don't carry over. fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result, String> { - let uv = super::resolve_binary("uv")?; + let uv = super::exec::resolve_binary("uv")?; let mut input = String::new(); for t in &parsed.targets { if !matches!(t.kind, super::TargetKind::Unverifiable { .. }) { @@ -262,7 +262,7 @@ fn direct_deps_from_manifest(json: &str) -> std::collections::HashSet { /// `--ignore-scripts` because npm has run lifecycle scripts under /// `--package-lock-only` before (npm/cli#2787). 
fn resolve_npm_tree(binary: &str, install_args: &[String]) -> Result, String> { - let resolved = super::resolve_binary(binary)?; + let resolved = super::exec::resolve_binary(binary)?; let work = tempfile::tempdir().map_err(|e| format!("create temp dir: {e}"))?; for manifest in [ "package.json", From 67e6e856178223cf86b92c599ed07584c5af9681 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 17:54:11 +0200 Subject: [PATCH 40/59] Extract precheck render module --- src/precheck/mod.rs | 881 +---------------------------------- src/precheck/render.rs | 762 ++++++++++++++++++++++++++++++ src/precheck/test_support.rs | 124 +++++ 3 files changed, 898 insertions(+), 869 deletions(-) create mode 100644 src/precheck/render.rs create mode 100644 src/precheck/test_support.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 0a6244d..edb5317 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -18,13 +18,15 @@ pub mod tree; mod detect; mod exec; +mod render; + +#[cfg(test)] +mod test_support; use std::time::Duration; use chrono::Utc; -use crate::verify_deps; - /// Supported package managers. Each one shares enough behaviour with /// the others that we only need a small per-manager dispatch. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -131,9 +133,6 @@ pub enum VerdictStatus { NotChecked, } -/// Reason recorded on resolved targets when no verdict pass ran. 
-const NO_VERDICT_REASON: &str = "vulnerability verdict not checked"; - #[derive(Debug, Clone)] pub struct PrecheckOptions { pub threshold: Duration, @@ -526,14 +525,14 @@ fn report_and_exec( exec: impl FnOnce() -> i32, ) -> i32 { if opts.json { - print_json(report, opts); + render::print_json(report, opts); } else { - print_text(report); + render::print_text(report); } - warn_public_lookup_failures(report, opts); + render::warn_public_lookup_failures(report, opts); if should_block_install(report, opts) { if !opts.json { - print_refusal(report, opts); + render::print_refusal(report, opts); } return 1; } @@ -591,9 +590,9 @@ fn run_parsed_install( // Only a truly bare install gets the bare note. A `-r requirements.txt` // install is covered by `requirements_note`. if bare_install { - bare_install_note(manager, subcommand_label); + render::bare_install_note(manager, subcommand_label); } - requirements_note(&parsed); + render::requirements_note(&parsed); return exec(); } @@ -629,7 +628,7 @@ fn run_parsed_install( // The requirements note only matters when the tree pass did *not* cover // those files (fallback to named-only, or verdicts disabled). if !matches!(&tree, Some(TreeReport::Full { .. })) { - requirements_note(&parsed); + render::requirements_note(&parsed); } if opts .verdict @@ -654,97 +653,6 @@ fn run_parsed_install( report_and_exec(&report, &opts, exec) } -/// One honest stderr line when a zero-spec install can't be gated: -/// yarn/pnpm/uv have no safe dry-run, so a bare install pulls its whole -/// dependency set unchecked. No-op for other managers (bare npm is gated -/// via the tree pass; bare pip installs nothing). 
-fn bare_install_note(manager: PackageManager, subcommand_label: &str) { - if matches!( - manager, - PackageManager::Yarn | PackageManager::Pnpm | PackageManager::Uv - ) { - eprintln!( - "note: bare '{} {}' is not gated (no safe dry-run) — dependencies install unchecked", - manager.binary_name(), - subcommand_label - ); - } -} - -/// The refusal line on stderr. When vulnerable findings exist but none sit on -/// a named target — and no named target is unverifiable either — the block is -/// entirely the existing tree's doing, so say that instead of implying the -/// package the user typed is at fault. Messaging only; the block decision -/// stays with `should_block_install`. -fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { - if refusal_blames_existing_tree(report, opts) { - eprintln!( - "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." - ); - } else if report.vulnerable_count() > 0 - || (authenticated_verdict(opts) && report.unverifiable_count() > 0) - || (authenticated_verdict(opts) && report.error_count() > 0) - { - eprintln!("Refusing to run install. Pass --force to proceed despite findings."); - } else { - eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); - } -} - -/// True when the block is entirely the existing tree's doing: vulnerable -/// findings exist, none sit on a named target (or block as unverifiable -/// there), and every *blocking* tree finding — vulnerable or unverifiable, -/// since `should_block_install` refuses on both — genuinely predates this -/// command. A `Requested` finding (pip `-r`) is added by this command and -/// renders as `(from requirements)`; a `Transitive` finding on any install -/// that names targets or requirements files is being pulled in by them -/// right now. Only a truly bare install (`report.bare_install`) or -/// manifest-declared `PreExisting` findings may blame the existing tree. 
-fn refusal_blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { - let fail_closed = authenticated_verdict(opts); - let named_findings = report.named_vulnerable_count() - + if fail_closed { - report.named_unverifiable_count() - } else { - 0 - }; - if report.vulnerable_count() == 0 || named_findings > 0 { - return false; - } - let Some(TreeReport::Full { transitive, .. }) = &report.tree else { - return false; - }; - transitive - .iter() - .filter(|t| { - matches!(t.verdict, VerdictStatus::Vulnerable(_)) - || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) - }) - .all(|t| match t.origin { - // A locked pin predates the sync command that installs it. - TreeOrigin::PreExisting | TreeOrigin::Locked => true, - TreeOrigin::Requested => false, - TreeOrigin::Transitive => report.bare_install, - }) -} - -/// Print the "requirements files are not recency-checked" note when the -/// install carried any `-r` files. No-op otherwise. -fn requirements_note(parsed: &parse::ParsedInstall) { - if parsed.requirements_files.is_empty() { - return; - } - let files: Vec = parsed - .requirements_files - .iter() - .map(|p| p.display().to_string()) - .collect(); - eprintln!( - "note: requirements files ({}) are not recency-checked by the baseline gate", - files.join(", ") - ); -} - /// Verdict the resolved would-install set (`tree::resolve_tree`'s result). /// On any resolution failure, fall back to the named-only verdict pass; the /// caller renders the loud warning from the returned `NamedOnly` reason. 
@@ -978,12 +886,6 @@ fn public_verdict(opts: &PrecheckOptions) -> bool { .is_some_and(|cfg| cfg.mode.is_public()) } -fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { - if public_verdict(opts) && report.unverifiable_count() > 0 { - eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); - } -} - fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { if opts.force { return false; @@ -1041,424 +943,9 @@ fn verify_one( } } -/// Suffix for a vulnerable match line: the advisory's fix, if known. -fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { - match &m.fixed_version { - Some(v) => format!(" — fixed in {v}"), - None => " — no fixed version known".to_string(), - } -} - -/// Highest of `fixes` after sort/dedup: a single distinct value is returned -/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct -/// values compare by lenient semver. With `all_must_parse`, one unparsable -/// candidate among several poisons the answer (`None`); otherwise unparsable -/// candidates are skipped. -fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { - fixes.sort_unstable(); - fixes.dedup(); - match fixes.as_slice() { - [] => None, - [only] => Some((*only).to_string()), - many => { - let mut best: Option<(semver::Version, &str)> = None; - for raw in many { - let v = - match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) - { - Ok(v) => v, - Err(_) if all_must_parse => return None, - Err(_) => continue, - }; - match &best { - Some((cur, _)) if cur >= &v => {} - _ => best = Some((v, raw)), - } - } - best.map(|(_, raw)| (*raw).to_string()) - } - } -} - -/// The one version certified to clear every match. Requires every match to -/// carry a `fixed_version`; any match without one — or an unparsable -/// candidate among several — means no version can be certified, so `None`. 
-fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { - let fixes: Vec<&str> = matches - .iter() - .map(|m| m.fixed_version.as_deref()) - .collect::>()?; - highest_fix(fixes, true) -} - -/// Highest `fixed_version` the advisories advertise, by lenient semver. -/// Unlike `safe_version` this is *not* a certification: matches without a -/// fix are ignored, so the result may still be vulnerable to them. `None` -/// only when no match advertises a fix (or no candidate parses). -fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { - let fixes: Vec<&str> = matches - .iter() - .filter_map(|m| m.fixed_version.as_deref()) - .collect(); - highest_fix(fixes, false) -} - -/// Per-match advisory lines plus the safe-version steer, shared by the -/// named-target and transitive vulnerable render arms. -fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { - for m in matches { - println!( - " {} ({}){}", - m.advisory_id, - m.severity_level, - fix_note(m) - ); - } - if let Some(safe) = safe_version(matches) { - println!(" → safe version: {name}@{safe}"); - } -} - -/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. -/// The parenthetical separates findings the resolved tree carried in from -/// findings on the targets this command names; omitted when the tree -/// contributed none. -fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { - if from_tree > 0 { - format!("{total} {label} ({from_tree} from resolved tree)") - } else { - format!("{total} {label}") - } -} - -/// More than this many unverifiable findings with the same error-prefix -/// render as one collapsed line instead of one line per package. -const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; - -/// Group key for collapsing repeated unverifiable errors: the text before -/// the first `(` — strips per-package detail (URLs, status codes) so one -/// outage groups under one key. 
-fn error_prefix(error: &str) -> &str { - match error.find('(') { - Some(i) => error[..i].trim_end(), - None => error, - } -} - -/// Unverifiable error strings across transitive tree findings and named -/// outcomes, in render order. -fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { - let mut errors = Vec::new(); - if let Some(TreeReport::Full { transitive, .. }) = &report.tree { - for t in transitive { - if let VerdictStatus::Unverifiable(e) = &t.verdict { - errors.push(e.as_str()); - } - } - } - for o in &report.outcomes { - if let TargetOutcome::Resolved { - verdict: VerdictStatus::Unverifiable(e), - .. - } = o - { - errors.push(e.as_str()); - } - } - errors -} - -/// `(prefix, count, first error)` groups of unverifiable findings large -/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — -/// the vuln-api outage case, where every package fails the same way. -/// Display-only: counts and exit codes never change. -fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { - let mut groups: Vec<(&str, usize, &str)> = Vec::new(); - for e in unverifiable_errors(report) { - let prefix = error_prefix(e); - match groups.iter_mut().find(|(p, _, _)| *p == prefix) { - Some((_, count, _)) => *count += 1, - None => groups.push((prefix, 1, e)), - } - } - groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); - groups -} - -fn print_text(report: &PrecheckReport) { - // Build the echoed command from non-empty parts: a bare gated install - // (e.g. `npm install` with zero specs) has no args to append. 
- let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); - if !report.original_args.is_empty() { - command.push(' '); - command.push_str(&report.original_args.join(" ")); - } - - let collapsed = collapsed_unverifiable_groups(report); - let is_collapsed = |error: &str| { - collapsed - .iter() - .any(|(prefix, _, _)| *prefix == error_prefix(error)) - }; - - println!( - "Pre-checking `{}` (threshold {})", - command, - verify_deps::format_duration(report.threshold) - ); - println!( - " {} ok, {} recent, {}, {}, {} skipped, {} errors", - report.ok_count(), - report.recent_count(), - summary_segment( - report.vulnerable_count(), - report.tree_vulnerable_count(), - "vulnerable" - ), - summary_segment( - report.unverifiable_count(), - report.tree_unverifiable_count(), - "unverifiable" - ), - report.skipped_count(), - report.error_count(), - ); - - match &report.tree { - Some(TreeReport::Full { - resolved_count, - transitive, - .. - }) => { - println!( - " tree: {} packages resolved, {} transitive checked", - resolved_count, - transitive.len() - ); - for t in transitive { - match &t.verdict { - VerdictStatus::Vulnerable(matches) => { - println!( - " ✗ {}@{} {} known vulnerable:", - t.name, - t.version, - t.origin.label() - ); - print_vulnerable_matches(&t.name, matches); - // A vulnerable dep the project already declares can be - // bumped directly — point at the fix as a command. - // When `safe_version` is `Some` it equals - // `advertised_fix` and clears every advisory; otherwise - // some advisory has no fix, so the "(advertised fix)" - // hedge marks the bump as partial. 
- if t.origin == TreeOrigin::PreExisting { - if let Some(fix) = advertised_fix(matches) { - let hedge = if safe_version(matches).is_some() { - "" - } else { - " (advertised fix)" - }; - println!( - " fix with: corgea {} install {}@{}{}", - report.manager.binary_name(), - t.name, - fix, - hedge - ); - } - } - } - VerdictStatus::Unverifiable(error) => { - if !is_collapsed(error) { - println!( - " ⚠ {}@{} {} could not be verified: {}", - t.name, - t.version, - t.origin.label(), - error - ); - } - } - // Clean / not-checked tree entries stay quiet in text mode. - VerdictStatus::Clean | VerdictStatus::NotChecked => {} - } - } - } - Some(TreeReport::NamedOnly { reason }) => { - println!(" tree: transitive dependencies NOT checked ({reason})"); - } - None => {} - } - - // One line per collapsed outage group instead of one per package. - for (_, count, first_error) in &collapsed { - println!( - " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" - ); - } - - for o in &report.outcomes { - match o { - TargetOutcome::Resolved { - target, - resolved, - age, - verdict, - } => match verdict { - VerdictStatus::Vulnerable(matches) => { - println!( - " ✗ {} → {}@{} known vulnerable:", - target.display, resolved.name, resolved.version, - ); - print_vulnerable_matches(&resolved.name, matches); - } - VerdictStatus::Unverifiable(error) => { - if !is_collapsed(error) { - println!( - " ⚠ {} → {}@{} could not be verified: {}", - target.display, resolved.name, resolved.version, error, - ); - } - } - VerdictStatus::Clean | VerdictStatus::NotChecked => { - if report.is_recent(*age) { - println!( - " ⚠ {} → {}@{} published {} ago at {} (within threshold)", - target.display, - resolved.name, - resolved.version, - verify_deps::format_duration(*age), - resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), - ); - } else { - println!( - " ✓ {} → {}@{} published {} ago", - target.display, - resolved.name, - resolved.version, - verify_deps::format_duration(*age), - ); 
- } - } - }, - TargetOutcome::Skipped { target, reason } => { - println!(" ? {}: {}", target.display, reason); - } - TargetOutcome::Error { target, error } => { - println!(" ✗ {}: {}", target.display, error); - } - } - } -} - -/// JSON shape for a single verdict. Shared by named outcomes and tree -/// (transitive) outcomes so both render verdicts identically. -/// `remediation` carries the version that clears every advisory -/// (`safe_version`); `null` when any advisory has no known fix. -fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { - use serde_json::json; - match verdict { - VerdictStatus::Clean => json!({ "status": "clean" }), - VerdictStatus::Vulnerable(matches) => { - json!({ - "status": "vulnerable", - "matches": matches, - "remediation": safe_version(matches), - }) - } - VerdictStatus::Unverifiable(error) => { - json!({ "status": "unverifiable", "error": error }) - } - VerdictStatus::NotChecked => { - json!({ "status": "not_checked", "reason": NO_VERDICT_REASON }) - } - } -} - -fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { - use serde_json::json; - let verdict_mode = match opts.verdict.as_ref().map(|cfg| &cfg.mode) { - Some(VerdictMode::Public) => "public", - Some(VerdictMode::Authenticated { .. 
}) => "authenticated", - None => "recency-only", - }; - let outcomes: Vec<_> = report - .outcomes - .iter() - .map(|o| match o { - TargetOutcome::Resolved { - target, - resolved, - age, - verdict, - } => { - let verdict_json = verdict_json(verdict); - json!({ - "status": if report.is_recent(*age) { "recent" } else { "ok" }, - "spec": target.display, - "name": resolved.name, - "resolved_version": resolved.version, - "published_at": resolved.published_at.to_rfc3339(), - "age_seconds": age.as_secs(), - "verdict": verdict_json, - }) - } - TargetOutcome::Skipped { target, reason } => json!({ - "status": "skipped", - "spec": target.display, - "name": target.name, - "reason": reason, - }), - TargetOutcome::Error { target, error } => json!({ - "status": "error", - "spec": target.display, - "name": target.name, - "error": error, - }), - }) - .collect(); - - let body = json!({ - "manager": report.manager.binary_name(), - "subcommand": report.subcommand, - "args": report.original_args, - "threshold_seconds": report.threshold.as_secs(), - "summary": { - "ok": report.ok_count(), - "recent": report.recent_count(), - "vulnerable": report.vulnerable_count(), - "unverifiable": report.unverifiable_count(), - "skipped": report.skipped_count(), - "errors": report.error_count(), - }, - "verdict_mode": verdict_mode, - "results": outcomes, - "tree": report.tree.as_ref().map(|t| match t { - TreeReport::Full { resolved_count, transitive } => json!({ - "mode": "full", - "reason": serde_json::Value::Null, - "resolved_count": resolved_count, - "transitive": transitive.iter().map(|o| json!({ - "name": o.name, - "version": o.version, - "origin": o.origin.json_name(), - "verdict": verdict_json(&o.verdict), - })).collect::>(), - }), - TreeReport::NamedOnly { reason } => json!({ - "mode": "named-only", - "reason": reason, - "resolved_count": 0, - "transitive": [], - }), - }), - }); - - println!("{}", serde_json::to_string_pretty(&body).unwrap()); -} - #[cfg(test)] mod tests { + use 
super::test_support::*; use super::*; #[test] @@ -1511,35 +998,6 @@ source = { git = "https://example.com/repo?rev=abc#abc" } assert!(err.contains("parse uv.lock"), "got: {err}"); } - /// Baseline options: pypi registry at a dead address (a port that - /// refuses connections — these tests never dial it), no verdict config. - /// Override fields per test via struct update. - fn stub_opts() -> PrecheckOptions { - PrecheckOptions { - threshold: Duration::from_secs(2 * 86400), - no_fail: false, - force: false, - json: false, - verdict: None, - npm_registry: None, - pypi_registry: Some("http://127.0.0.1:9".to_string()), - } - } - - /// `stub_opts()` plus a verdict config pointing at `base_url`. - fn verdict_opts(base_url: &str) -> PrecheckOptions { - PrecheckOptions { - verdict: Some(VerdictConfig { - base_url: base_url.to_string(), - mode: VerdictMode::Authenticated { - token: "test-token".to_string(), - }, - public_login_hint: false, - }), - ..stub_opts() - } - } - /// Run `run_parsed_install` for `pip install ` with an exec /// closure that records whether it ran (returning 42 instead of /// spawning anything). @@ -1588,52 +1046,6 @@ source = { git = "https://example.com/repo?rev=abc#abc" } assert!(exec_ran); } - fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { - // Recency derives from age vs `report_with`'s 2-day threshold: - // one hour ⇒ recent, a year ⇒ not. 
- let age = if recent { - Duration::from_secs(3600) - } else { - Duration::from_secs(365 * 86400) - }; - TargetOutcome::Resolved { - target: InstallTarget { - name: name.to_string(), - display: format!("{name}=={version}"), - kind: TargetKind::Unverifiable { - reason: "test".to_string(), - }, - }, - resolved: crate::verify_deps::registry::ResolvedPackage { - name: name.to_string(), - version: version.to_string(), - published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), - }, - age, - verdict: VerdictStatus::NotChecked, - } - } - - fn report_with(outcomes: Vec) -> PrecheckReport { - PrecheckReport { - manager: PackageManager::Pip, - subcommand: "install".to_string(), - original_args: vec![], - outcomes, - threshold: Duration::from_secs(2 * 86400), - tree: None, - // Most tests model an install that named something; bare-install - // cases set this explicitly. - bare_install: false, - } - } - - fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { - if let TargetOutcome::Resolved { verdict, .. 
} = outcome { - *verdict = v; - } - } - #[test] fn ecosystem_mapping() { assert_eq!(PackageManager::Pip.ecosystem(), "pypi"); @@ -1659,34 +1071,6 @@ source = { git = "https://example.com/repo?rev=abc#abc" } assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); } - fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions { - PrecheckOptions { - no_fail, - force, - verdict: Some(VerdictConfig { - base_url: "http://127.0.0.1:9".to_string(), - mode: VerdictMode::Public, - public_login_hint: true, - }), - ..stub_opts() - } - } - - fn authenticated_opts(no_fail: bool, force: bool) -> PrecheckOptions { - PrecheckOptions { - no_fail, - force, - verdict: Some(VerdictConfig { - base_url: "http://127.0.0.1:9".to_string(), - mode: VerdictMode::Authenticated { - token: "test-token".to_string(), - }, - public_login_hint: false, - }), - ..stub_opts() - } - } - /// Predicate matrix: force ⇒ never block; vulnerable blocks in every /// verdict mode; unverifiable/error findings block only in authenticated /// mode; recency keeps its task-2 --no-fail demotion. @@ -1909,144 +1293,6 @@ source = { git = "https://example.com/repo?rev=abc#abc" } } } - fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { - crate::vuln_api::VulnMatch { - advisory_id: advisory.to_string(), - severity_level: "high".to_string(), - tier: 1, - vulnerable_version_range: None, - fixed_version: fixed.map(str::to_string), - } - } - - #[test] - fn safe_version_single_fix() { - assert_eq!( - safe_version(&[vm("A-1", Some("2.0.0"))]), - Some("2.0.0".to_string()) - ); - } - - #[test] - fn safe_version_duplicate_fixes_collapse_without_parsing() { - // "1.0rc1" is unparsable, but a single distinct value needs no parse. - assert_eq!( - safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), - Some("1.0rc1".to_string()) - ); - } - - #[test] - fn safe_version_picks_highest_of_distinct_fixes() { - // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). 
- assert_eq!( - safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), - Some("1.10.0".to_string()) - ); - } - - #[test] - fn safe_version_two_component_versions_normalize() { - assert_eq!( - safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), - Some("4.0".to_string()) - ); - } - - #[test] - fn safe_version_mixed_fix_and_none_is_none() { - assert_eq!( - safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), - None - ); - } - - #[test] - fn safe_version_unparsable_among_distinct_is_none() { - assert_eq!( - safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), - None - ); - } - - #[test] - fn safe_version_empty_matches_is_none() { - assert_eq!(safe_version(&[]), None); - } - - #[test] - fn error_prefix_strips_parenthesized_detail() { - // The reqwest network-failure shape: per-package URL in parens. - assert_eq!( - error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), - "Failed to send vuln-api request: error sending request for url" - ); - assert_eq!( - error_prefix("vuln-api unavailable (HTTP 503)"), - "vuln-api unavailable" - ); - assert_eq!(error_prefix("no parens here"), "no parens here"); - } - - /// Four unverifiable findings sharing a prefix collapse into one group - /// (named + transitive both count); three do not. 
- #[test] - fn collapsed_groups_require_more_than_threshold() { - let unverifiable = |name: &str| { - let mut o = resolved_outcome(name, "1.0.0", false); - set_verdict( - &mut o, - VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), - ); - o - }; - - let mut report = report_with(vec![ - unverifiable("a"), - unverifiable("b"), - unverifiable("c"), - ]); - assert!(collapsed_unverifiable_groups(&report).is_empty()); - - report.tree = Some(TreeReport::Full { - resolved_count: 4, - transitive: vec![TreeOutcome { - name: "d".to_string(), - version: "1.0.0".to_string(), - verdict: VerdictStatus::Unverifiable( - "vuln-api unavailable (HTTP 503: d)".to_string(), - ), - origin: TreeOrigin::Transitive, - }], - }); - let groups = collapsed_unverifiable_groups(&report); - assert_eq!(groups.len(), 1); - let (prefix, count, first) = groups[0]; - assert_eq!(prefix, "vuln-api unavailable"); - assert_eq!(count, 4); - // Render order is transitive-first, so the tree finding leads. - assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); - } - - #[test] - fn advertised_fix_ignores_matches_without_fix() { - // safe_version returns None here; the advertised fix still surfaces. - assert_eq!( - advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), - Some("2.0.0".to_string()) - ); - assert_eq!(advertised_fix(&[vm("A-1", None)]), None); - assert_eq!(advertised_fix(&[]), None); - } - - #[test] - fn advertised_fix_picks_highest_by_semver() { - assert_eq!( - advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), - Some("1.10.0".to_string()) - ); - } - /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over /// a direct-dep hit. 
@@ -2077,107 +1323,4 @@ source = { git = "https://example.com/repo?rev=abc#abc" } ] ); } - - /// The existing-tree refusal fires only when every vulnerable finding - /// predates the command: a `Requested` finding (pip `-r`) is added by - /// this command, and a `Transitive` finding is being pulled in right - /// now unless the install is truly bare. `bare_install` is the explicit - /// discriminator — a requirements-only install also has no named - /// outcomes, but its resolved set is the command's doing. - #[test] - fn refusal_blame_respects_finding_origin() { - let tree_vulnerable = |origin| TreeOutcome { - name: "dep".to_string(), - version: "1.0.0".to_string(), - verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), - origin, - }; - // (origin, named outcomes present, bare_install, expected). - // (origin, named=false, bare=false) is the requirements-only shape. - let cases = [ - (TreeOrigin::PreExisting, false, true, true), - (TreeOrigin::PreExisting, false, false, true), - (TreeOrigin::PreExisting, true, false, true), - (TreeOrigin::Transitive, false, true, true), - (TreeOrigin::Transitive, false, false, false), - (TreeOrigin::Transitive, true, false, false), - (TreeOrigin::Requested, false, true, false), - (TreeOrigin::Requested, false, false, false), - (TreeOrigin::Requested, true, false, false), - ]; - for (origin, with_named, bare_install, blames_tree) in cases { - let outcomes = if with_named { - vec![resolved_outcome("cleanpkg", "1.0.0", false)] - } else { - vec![] - }; - let mut report = report_with(outcomes); - report.bare_install = bare_install; - report.tree = Some(TreeReport::Full { - resolved_count: 1, - transitive: vec![tree_vulnerable(origin)], - }); - assert_eq!( - refusal_blames_existing_tree(&report, &authenticated_opts(false, false)), - blames_tree, - "origin {origin:?}, with_named {with_named}, bare {bare_install}" - ); - } - } - - /// Unverifiable tree findings block too (`should_block_install`), so - /// they must pass the same 
origin test before the refusal may blame the - /// existing tree: a command-added unverifiable transitive alongside a - /// pre-existing vulnerable dep keeps the generic refusal on a named - /// install, while on a bare install everything still predates the - /// command. - #[test] - fn refusal_blame_considers_unverifiable_tree_findings() { - let tree_finding = |name: &str, verdict, origin| TreeOutcome { - name: name.to_string(), - version: "1.0.0".to_string(), - verdict, - origin, - }; - let mixed_tree = || { - Some(TreeReport::Full { - resolved_count: 2, - transitive: vec![ - tree_finding( - "stickydep", - VerdictStatus::Vulnerable(vec![vm("A-1", None)]), - TreeOrigin::PreExisting, - ), - tree_finding( - "newdep", - VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), - TreeOrigin::Transitive, - ), - ], - }) - }; - - // Named install: the unverifiable transitive is being added by this - // command, so "none were added by this command" would lie. - let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); - report.tree = mixed_tree(); - assert!(!refusal_blames_existing_tree( - &report, - &authenticated_opts(false, false) - )); - assert!(refusal_blames_existing_tree( - &report, - &public_opts(false, false) - )); - - // Bare install: nothing named, everything resolved predates the - // command — the mixed findings still blame the existing tree. - let mut report = report_with(vec![]); - report.bare_install = true; - report.tree = mixed_tree(); - assert!(refusal_blames_existing_tree( - &report, - &authenticated_opts(false, false) - )); - } } diff --git a/src/precheck/render.rs b/src/precheck/render.rs new file mode 100644 index 0000000..9c15201 --- /dev/null +++ b/src/precheck/render.rs @@ -0,0 +1,762 @@ +//! Report rendering: text/JSON output, refusal line, fix/steer helpers. 
+ +use crate::verify_deps; + +use super::{ + parse, PackageManager, PrecheckOptions, PrecheckReport, TargetOutcome, TreeOrigin, TreeReport, + VerdictMode, VerdictStatus, +}; + +/// Reason recorded on resolved targets when no verdict pass ran. +const NO_VERDICT_REASON: &str = "vulnerability verdict not checked"; + +/// One honest stderr line when a zero-spec install can't be gated: +/// yarn/pnpm/uv have no safe dry-run, so a bare install pulls its whole +/// dependency set unchecked. No-op for other managers (bare npm is gated +/// via the tree pass; bare pip installs nothing). +pub(super) fn bare_install_note(manager: PackageManager, subcommand_label: &str) { + if matches!( + manager, + PackageManager::Yarn | PackageManager::Pnpm | PackageManager::Uv + ) { + eprintln!( + "note: bare '{} {}' is not gated (no safe dry-run) — dependencies install unchecked", + manager.binary_name(), + subcommand_label + ); + } +} + +/// The refusal line on stderr. When vulnerable findings exist but none sit on +/// a named target — and no named target is unverifiable either — the block is +/// entirely the existing tree's doing, so say that instead of implying the +/// package the user typed is at fault. Messaging only; the block decision +/// stays with `should_block_install`. +pub(super) fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { + if refusal_blames_existing_tree(report, opts) { + eprintln!( + "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." + ); + } else if report.vulnerable_count() > 0 + || (super::authenticated_verdict(opts) && report.unverifiable_count() > 0) + || (super::authenticated_verdict(opts) && report.error_count() > 0) + { + eprintln!("Refusing to run install. Pass --force to proceed despite findings."); + } else { + eprintln!("Refusing to run install. 
Pass --no-fail to proceed anyway."); + } +} + +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, none sit on a named target (or block as unverifiable +/// there), and every *blocking* tree finding — vulnerable or unverifiable, +/// since `should_block_install` refuses on both — genuinely predates this +/// command. A `Requested` finding (pip `-r`) is added by this command and +/// renders as `(from requirements)`; a `Transitive` finding on any install +/// that names targets or requirements files is being pulled in by them +/// right now. Only a truly bare install (`report.bare_install`) or +/// manifest-declared `PreExisting` findings may blame the existing tree. +fn refusal_blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + let fail_closed = super::authenticated_verdict(opts); + let named_findings = report.named_vulnerable_count() + + if fail_closed { + report.named_unverifiable_count() + } else { + 0 + }; + if report.vulnerable_count() == 0 || named_findings > 0 { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| { + matches!(t.verdict, VerdictStatus::Vulnerable(_)) + || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) + }) + .all(|t| match t.origin { + // A locked pin predates the sync command that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + +/// Print the "requirements files are not recency-checked" note when the +/// install carried any `-r` files. No-op otherwise. 
+pub(super) fn requirements_note(parsed: &parse::ParsedInstall) { + if parsed.requirements_files.is_empty() { + return; + } + let files: Vec = parsed + .requirements_files + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "note: requirements files ({}) are not recency-checked by the baseline gate", + files.join(", ") + ); +} + +pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { + if super::public_verdict(opts) && report.unverifiable_count() > 0 { + eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); + } +} + +/// Suffix for a vulnerable match line: the advisory's fix, if known. +fn fix_note(m: &crate::vuln_api::VulnMatch) -> String { + match &m.fixed_version { + Some(v) => format!(" — fixed in {v}"), + None => " — no fixed version known".to_string(), + } +} + +/// Highest of `fixes` after sort/dedup: a single distinct value is returned +/// as-is (no parsing — preserves odd-but-unambiguous forms); several distinct +/// values compare by lenient semver. With `all_must_parse`, one unparsable +/// candidate among several poisons the answer (`None`); otherwise unparsable +/// candidates are skipped. +fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { + fixes.sort_unstable(); + fixes.dedup(); + match fixes.as_slice() { + [] => None, + [only] => Some((*only).to_string()), + many => { + let mut best: Option<(semver::Version, &str)> = None; + for raw in many { + let v = + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) + { + Ok(v) => v, + Err(_) if all_must_parse => return None, + Err(_) => continue, + }; + match &best { + Some((cur, _)) if cur >= &v => {} + _ => best = Some((v, raw)), + } + } + best.map(|(_, raw)| (*raw).to_string()) + } + } +} + +/// The one version certified to clear every match. 
Requires every match to +/// carry a `fixed_version`; any match without one — or an unparsable +/// candidate among several — means no version can be certified, so `None`. +fn safe_version(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .map(|m| m.fixed_version.as_deref()) + .collect::>()?; + highest_fix(fixes, true) +} + +/// Highest `fixed_version` the advisories advertise, by lenient semver. +/// Unlike `safe_version` this is *not* a certification: matches without a +/// fix are ignored, so the result may still be vulnerable to them. `None` +/// only when no match advertises a fix (or no candidate parses). +fn advertised_fix(matches: &[crate::vuln_api::VulnMatch]) -> Option { + let fixes: Vec<&str> = matches + .iter() + .filter_map(|m| m.fixed_version.as_deref()) + .collect(); + highest_fix(fixes, false) +} + +/// Per-match advisory lines plus the safe-version steer, shared by the +/// named-target and transitive vulnerable render arms. +fn print_vulnerable_matches(name: &str, matches: &[crate::vuln_api::VulnMatch]) { + for m in matches { + println!( + " {} ({}){}", + m.advisory_id, + m.severity_level, + fix_note(m) + ); + } + if let Some(safe) = safe_version(matches) { + println!(" → safe version: {name}@{safe}"); + } +} + +/// One summary-line segment, e.g. `"2 vulnerable (2 from resolved tree)"`. +/// The parenthetical separates findings the resolved tree carried in from +/// findings on the targets this command names; omitted when the tree +/// contributed none. +fn summary_segment(total: usize, from_tree: usize, label: &str) -> String { + if from_tree > 0 { + format!("{total} {label} ({from_tree} from resolved tree)") + } else { + format!("{total} {label}") + } +} + +/// More than this many unverifiable findings with the same error-prefix +/// render as one collapsed line instead of one line per package. 
+const UNVERIFIABLE_COLLAPSE_THRESHOLD: usize = 3; + +/// Group key for collapsing repeated unverifiable errors: the text before +/// the first `(` — strips per-package detail (URLs, status codes) so one +/// outage groups under one key. +fn error_prefix(error: &str) -> &str { + match error.find('(') { + Some(i) => error[..i].trim_end(), + None => error, + } +} + +/// Unverifiable error strings across transitive tree findings and named +/// outcomes, in render order. +fn unverifiable_errors(report: &PrecheckReport) -> Vec<&str> { + let mut errors = Vec::new(); + if let Some(TreeReport::Full { transitive, .. }) = &report.tree { + for t in transitive { + if let VerdictStatus::Unverifiable(e) = &t.verdict { + errors.push(e.as_str()); + } + } + } + for o in &report.outcomes { + if let TargetOutcome::Resolved { + verdict: VerdictStatus::Unverifiable(e), + .. + } = o + { + errors.push(e.as_str()); + } + } + errors +} + +/// `(prefix, count, first error)` groups of unverifiable findings large +/// enough to collapse (> `UNVERIFIABLE_COLLAPSE_THRESHOLD` per prefix) — +/// the vuln-api outage case, where every package fails the same way. +/// Display-only: counts and exit codes never change. +fn collapsed_unverifiable_groups(report: &PrecheckReport) -> Vec<(&str, usize, &str)> { + let mut groups: Vec<(&str, usize, &str)> = Vec::new(); + for e in unverifiable_errors(report) { + let prefix = error_prefix(e); + match groups.iter_mut().find(|(p, _, _)| *p == prefix) { + Some((_, count, _)) => *count += 1, + None => groups.push((prefix, 1, e)), + } + } + groups.retain(|(_, count, _)| *count > UNVERIFIABLE_COLLAPSE_THRESHOLD); + groups +} + +pub(super) fn print_text(report: &PrecheckReport) { + // Build the echoed command from non-empty parts: a bare gated install + // (e.g. `npm install` with zero specs) has no args to append. 
+ let mut command = format!("{} {}", report.manager.binary_name(), report.subcommand); + if !report.original_args.is_empty() { + command.push(' '); + command.push_str(&report.original_args.join(" ")); + } + + let collapsed = collapsed_unverifiable_groups(report); + let is_collapsed = |error: &str| { + collapsed + .iter() + .any(|(prefix, _, _)| *prefix == error_prefix(error)) + }; + + println!( + "Pre-checking `{}` (threshold {})", + command, + verify_deps::format_duration(report.threshold) + ); + println!( + " {} ok, {} recent, {}, {}, {} skipped, {} errors", + report.ok_count(), + report.recent_count(), + summary_segment( + report.vulnerable_count(), + report.tree_vulnerable_count(), + "vulnerable" + ), + summary_segment( + report.unverifiable_count(), + report.tree_unverifiable_count(), + "unverifiable" + ), + report.skipped_count(), + report.error_count(), + ); + + match &report.tree { + Some(TreeReport::Full { + resolved_count, + transitive, + .. + }) => { + println!( + " tree: {} packages resolved, {} transitive checked", + resolved_count, + transitive.len() + ); + for t in transitive { + match &t.verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {}@{} {} known vulnerable:", + t.name, + t.version, + t.origin.label() + ); + print_vulnerable_matches(&t.name, matches); + // A vulnerable dep the project already declares can be + // bumped directly — point at the fix as a command. + // When `safe_version` is `Some` it equals + // `advertised_fix` and clears every advisory; otherwise + // some advisory has no fix, so the "(advertised fix)" + // hedge marks the bump as partial. 
+ if t.origin == TreeOrigin::PreExisting { + if let Some(fix) = advertised_fix(matches) { + let hedge = if safe_version(matches).is_some() { + "" + } else { + " (advertised fix)" + }; + println!( + " fix with: corgea {} install {}@{}{}", + report.manager.binary_name(), + t.name, + fix, + hedge + ); + } + } + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {}@{} {} could not be verified: {}", + t.name, + t.version, + t.origin.label(), + error + ); + } + } + // Clean / not-checked tree entries stay quiet in text mode. + VerdictStatus::Clean | VerdictStatus::NotChecked => {} + } + } + } + Some(TreeReport::NamedOnly { reason }) => { + println!(" tree: transitive dependencies NOT checked ({reason})"); + } + None => {} + } + + // One line per collapsed outage group instead of one per package. + for (_, count, first_error) in &collapsed { + println!( + " ⚠ {count} packages could not be verified (vuln-api unreachable: {first_error})" + ); + } + + for o in &report.outcomes { + match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => match verdict { + VerdictStatus::Vulnerable(matches) => { + println!( + " ✗ {} → {}@{} known vulnerable:", + target.display, resolved.name, resolved.version, + ); + print_vulnerable_matches(&resolved.name, matches); + } + VerdictStatus::Unverifiable(error) => { + if !is_collapsed(error) { + println!( + " ⚠ {} → {}@{} could not be verified: {}", + target.display, resolved.name, resolved.version, error, + ); + } + } + VerdictStatus::Clean | VerdictStatus::NotChecked => { + if report.is_recent(*age) { + println!( + " ⚠ {} → {}@{} published {} ago at {} (within threshold)", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + resolved.published_at.format("%Y-%m-%d %H:%M:%S UTC"), + ); + } else { + println!( + " ✓ {} → {}@{} published {} ago", + target.display, + resolved.name, + resolved.version, + verify_deps::format_duration(*age), + ); 
+ } + } + }, + TargetOutcome::Skipped { target, reason } => { + println!(" ? {}: {}", target.display, reason); + } + TargetOutcome::Error { target, error } => { + println!(" ✗ {}: {}", target.display, error); + } + } + } +} + +/// JSON shape for a single verdict. Shared by named outcomes and tree +/// (transitive) outcomes so both render verdicts identically. +/// `remediation` carries the version that clears every advisory +/// (`safe_version`); `null` when any advisory has no known fix. +fn verdict_json(verdict: &VerdictStatus) -> serde_json::Value { + use serde_json::json; + match verdict { + VerdictStatus::Clean => json!({ "status": "clean" }), + VerdictStatus::Vulnerable(matches) => { + json!({ + "status": "vulnerable", + "matches": matches, + "remediation": safe_version(matches), + }) + } + VerdictStatus::Unverifiable(error) => { + json!({ "status": "unverifiable", "error": error }) + } + VerdictStatus::NotChecked => { + json!({ "status": "not_checked", "reason": NO_VERDICT_REASON }) + } + } +} + +pub(super) fn print_json(report: &PrecheckReport, opts: &PrecheckOptions) { + use serde_json::json; + let verdict_mode = match opts.verdict.as_ref().map(|cfg| &cfg.mode) { + Some(VerdictMode::Public) => "public", + Some(VerdictMode::Authenticated { .. 
}) => "authenticated", + None => "recency-only", + }; + let outcomes: Vec<_> = report + .outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { + target, + resolved, + age, + verdict, + } => { + let verdict_json = verdict_json(verdict); + json!({ + "status": if report.is_recent(*age) { "recent" } else { "ok" }, + "spec": target.display, + "name": resolved.name, + "resolved_version": resolved.version, + "published_at": resolved.published_at.to_rfc3339(), + "age_seconds": age.as_secs(), + "verdict": verdict_json, + }) + } + TargetOutcome::Skipped { target, reason } => json!({ + "status": "skipped", + "spec": target.display, + "name": target.name, + "reason": reason, + }), + TargetOutcome::Error { target, error } => json!({ + "status": "error", + "spec": target.display, + "name": target.name, + "error": error, + }), + }) + .collect(); + + let body = json!({ + "manager": report.manager.binary_name(), + "subcommand": report.subcommand, + "args": report.original_args, + "threshold_seconds": report.threshold.as_secs(), + "summary": { + "ok": report.ok_count(), + "recent": report.recent_count(), + "vulnerable": report.vulnerable_count(), + "unverifiable": report.unverifiable_count(), + "skipped": report.skipped_count(), + "errors": report.error_count(), + }, + "verdict_mode": verdict_mode, + "results": outcomes, + "tree": report.tree.as_ref().map(|t| match t { + TreeReport::Full { resolved_count, transitive } => json!({ + "mode": "full", + "reason": serde_json::Value::Null, + "resolved_count": resolved_count, + "transitive": transitive.iter().map(|o| json!({ + "name": o.name, + "version": o.version, + "origin": o.origin.json_name(), + "verdict": verdict_json(&o.verdict), + })).collect::>(), + }), + TreeReport::NamedOnly { reason } => json!({ + "mode": "named-only", + "reason": reason, + "resolved_count": 0, + "transitive": [], + }), + }), + }); + + println!("{}", serde_json::to_string_pretty(&body).unwrap()); +} + +#[cfg(test)] +mod tests { + use 
super::super::test_support::*; + use super::super::TreeOutcome; + use super::*; + + #[test] + fn safe_version_single_fix() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0"))]), + Some("2.0.0".to_string()) + ); + } + + #[test] + fn safe_version_duplicate_fixes_collapse_without_parsing() { + // "1.0rc1" is unparsable, but a single distinct value needs no parse. + assert_eq!( + safe_version(&[vm("A-1", Some("1.0rc1")), vm("A-2", Some("1.0rc1"))]), + Some("1.0rc1".to_string()) + ); + } + + #[test] + fn safe_version_picks_highest_of_distinct_fixes() { + // Semver order, not lexical ("1.2.0" > "1.10.0" lexically). + assert_eq!( + safe_version(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + #[test] + fn safe_version_two_component_versions_normalize() { + assert_eq!( + safe_version(&[vm("A-1", Some("4.0")), vm("A-2", Some("3.2.5"))]), + Some("4.0".to_string()) + ); + } + + #[test] + fn safe_version_mixed_fix_and_none_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + None + ); + } + + #[test] + fn safe_version_unparsable_among_distinct_is_none() { + assert_eq!( + safe_version(&[vm("A-1", Some("2!1.0")), vm("A-2", Some("1.0.0"))]), + None + ); + } + + #[test] + fn safe_version_empty_matches_is_none() { + assert_eq!(safe_version(&[]), None); + } + + #[test] + fn error_prefix_strips_parenthesized_detail() { + // The reqwest network-failure shape: per-package URL in parens. + assert_eq!( + error_prefix("Failed to send vuln-api request: error sending request for url (http://x/v1/packages/pypi/a/versions/1.0.0/check)"), + "Failed to send vuln-api request: error sending request for url" + ); + assert_eq!( + error_prefix("vuln-api unavailable (HTTP 503)"), + "vuln-api unavailable" + ); + assert_eq!(error_prefix("no parens here"), "no parens here"); + } + + /// Four unverifiable findings sharing a prefix collapse into one group + /// (named + transitive both count); three do not. 
+ #[test] + fn collapsed_groups_require_more_than_threshold() { + let unverifiable = |name: &str| { + let mut o = resolved_outcome(name, "1.0.0", false); + set_verdict( + &mut o, + VerdictStatus::Unverifiable(format!("vuln-api unavailable (HTTP 503: {name})")), + ); + o + }; + + let mut report = report_with(vec![ + unverifiable("a"), + unverifiable("b"), + unverifiable("c"), + ]); + assert!(collapsed_unverifiable_groups(&report).is_empty()); + + report.tree = Some(TreeReport::Full { + resolved_count: 4, + transitive: vec![TreeOutcome { + name: "d".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Unverifiable( + "vuln-api unavailable (HTTP 503: d)".to_string(), + ), + origin: TreeOrigin::Transitive, + }], + }); + let groups = collapsed_unverifiable_groups(&report); + assert_eq!(groups.len(), 1); + let (prefix, count, first) = groups[0]; + assert_eq!(prefix, "vuln-api unavailable"); + assert_eq!(count, 4); + // Render order is transitive-first, so the tree finding leads. + assert_eq!(first, "vuln-api unavailable (HTTP 503: d)"); + } + + #[test] + fn advertised_fix_ignores_matches_without_fix() { + // safe_version returns None here; the advertised fix still surfaces. + assert_eq!( + advertised_fix(&[vm("A-1", Some("2.0.0")), vm("A-2", None)]), + Some("2.0.0".to_string()) + ); + assert_eq!(advertised_fix(&[vm("A-1", None)]), None); + assert_eq!(advertised_fix(&[]), None); + } + + #[test] + fn advertised_fix_picks_highest_by_semver() { + assert_eq!( + advertised_fix(&[vm("A-1", Some("1.2.0")), vm("A-2", Some("1.10.0"))]), + Some("1.10.0".to_string()) + ); + } + + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. 
`bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. + #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. + let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + refusal_blames_existing_tree(&report, &authenticated_opts(false, false)), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// Unverifiable tree findings block too (`should_block_install`), so + /// they must pass the same origin test before the refusal may blame the + /// existing tree: a command-added unverifiable transitive alongside a + /// pre-existing vulnerable dep keeps the generic refusal on a named + /// install, while on a bare install everything still predates the + /// command. 
+ #[test] + fn refusal_blame_considers_unverifiable_tree_findings() { + let tree_finding = |name: &str, verdict, origin| TreeOutcome { + name: name.to_string(), + version: "1.0.0".to_string(), + verdict, + origin, + }; + let mixed_tree = || { + Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![ + tree_finding( + "stickydep", + VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + TreeOrigin::PreExisting, + ), + tree_finding( + "newdep", + VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), + TreeOrigin::Transitive, + ), + ], + }) + }; + + // Named install: the unverifiable transitive is being added by this + // command, so "none were added by this command" would lie. + let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); + report.tree = mixed_tree(); + assert!(!refusal_blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + assert!(refusal_blames_existing_tree( + &report, + &public_opts(false, false) + )); + + // Bare install: nothing named, everything resolved predates the + // command — the mixed findings still blame the existing tree. + let mut report = report_with(vec![]); + report.bare_install = true; + report.tree = mixed_tree(); + assert!(refusal_blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + } +} diff --git a/src/precheck/test_support.rs b/src/precheck/test_support.rs new file mode 100644 index 0000000..ea16aae --- /dev/null +++ b/src/precheck/test_support.rs @@ -0,0 +1,124 @@ +//! Shared builders for precheck unit tests (mod.rs, render.rs, verdict.rs). +//! Test-only: declared `#[cfg(test)]` from mod.rs. + +use std::time::Duration; + +use chrono::Utc; + +use super::{ + InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, TargetOutcome, + VerdictConfig, VerdictMode, VerdictStatus, +}; + +/// Baseline options: pypi registry at a dead address (a port that +/// refuses connections - these tests never dial it), no verdict config. 
+/// Override fields per test via struct update. +pub(crate) fn stub_opts() -> PrecheckOptions { + PrecheckOptions { + threshold: Duration::from_secs(2 * 86400), + no_fail: false, + force: false, + json: false, + verdict: None, + npm_registry: None, + pypi_registry: Some("http://127.0.0.1:9".to_string()), + } +} + +/// `stub_opts()` plus a verdict config pointing at `base_url`. +pub(crate) fn verdict_opts(base_url: &str) -> PrecheckOptions { + PrecheckOptions { + verdict: Some(VerdictConfig { + base_url: base_url.to_string(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, + }), + ..stub_opts() + } +} + +pub(crate) fn public_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { + no_fail, + force, + verdict: Some(VerdictConfig { + base_url: "http://127.0.0.1:9".to_string(), + mode: VerdictMode::Public, + public_login_hint: true, + }), + ..stub_opts() + } +} + +pub(crate) fn authenticated_opts(no_fail: bool, force: bool) -> PrecheckOptions { + PrecheckOptions { + no_fail, + force, + verdict: Some(VerdictConfig { + base_url: "http://127.0.0.1:9".to_string(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, + }), + ..stub_opts() + } +} + +pub(crate) fn resolved_outcome(name: &str, version: &str, recent: bool) -> TargetOutcome { + // Recency derives from age vs `report_with`'s 2-day threshold: + // one hour => recent, a year => not. 
+ let age = if recent { + Duration::from_secs(3600) + } else { + Duration::from_secs(365 * 86400) + }; + TargetOutcome::Resolved { + target: InstallTarget { + name: name.to_string(), + display: format!("{name}=={version}"), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + resolved: crate::verify_deps::registry::ResolvedPackage { + name: name.to_string(), + version: version.to_string(), + published_at: Utc::now() - chrono::Duration::from_std(age).unwrap(), + }, + age, + verdict: VerdictStatus::NotChecked, + } +} + +pub(crate) fn report_with(outcomes: Vec) -> PrecheckReport { + PrecheckReport { + manager: PackageManager::Pip, + subcommand: "install".to_string(), + original_args: vec![], + outcomes, + threshold: Duration::from_secs(2 * 86400), + tree: None, + // Most tests model an install that named something; bare-install + // cases set this explicitly. + bare_install: false, + } +} + +pub(crate) fn set_verdict(outcome: &mut TargetOutcome, v: VerdictStatus) { + if let TargetOutcome::Resolved { verdict, .. 
} = outcome { + *verdict = v; + } +} + +pub(crate) fn vm(advisory: &str, fixed: Option<&str>) -> crate::vuln_api::VulnMatch { + crate::vuln_api::VulnMatch { + advisory_id: advisory.to_string(), + severity_level: "high".to_string(), + tier: 1, + vulnerable_version_range: None, + fixed_version: fixed.map(str::to_string), + } +} From 522dda114ffb61de3ad74ba01510133c1358f6fc Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 19:00:51 +0200 Subject: [PATCH 41/59] Extract precheck verdict module --- src/precheck/mod.rs | 461 ++-------------------------------------- src/precheck/render.rs | 8 +- src/precheck/verdict.rs | 460 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+), 452 deletions(-) create mode 100644 src/precheck/verdict.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index edb5317..bab4de5 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -19,6 +19,7 @@ pub mod tree; mod detect; mod exec; mod render; +mod verdict; #[cfg(test)] mod test_support; @@ -71,6 +72,10 @@ impl PackageManager { /// Canonical package name for dedup/matching across spec spellings: /// PEP 503 for pypi (shared with `deps`), verbatim for npm. + /// + /// Invariant: names are normalized at comparison/request time + /// (`verdict::verdict_pool` / `verdict::apply_verdicts` / tree dedup), + /// never at parse time — parsers and resolvers carry raw names. 
pub fn normalize_name(self, name: &str) -> String { match self { PackageManager::Pip | PackageManager::Uv => { @@ -491,7 +496,8 @@ fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32 }; let resolved_count = jobs.len(); - let results = verdict_pool(jobs, cfg, PackageManager::Uv, VERDICT_CONCURRENCY); + let results = + verdict::verdict_pool(jobs, cfg, PackageManager::Uv, verdict::VERDICT_CONCURRENCY); let transitive = results .into_iter() .map(|(pkg, verdict)| TreeOutcome { @@ -530,7 +536,7 @@ fn report_and_exec( render::print_text(report); } render::warn_public_lookup_failures(report, opts); - if should_block_install(report, opts) { + if verdict::should_block_install(report, opts) { if !opts.json { render::print_refusal(report, opts); } @@ -604,7 +610,7 @@ fn run_parsed_install( let outcomes: Vec<_> = parsed .targets .iter() - .map(|target| verify_one(target, &opts, &now)) + .map(|target| verdict::verify_one(target, &opts, &now)) .collect(); ( outcomes, @@ -714,133 +720,14 @@ fn run_tree_pass( .verdict .as_ref() .expect("tree pass requires verdict config"); - let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); - let transitive = apply_verdicts(manager, results, outcomes, &direct_deps); + let results = verdict::verdict_pool(jobs, cfg, manager, verdict::VERDICT_CONCURRENCY); + let transitive = verdict::apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, transitive, } } -/// Above this many verdict jobs, print a stderr progress line so a big tree -/// pass doesn't look hung. -const VERDICT_PROGRESS_THRESHOLD: usize = 8; - -/// Max parallel vuln-api verdict requests. -const VERDICT_CONCURRENCY: usize = 8; - -/// Bounded worker pool over the verdict jobs. On client/request failure every -/// job comes back `Unverifiable`; `should_block_install` decides whether that -/// fails closed for the selected mode. 
-/// Plain work queue, no new crates; `reqwest::blocking::Client` is -/// `Send + Sync`. Result order is not preserved; callers match results back -/// by `(name, version)`. -fn verdict_pool( - jobs: Vec, - cfg: &VerdictConfig, - manager: PackageManager, - concurrency: usize, -) -> Vec<(tree::TreePackage, VerdictStatus)> { - use std::collections::VecDeque; - use std::sync::Mutex; - - let client = match crate::vuln_api::http_client() { - Ok(c) => c, - Err(e) => { - return jobs - .into_iter() - .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) - .collect(); - } - }; - - if jobs.len() > VERDICT_PROGRESS_THRESHOLD { - eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); - } - - let ecosystem = manager.ecosystem(); - let workers = concurrency.min(jobs.len()).max(1); - let queue = Mutex::new(VecDeque::from(jobs)); - let results = Mutex::new(Vec::new()); - std::thread::scope(|s| { - for _ in 0..workers { - s.spawn(|| loop { - let Some(job) = queue.lock().unwrap().pop_front() else { - break; - }; - // vuln-api advisories are keyed by canonical names; an - // alternate spelling (PEP 503: `Flask_Cors` ≡ `flask-cors`) - // would miss and read as clean. - let verdict = match crate::vuln_api::check_package_version( - &client, - &cfg.base_url, - cfg.mode.auth_token(), - ecosystem, - &manager.normalize_name(&job.name), - &job.version, - ) { - Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), - Ok(_) => VerdictStatus::Clean, - Err(e) => VerdictStatus::Unverifiable(e.to_string()), - }; - results.lock().unwrap().push((job, verdict)); - }); - } - }); - results.into_inner().unwrap() -} - -/// Assign pooled verdicts onto matching named outcomes (by normalized -/// name + version) and return the unmatched leftovers — the tree findings. -/// Each leftover carries its provenance: pip's `requested` flag, membership -/// in the project manifest's direct deps (`direct_deps`), or transitive. 
-fn apply_verdicts( - manager: PackageManager, - results: Vec<(tree::TreePackage, VerdictStatus)>, - outcomes: &mut [TargetOutcome], - direct_deps: &std::collections::HashSet, -) -> Vec { - let norm = |n: &str| manager.normalize_name(n); - // Index named outcomes by (normalized name, version) so matching the - // pooled results stays linear on big trees. - let mut named: std::collections::HashMap<(String, String), Vec> = - std::collections::HashMap::new(); - for (i, o) in outcomes.iter().enumerate() { - if let TargetOutcome::Resolved { resolved, .. } = o { - named - .entry((norm(&resolved.name), resolved.version.clone())) - .or_default() - .push(i); - } - } - - let mut transitive = Vec::new(); - for (pkg, verdict) in results { - if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { - for &i in indices { - if let TargetOutcome::Resolved { verdict: v, .. } = &mut outcomes[i] { - *v = verdict.clone(); - } - } - } else { - let origin = if pkg.requested { - TreeOrigin::Requested - } else if direct_deps.contains(&pkg.name) { - TreeOrigin::PreExisting - } else { - TreeOrigin::Transitive - }; - transitive.push(TreeOutcome { - name: pkg.name, - version: pkg.version, - origin, - verdict, - }); - } - } - transitive -} - /// Vuln-api verdict pass over resolved targets, run through the bounded /// worker pool. No-op without a `VerdictConfig` (direct recency-only callers). 
/// Any client/call failure becomes `Unverifiable`; authenticated mode blocks @@ -866,83 +753,14 @@ fn run_verdict_pass( }) .collect(); - let results = verdict_pool(jobs, cfg, manager, VERDICT_CONCURRENCY); - let leftovers = apply_verdicts(manager, results, outcomes, &Default::default()); + let results = verdict::verdict_pool(jobs, cfg, manager, verdict::VERDICT_CONCURRENCY); + let leftovers = verdict::apply_verdicts(manager, results, outcomes, &Default::default()); debug_assert!( leftovers.is_empty(), "named verdict pass left tree leftovers" ); } -fn authenticated_verdict(opts: &PrecheckOptions) -> bool { - opts.verdict - .as_ref() - .is_some_and(|cfg| cfg.mode.is_authenticated()) -} - -fn public_verdict(opts: &PrecheckOptions) -> bool { - opts.verdict - .as_ref() - .is_some_and(|cfg| cfg.mode.is_public()) -} - -fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { - if opts.force { - return false; - } - // A resolution error means no verdict was obtained for that target, so - // in authenticated mode it fails closed like `Unverifiable` — otherwise a - // registry outage silently bypasses the gate. 
- let fail_closed = authenticated_verdict(opts); - report.vulnerable_count() > 0 - || (fail_closed && report.unverifiable_count() > 0) - || (fail_closed && report.error_count() > 0) - || (!opts.no_fail && report.recent_count() > 0) -} - -fn verify_one( - target: &InstallTarget, - opts: &PrecheckOptions, - now: &chrono::DateTime, -) -> TargetOutcome { - use crate::verify_deps::registry; - - let resolved = match &target.kind { - TargetKind::Unverifiable { reason } => { - return TargetOutcome::Skipped { - target: target.clone(), - reason: reason.clone(), - }; - } - TargetKind::Npm(spec) => { - registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) - } - TargetKind::Pypi(spec) => { - registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) - } - }; - - match resolved { - Ok(resolved) => { - // Future publish dates clamp to zero — maximally recent. - let age = now - .signed_duration_since(resolved.published_at) - .to_std() - .unwrap_or_else(|_| Duration::from_secs(0)); - TargetOutcome::Resolved { - target: target.clone(), - resolved, - age, - verdict: VerdictStatus::NotChecked, - } - } - Err(e) => TargetOutcome::Error { - target: target.clone(), - error: e, - }, - } -} - #[cfg(test)] mod tests { use super::test_support::*; @@ -1070,257 +888,4 @@ source = { git = "https://example.com/repo?rev=abc#abc" } // npm names are case-sensitive and pass through verbatim. assert_eq!(PackageManager::Npm.normalize_name("Left_Pad"), "Left_Pad"); } - - /// Predicate matrix: force ⇒ never block; vulnerable blocks in every - /// verdict mode; unverifiable/error findings block only in authenticated - /// mode; recency keeps its task-2 --no-fail demotion. 
- #[test] - fn block_predicate_matrix() { - let clean = { - let mut o = resolved_outcome("pkg", "1.0.0", false); - set_verdict(&mut o, VerdictStatus::Clean); - report_with(vec![o]) - }; - let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); - let vulnerable = { - let mut o = resolved_outcome("pkg", "1.0.0", false); - set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); - report_with(vec![o]) - }; - let unverifiable = { - let mut o = resolved_outcome("pkg", "1.0.0", false); - set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); - report_with(vec![o]) - }; - let resolution_error = report_with(vec![TargetOutcome::Error { - target: InstallTarget { - name: "pkg".to_string(), - display: "pkg==1.0.0".to_string(), - kind: TargetKind::Unverifiable { - reason: "test".to_string(), - }, - }, - error: "registry unavailable".to_string(), - }]); - - assert!(!should_block_install(&clean, &public_opts(false, false))); - assert!(should_block_install(&recent, &public_opts(false, false))); - assert!(!should_block_install(&recent, &public_opts(true, false))); - assert!(should_block_install( - &vulnerable, - &public_opts(false, false) - )); - assert!( - should_block_install(&vulnerable, &public_opts(true, false)), - "--no-fail must not waive a vulnerable block" - ); - assert!( - !should_block_install(&unverifiable, &public_opts(false, false)), - "public mode must fail open on lookup errors" - ); - assert!( - should_block_install(&unverifiable, &authenticated_opts(true, false)), - "authenticated mode must fail closed on lookup errors" - ); - assert!( - !should_block_install(&resolution_error, &public_opts(false, false)), - "public mode must fail open when no verdict can be obtained" - ); - assert!( - should_block_install(&resolution_error, &authenticated_opts(false, false)), - "authenticated mode must fail closed when no verdict can be obtained" - ); - for report in [ - &clean, - &recent, - &vulnerable, - &unverifiable, - &resolution_error, - ] { - 
assert!( - !should_block_install(report, &public_opts(false, true)), - "--force must never block" - ); - assert!(!should_block_install( - report, - &authenticated_opts(true, true) - )); - } - } - - /// A clean named outcome plus a vulnerable transitive tree finding must - /// roll into the block counts: `vulnerable_count() == 1`, - /// `should_block_install` true without `--force`, false with it. - #[test] - fn tree_findings_extend_block_counts() { - let mut named = resolved_outcome("pkg", "1.0.0", false); - set_verdict(&mut named, VerdictStatus::Clean); - let mut report = report_with(vec![named]); - report.tree = Some(TreeReport::Full { - resolved_count: 2, - transitive: vec![TreeOutcome { - name: "evildep".to_string(), - version: "0.4.2".to_string(), - origin: TreeOrigin::Transitive, - verdict: VerdictStatus::Vulnerable(vec![]), - }], - }); - - assert_eq!(report.vulnerable_count(), 1); - let opts = |force: bool| PrecheckOptions { - force, - ..stub_opts() - }; - assert!(should_block_install(&report, &opts(false))); - assert!(!should_block_install(&report, &opts(true))); - } - - /// Verdict pass against an in-process stub: vulnerable body → Vulnerable - /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes - /// keep NotChecked. 
- #[test] - fn verdict_pass_maps_stub_responses() { - use std::collections::HashMap; - - let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); - let mut checks = HashMap::new(); - checks.insert( - key("evil"), - r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string(), - ); - checks.insert(key("flaky"), "{}".to_string()); - let mut statuses = HashMap::new(); - statuses.insert(key("flaky"), 503u16); - let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); - - let opts = verdict_opts(&stub.base_url); - - let mut outcomes = vec![ - resolved_outcome("evil", "1.0.0", false), - resolved_outcome("flaky", "1.0.0", false), - resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean - ]; - run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); - - let verdicts: Vec<_> = outcomes - .iter() - .map(|o| match o { - TargetOutcome::Resolved { verdict, .. } => verdict.clone(), - _ => unreachable!(), - }) - .collect(); - assert!( - matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") - ); - assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); - assert!(matches!(&verdicts[2], VerdictStatus::Clean)); - - // Without a VerdictConfig the pass is a no-op. - let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; - let no_verdict = stub_opts(); - run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); - assert!(matches!( - &untouched[0], - TargetOutcome::Resolved { - verdict: VerdictStatus::NotChecked, - .. - } - )); - } - - /// The pool must verdict every job exactly once and return the flagged - /// job `Vulnerable` with the rest `Clean`, regardless of `concurrency` - /// (1 = serial, 8 > job count = all workers spawn but some drain empty). 
- #[test] - fn verdict_pool_returns_all_results() { - use std::collections::HashMap; - - let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); - let mut checks = HashMap::new(); - checks.insert( - key("evil"), - r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string(), - ); - let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); - - let cfg = VerdictConfig { - base_url: stub.base_url.clone(), - mode: VerdictMode::Authenticated { - token: "test-token".to_string(), - }, - public_login_hint: false, - }; - - let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] - .iter() - .map(|n| tree::TreePackage { - name: n.to_string(), - version: "1.0.0".to_string(), - requested: false, - }) - .collect(); - - for concurrency in [1usize, 8] { - let results = verdict_pool(jobs.clone(), &cfg, PackageManager::Pip, concurrency); - assert_eq!( - results.len(), - 6, - "concurrency {concurrency}: all jobs verdicted" - ); - let flagged = results - .iter() - .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) - .count(); - let clean = results - .iter() - .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) - .count(); - assert_eq!(flagged, 1, "concurrency {concurrency}: only evil flagged"); - assert_eq!(clean, 5, "concurrency {concurrency}: rest clean"); - let evil = results - .iter() - .find(|(p, _)| p.name == "evil") - .expect("evil present"); - assert!( - matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") - ); - } - } - - /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest - /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over - /// a direct-dep hit. 
- #[test] - fn apply_verdicts_assigns_origins() { - let pkg = |name: &str, requested: bool| tree::TreePackage { - name: name.to_string(), - version: "1.0.0".to_string(), - requested, - }; - let results = vec![ - (pkg("reqdep", true), VerdictStatus::Clean), - (pkg("predep", false), VerdictStatus::Clean), - (pkg("deepdep", false), VerdictStatus::Clean), - ]; - let direct_deps = std::collections::HashSet::from(["predep".to_string()]); - let mut outcomes = []; - let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); - tree.sort_by(|a, b| a.name.cmp(&b.name)); - let origins: Vec<(&str, TreeOrigin)> = - tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); - assert_eq!( - origins, - vec![ - ("deepdep", TreeOrigin::Transitive), - ("predep", TreeOrigin::PreExisting), - ("reqdep", TreeOrigin::Requested), - ] - ); - } } diff --git a/src/precheck/render.rs b/src/precheck/render.rs index 9c15201..5920267 100644 --- a/src/precheck/render.rs +++ b/src/precheck/render.rs @@ -38,8 +38,8 @@ pub(super) fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." ); } else if report.vulnerable_count() > 0 - || (super::authenticated_verdict(opts) && report.unverifiable_count() > 0) - || (super::authenticated_verdict(opts) && report.error_count() > 0) + || (super::verdict::authenticated_verdict(opts) && report.unverifiable_count() > 0) + || (super::verdict::authenticated_verdict(opts) && report.error_count() > 0) { eprintln!("Refusing to run install. Pass --force to proceed despite findings."); } else { @@ -57,7 +57,7 @@ pub(super) fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { /// right now. Only a truly bare install (`report.bare_install`) or /// manifest-declared `PreExisting` findings may blame the existing tree. 
fn refusal_blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { - let fail_closed = super::authenticated_verdict(opts); + let fail_closed = super::verdict::authenticated_verdict(opts); let named_findings = report.named_vulnerable_count() + if fail_closed { report.named_unverifiable_count() @@ -102,7 +102,7 @@ pub(super) fn requirements_note(parsed: &parse::ParsedInstall) { } pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { - if super::public_verdict(opts) && report.unverifiable_count() > 0 { + if super::verdict::public_verdict(opts) && report.unverifiable_count() > 0 { eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); } } diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs new file mode 100644 index 0000000..f2c9efb --- /dev/null +++ b/src/precheck/verdict.rs @@ -0,0 +1,460 @@ +//! Verdict pass: bounded vuln-api worker pool, result matching, and the +//! single block predicate (`should_block_install`). + +use std::time::Duration; + +use super::{ + tree, InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, VerdictConfig, VerdictStatus, +}; + +/// Above this many verdict jobs, print a stderr progress line so a big tree +/// pass doesn't look hung. +const VERDICT_PROGRESS_THRESHOLD: usize = 8; + +/// Max parallel vuln-api verdict requests. +pub(super) const VERDICT_CONCURRENCY: usize = 8; + +/// Bounded worker pool over the verdict jobs. On client/request failure every +/// job comes back `Unverifiable`; `should_block_install` decides whether that +/// fails closed for the selected mode. +/// Plain work queue, no new crates; `reqwest::blocking::Client` is +/// `Send + Sync`. Result order is not preserved; callers match results back +/// by `(name, version)`. 
+pub(super) fn verdict_pool( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, + concurrency: usize, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + use std::collections::VecDeque; + use std::sync::Mutex; + + let client = match crate::vuln_api::http_client() { + Ok(c) => c, + Err(e) => { + return jobs + .into_iter() + .map(|j| (j, VerdictStatus::Unverifiable(e.clone()))) + .collect(); + } + }; + + if jobs.len() > VERDICT_PROGRESS_THRESHOLD { + eprintln!("checking {} packages against Corgea vuln-api…", jobs.len()); + } + + let ecosystem = manager.ecosystem(); + let workers = concurrency.min(jobs.len()).max(1); + let queue = Mutex::new(VecDeque::from(jobs)); + let results = Mutex::new(Vec::new()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let Some(job) = queue.lock().unwrap().pop_front() else { + break; + }; + // vuln-api advisories are keyed by canonical names; an + // alternate spelling (PEP 503: `Flask_Cors` ≡ `flask-cors`) + // would miss and read as clean. + let verdict = match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + cfg.mode.auth_token(), + ecosystem, + &manager.normalize_name(&job.name), + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }; + results.lock().unwrap().push((job, verdict)); + }); + } + }); + results.into_inner().unwrap() +} + +/// Assign pooled verdicts onto matching named outcomes (by normalized +/// name + version) and return the unmatched leftovers — the tree findings. +/// Each leftover carries its provenance: pip's `requested` flag, membership +/// in the project manifest's direct deps (`direct_deps`), or transitive. 
+pub(super) fn apply_verdicts( + manager: PackageManager, + results: Vec<(tree::TreePackage, VerdictStatus)>, + outcomes: &mut [TargetOutcome], + direct_deps: &std::collections::HashSet, +) -> Vec { + let norm = |n: &str| manager.normalize_name(n); + // Index named outcomes by (normalized name, version) so matching the + // pooled results stays linear on big trees. + let mut named: std::collections::HashMap<(String, String), Vec> = + std::collections::HashMap::new(); + for (i, o) in outcomes.iter().enumerate() { + if let TargetOutcome::Resolved { resolved, .. } = o { + named + .entry((norm(&resolved.name), resolved.version.clone())) + .or_default() + .push(i); + } + } + + let mut transitive = Vec::new(); + for (pkg, verdict) in results { + if let Some(indices) = named.get(&(norm(&pkg.name), pkg.version.clone())) { + for &i in indices { + if let TargetOutcome::Resolved { verdict: v, .. } = &mut outcomes[i] { + *v = verdict.clone(); + } + } + } else { + let origin = if pkg.requested { + TreeOrigin::Requested + } else if direct_deps.contains(&pkg.name) { + TreeOrigin::PreExisting + } else { + TreeOrigin::Transitive + }; + transitive.push(TreeOutcome { + name: pkg.name, + version: pkg.version, + origin, + verdict, + }); + } + } + transitive +} + +pub(super) fn authenticated_verdict(opts: &PrecheckOptions) -> bool { + opts.verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.is_authenticated()) +} + +pub(super) fn public_verdict(opts: &PrecheckOptions) -> bool { + opts.verdict + .as_ref() + .is_some_and(|cfg| cfg.mode.is_public()) +} + +pub(super) fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + if opts.force { + return false; + } + // A resolution error means no verdict was obtained for that target, so + // in authenticated mode it fails closed like `Unverifiable` — otherwise a + // registry outage silently bypasses the gate. 
+ let fail_closed = authenticated_verdict(opts); + report.vulnerable_count() > 0 + || (fail_closed && report.unverifiable_count() > 0) + || (fail_closed && report.error_count() > 0) + || (!opts.no_fail && report.recent_count() > 0) +} + +pub(super) fn verify_one( + target: &InstallTarget, + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> TargetOutcome { + use crate::verify_deps::registry; + + let resolved = match &target.kind { + TargetKind::Unverifiable { reason } => { + return TargetOutcome::Skipped { + target: target.clone(), + reason: reason.clone(), + }; + } + TargetKind::Npm(spec) => { + registry::npm_resolve(&target.name, spec, opts.npm_registry.as_deref()) + } + TargetKind::Pypi(spec) => { + registry::pypi_resolve(&target.name, spec, opts.pypi_registry.as_deref()) + } + }; + + match resolved { + Ok(resolved) => { + // Future publish dates clamp to zero — maximally recent. + let age = now + .signed_duration_since(resolved.published_at) + .to_std() + .unwrap_or_else(|_| Duration::from_secs(0)); + TargetOutcome::Resolved { + target: target.clone(), + resolved, + age, + verdict: VerdictStatus::NotChecked, + } + } + Err(e) => TargetOutcome::Error { + target: target.clone(), + error: e, + }, + } +} + +#[cfg(test)] +mod tests { + use super::super::test_support::*; + use super::super::{ + run_verdict_pass, tree, InstallTarget, PackageManager, PrecheckOptions, TargetKind, + TargetOutcome, TreeOrigin, TreeOutcome, VerdictConfig, VerdictMode, VerdictStatus, + }; + use super::*; + + /// Predicate matrix: force ⇒ never block; vulnerable blocks in every + /// verdict mode; unverifiable/error findings block only in authenticated + /// mode; recency keeps its task-2 --no-fail demotion. 
+ #[test] + fn block_predicate_matrix() { + let clean = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Clean); + report_with(vec![o]) + }; + let recent = report_with(vec![resolved_outcome("pkg", "1.0.0", true)]); + let vulnerable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Vulnerable(vec![])); + report_with(vec![o]) + }; + let unverifiable = { + let mut o = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut o, VerdictStatus::Unverifiable("503".to_string())); + report_with(vec![o]) + }; + let resolution_error = report_with(vec![TargetOutcome::Error { + target: InstallTarget { + name: "pkg".to_string(), + display: "pkg==1.0.0".to_string(), + kind: TargetKind::Unverifiable { + reason: "test".to_string(), + }, + }, + error: "registry unavailable".to_string(), + }]); + + assert!(!should_block_install(&clean, &public_opts(false, false))); + assert!(should_block_install(&recent, &public_opts(false, false))); + assert!(!should_block_install(&recent, &public_opts(true, false))); + assert!(should_block_install( + &vulnerable, + &public_opts(false, false) + )); + assert!( + should_block_install(&vulnerable, &public_opts(true, false)), + "--no-fail must not waive a vulnerable block" + ); + assert!( + !should_block_install(&unverifiable, &public_opts(false, false)), + "public mode must fail open on lookup errors" + ); + assert!( + should_block_install(&unverifiable, &authenticated_opts(true, false)), + "authenticated mode must fail closed on lookup errors" + ); + assert!( + !should_block_install(&resolution_error, &public_opts(false, false)), + "public mode must fail open when no verdict can be obtained" + ); + assert!( + should_block_install(&resolution_error, &authenticated_opts(false, false)), + "authenticated mode must fail closed when no verdict can be obtained" + ); + for report in [ + &clean, + &recent, + &vulnerable, + &unverifiable, + &resolution_error, + ] { + 
assert!( + !should_block_install(report, &public_opts(false, true)), + "--force must never block" + ); + assert!(!should_block_install( + report, + &authenticated_opts(true, true) + )); + } + } + + /// A clean named outcome plus a vulnerable transitive tree finding must + /// roll into the block counts: `vulnerable_count() == 1`, + /// `should_block_install` true without `--force`, false with it. + #[test] + fn tree_findings_extend_block_counts() { + let mut named = resolved_outcome("pkg", "1.0.0", false); + set_verdict(&mut named, VerdictStatus::Clean); + let mut report = report_with(vec![named]); + report.tree = Some(super::super::TreeReport::Full { + resolved_count: 2, + transitive: vec![TreeOutcome { + name: "evildep".to_string(), + version: "0.4.2".to_string(), + origin: TreeOrigin::Transitive, + verdict: VerdictStatus::Vulnerable(vec![]), + }], + }); + + assert_eq!(report.vulnerable_count(), 1); + let opts = |force: bool| PrecheckOptions { + force, + ..stub_opts() + }; + assert!(should_block_install(&report, &opts(false))); + assert!(!should_block_install(&report, &opts(true))); + } + + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable + /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes + /// keep NotChecked. 
+ #[test] + fn verdict_pass_maps_stub_responses() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + checks.insert(key("flaky"), "{}".to_string()); + let mut statuses = HashMap::new(); + statuses.insert(key("flaky"), 503u16); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, statuses); + + let opts = verdict_opts(&stub.base_url); + + let mut outcomes = vec![ + resolved_outcome("evil", "1.0.0", false), + resolved_outcome("flaky", "1.0.0", false), + resolved_outcome("goodpkg", "1.0.0", false), // unknown → stub default clean + ]; + run_verdict_pass(PackageManager::Pip, &mut outcomes, &opts); + + let verdicts: Vec<_> = outcomes + .iter() + .map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => verdict.clone(), + _ => unreachable!(), + }) + .collect(); + assert!( + matches!(&verdicts[0], VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + assert!(matches!(&verdicts[1], VerdictStatus::Unverifiable(_))); + assert!(matches!(&verdicts[2], VerdictStatus::Clean)); + + // Without a VerdictConfig the pass is a no-op. + let mut untouched = vec![resolved_outcome("evil", "1.0.0", false)]; + let no_verdict = stub_opts(); + run_verdict_pass(PackageManager::Pip, &mut untouched, &no_verdict); + assert!(matches!( + &untouched[0], + TargetOutcome::Resolved { + verdict: VerdictStatus::NotChecked, + .. + } + )); + } + + /// The pool must verdict every job exactly once and return the flagged + /// job `Vulnerable` with the rest `Clean`, regardless of `concurrency` + /// (1 = serial, 8 > job count = all workers spawn but some drain empty). 
+ #[test] + fn verdict_pool_returns_all_results() { + use std::collections::HashMap; + + let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let mut checks = HashMap::new(); + checks.insert( + key("evil"), + r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, + "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, + "vulnerable_version_range":null,"fixed_version":null}]}"# + .to_string(), + ); + let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); + + let cfg = VerdictConfig { + base_url: stub.base_url.clone(), + mode: VerdictMode::Authenticated { + token: "test-token".to_string(), + }, + public_login_hint: false, + }; + + let jobs: Vec = ["a", "b", "evil", "c", "d", "e"] + .iter() + .map(|n| tree::TreePackage { + name: n.to_string(), + version: "1.0.0".to_string(), + requested: false, + }) + .collect(); + + for concurrency in [1usize, 8] { + let results = verdict_pool(jobs.clone(), &cfg, PackageManager::Pip, concurrency); + assert_eq!( + results.len(), + 6, + "concurrency {concurrency}: all jobs verdicted" + ); + let flagged = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Vulnerable(_))) + .count(); + let clean = results + .iter() + .filter(|(_, v)| matches!(v, VerdictStatus::Clean)) + .count(); + assert_eq!(flagged, 1, "concurrency {concurrency}: only evil flagged"); + assert_eq!(clean, 5, "concurrency {concurrency}: rest clean"); + let evil = results + .iter() + .find(|(p, _)| p.name == "evil") + .expect("evil present"); + assert!( + matches!(&evil.1, VerdictStatus::Vulnerable(m) if m[0].advisory_id == "MAL-2024-0001") + ); + } + } + + /// Leftover origin assignment: pip `requested` ⇒ Requested; manifest + /// direct dep ⇒ PreExisting; otherwise Transitive. Requested wins over + /// a direct-dep hit. 
+ #[test] + fn apply_verdicts_assigns_origins() { + let pkg = |name: &str, requested: bool| tree::TreePackage { + name: name.to_string(), + version: "1.0.0".to_string(), + requested, + }; + let results = vec![ + (pkg("reqdep", true), VerdictStatus::Clean), + (pkg("predep", false), VerdictStatus::Clean), + (pkg("deepdep", false), VerdictStatus::Clean), + ]; + let direct_deps = std::collections::HashSet::from(["predep".to_string()]); + let mut outcomes = []; + let mut tree = apply_verdicts(PackageManager::Npm, results, &mut outcomes, &direct_deps); + tree.sort_by(|a, b| a.name.cmp(&b.name)); + let origins: Vec<(&str, TreeOrigin)> = + tree.iter().map(|t| (t.name.as_str(), t.origin)).collect(); + assert_eq!( + origins, + vec![ + ("deepdep", TreeOrigin::Transitive), + ("predep", TreeOrigin::PreExisting), + ("reqdep", TreeOrigin::Requested), + ] + ); + } +} From f5d8b79b882e1f6bfdbda1df67e099549f475dea Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 19:09:10 +0200 Subject: [PATCH 42/59] Extract precheck uv module --- src/precheck/mod.rs | 177 +---------------------------------------- src/precheck/uv.rs | 190 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 175 deletions(-) create mode 100644 src/precheck/uv.rs diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index bab4de5..29ce33a 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -19,6 +19,7 @@ pub mod tree; mod detect; mod exec; mod render; +mod uv; mod verdict; #[cfg(test)] @@ -343,7 +344,7 @@ impl PrecheckReport { /// package manager with no arguments. 
pub fn run_install(manager: PackageManager, cmd: &[String], opts: PrecheckOptions) -> i32 { if manager == PackageManager::Uv { - return run_uv(cmd, opts); + return uv::run_uv(cmd, opts); } if cmd.is_empty() { @@ -414,115 +415,6 @@ fn unsupported_pip_add_message(rest: &[String]) -> String { ) } -fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { - let exec = || exec::exec_command("uv", cmd); - - if matches!(cmd.first().map(String::as_str), Some("install" | "i")) { - eprintln!("{}", unsupported_uv_install_message(&cmd[1..])); - return 1; - } - - match parse::classify_uv_command(cmd) { - parse::UvCommand::Passthrough => exec(), - parse::UvCommand::PipInstall { install_args } => { - let parsed = match parse::parse_pip_install_args(install_args) { - Ok(p) => p, - Err(e) => { - eprintln!("failed to parse install args: {}", e); - return 2; - } - }; - run_parsed_install( - PackageManager::Uv, - "pip install", - install_args, - parsed, - exec, - opts, - ) - } - parse::UvCommand::Add { add_args } => { - let parsed = parse::parse_pypi_positionals_args(add_args); - if let Some(message) = - detect::wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) - { - eprintln!("{message}"); - return 1; - } - run_parsed_install(PackageManager::Uv, "add", add_args, parsed, exec, opts) - } - parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), - } -} - -fn unsupported_uv_install_message(rest: &[String]) -> String { - format!( - "error: uv does not support top-level `install`.\nDid you mean `{}`?", - corgea_cmd(&["uv", "pip", "install"], rest) - ) -} - -/// Gate `uv sync` from the project's `uv.lock`. The lockfile is the full -/// locked universe (all groups/extras) — a superset of what sync installs, -/// conservative in the blocking direction; a stale lock that sync would -/// re-resolve is gated as written. Recency isn't checked (locked versions -/// aren't newly chosen by this command); the verdict pass is the gate. 
We -/// never run `uv lock` ourselves — locking can build sdists, which would -/// execute package code before any verdict. -fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { - let Some(cfg) = &opts.verdict else { - // Direct callers may still disable verdicts completely. - return exec(); - }; - let lock = match std::fs::read_to_string("uv.lock") { - Ok(content) => content, - Err(_) => { - eprintln!( - "note: no uv.lock here — 'uv sync' is not gated; dependencies install unchecked (run 'uv lock' first to enable the gate)" - ); - return exec(); - } - }; - let jobs = match parse_uv_lock(&lock) { - Ok(jobs) => jobs, - Err(e) if opts.force => { - eprintln!("warning: cannot verify 'uv sync' ({e}); proceeding under --force"); - return exec(); - } - Err(e) => { - eprintln!("error: cannot verify 'uv sync': {e} (pass --force to proceed unchecked)"); - return 1; - } - }; - - let resolved_count = jobs.len(); - let results = - verdict::verdict_pool(jobs, cfg, PackageManager::Uv, verdict::VERDICT_CONCURRENCY); - let transitive = results - .into_iter() - .map(|(pkg, verdict)| TreeOutcome { - name: pkg.name, - version: pkg.version, - origin: TreeOrigin::Locked, - verdict, - }) - .collect(); - let report = PrecheckReport { - manager: PackageManager::Uv, - subcommand: "sync".to_string(), - original_args: cmd[1..].to_vec(), - outcomes: Vec::new(), - threshold: opts.threshold, - tree: Some(TreeReport::Full { - resolved_count, - transitive, - }), - bare_install: true, - }; - - report_and_exec(&report, &opts, exec) -} - /// Shared tail of every gated path: render the report, refuse (exit 1) when /// the block predicate fires, otherwise run the install. fn report_and_exec( @@ -545,39 +437,6 @@ fn report_and_exec( exec() } -/// Packages from `uv.lock` that `uv sync` installs from an index. Local -/// stanzas (the project itself and path deps: editable / virtual / -/// directory / path sources) carry no registry identity and are skipped. 
-fn parse_uv_lock(content: &str) -> Result, String> { - #[derive(serde::Deserialize)] - struct Lock { - #[serde(default)] - package: Vec, - } - #[derive(serde::Deserialize)] - struct Pkg { - name: String, - version: Option, - #[serde(default)] - source: std::collections::BTreeMap, - } - const LOCAL_SOURCES: [&str; 4] = ["editable", "virtual", "directory", "path"]; - - let lock: Lock = toml::from_str(content).map_err(|e| format!("parse uv.lock: {e}"))?; - Ok(lock - .package - .into_iter() - .filter(|p| !LOCAL_SOURCES.iter().any(|k| p.source.contains_key(*k))) - .filter_map(|p| { - Some(tree::TreePackage { - name: p.name, - version: p.version?, - requested: false, - }) - }) - .collect()) -} - /// Post-parse verification shared by npm/yarn/pnpm/pip and uv install paths. fn run_parsed_install( manager: PackageManager, @@ -784,38 +643,6 @@ mod tests { assert!(!PackageManager::Pip.is_install_subcommand("freeze")); } - #[test] - fn parse_uv_lock_keeps_index_packages_and_skips_local_sources() { - let lock = r#" -version = 1 - -[[package]] -name = "proj" -version = "0.1.0" -source = { editable = "." } - -[[package]] -name = "evildep" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } - -[[package]] -name = "gitdep" -version = "1.2.3" -source = { git = "https://example.com/repo?rev=abc#abc" } -"#; - let pkgs = parse_uv_lock(lock).expect("parse uv.lock"); - let names: Vec<&str> = pkgs.iter().map(|p| p.name.as_str()).collect(); - assert_eq!(names, vec!["evildep", "gitdep"]); - assert_eq!(pkgs[0].version, "0.4.2"); - } - - #[test] - fn parse_uv_lock_rejects_invalid_toml() { - let err = parse_uv_lock("not = [valid").expect_err("invalid toml"); - assert!(err.contains("parse uv.lock"), "got: {err}"); - } - /// Run `run_parsed_install` for `pip install ` with an exec /// closure that records whether it ran (returning 42 instead of /// spawning anything). 
diff --git a/src/precheck/uv.rs b/src/precheck/uv.rs new file mode 100644 index 0000000..2efe8cb --- /dev/null +++ b/src/precheck/uv.rs @@ -0,0 +1,190 @@ +//! `corgea uv` routing: `uv pip install` / `uv add` reuse the parsed-install +//! gate; `uv sync` is gated from `uv.lock`. + +use super::{ + corgea_cmd, detect, exec, parse, tree, verdict, PackageManager, PrecheckOptions, + PrecheckReport, TreeOrigin, TreeOutcome, TreeReport, +}; + +pub(super) fn run_uv(cmd: &[String], opts: PrecheckOptions) -> i32 { + let exec = || exec::exec_command("uv", cmd); + + if matches!(cmd.first().map(String::as_str), Some("install" | "i")) { + eprintln!("{}", unsupported_uv_install_message(&cmd[1..])); + return 1; + } + + match parse::classify_uv_command(cmd) { + parse::UvCommand::Passthrough => exec(), + parse::UvCommand::PipInstall { install_args } => { + let parsed = match parse::parse_pip_install_args(install_args) { + Ok(p) => p, + Err(e) => { + eprintln!("failed to parse install args: {}", e); + return 2; + } + }; + super::run_parsed_install( + PackageManager::Uv, + "pip install", + install_args, + parsed, + exec, + opts, + ) + } + parse::UvCommand::Add { add_args } => { + let parsed = parse::parse_pypi_positionals_args(add_args); + if let Some(message) = + detect::wrong_package_manager_message(PackageManager::Uv, add_args, &parsed) + { + eprintln!("{message}"); + return 1; + } + super::run_parsed_install(PackageManager::Uv, "add", add_args, parsed, exec, opts) + } + parse::UvCommand::Sync => run_uv_sync(cmd, opts, exec), + } +} + +fn unsupported_uv_install_message(rest: &[String]) -> String { + format!( + "error: uv does not support top-level `install`.\nDid you mean `{}`?", + corgea_cmd(&["uv", "pip", "install"], rest) + ) +} + +/// Gate `uv sync` from the project's `uv.lock`. 
The lockfile is the full +/// locked universe (all groups/extras) — a superset of what sync installs, +/// conservative in the blocking direction; a stale lock that sync would +/// re-resolve is gated as written. Recency isn't checked (locked versions +/// aren't newly chosen by this command); the verdict pass is the gate. We +/// never run `uv lock` ourselves — locking can build sdists, which would +/// execute package code before any verdict. +fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32) -> i32 { + let Some(cfg) = &opts.verdict else { + // Direct callers may still disable verdicts completely. + return exec(); + }; + let lock = match std::fs::read_to_string("uv.lock") { + Ok(content) => content, + Err(_) => { + eprintln!( + "note: no uv.lock here — 'uv sync' is not gated; dependencies install unchecked (run 'uv lock' first to enable the gate)" + ); + return exec(); + } + }; + let jobs = match parse_uv_lock(&lock) { + Ok(jobs) => jobs, + Err(e) if opts.force => { + eprintln!("warning: cannot verify 'uv sync' ({e}); proceeding under --force"); + return exec(); + } + Err(e) => { + // The single documented bypass of the "all blocking goes through + // `verdict::should_block_install`" invariant: an unparsable + // uv.lock means there is no report to feed the predicate, so the + // gate refuses directly (--force above is the only escape). 
+ eprintln!("error: cannot verify 'uv sync': {e} (pass --force to proceed unchecked)"); + return 1; + } + }; + + let resolved_count = jobs.len(); + let results = + verdict::verdict_pool(jobs, cfg, PackageManager::Uv, verdict::VERDICT_CONCURRENCY); + let transitive = results + .into_iter() + .map(|(pkg, verdict)| TreeOutcome { + name: pkg.name, + version: pkg.version, + origin: TreeOrigin::Locked, + verdict, + }) + .collect(); + let report = PrecheckReport { + manager: PackageManager::Uv, + subcommand: "sync".to_string(), + original_args: cmd[1..].to_vec(), + outcomes: Vec::new(), + threshold: opts.threshold, + tree: Some(TreeReport::Full { + resolved_count, + transitive, + }), + bare_install: true, + }; + + super::report_and_exec(&report, &opts, exec) +} + +/// Packages from `uv.lock` that `uv sync` installs from an index. Local +/// stanzas (the project itself and path deps: editable / virtual / +/// directory / path sources) carry no registry identity and are skipped. +fn parse_uv_lock(content: &str) -> Result, String> { + #[derive(serde::Deserialize)] + struct Lock { + #[serde(default)] + package: Vec, + } + #[derive(serde::Deserialize)] + struct Pkg { + name: String, + version: Option, + #[serde(default)] + source: std::collections::BTreeMap, + } + const LOCAL_SOURCES: [&str; 4] = ["editable", "virtual", "directory", "path"]; + + let lock: Lock = toml::from_str(content).map_err(|e| format!("parse uv.lock: {e}"))?; + Ok(lock + .package + .into_iter() + .filter(|p| !LOCAL_SOURCES.iter().any(|k| p.source.contains_key(*k))) + .filter_map(|p| { + Some(tree::TreePackage { + name: p.name, + version: p.version?, + requested: false, + }) + }) + .collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_uv_lock_keeps_index_packages_and_skips_local_sources() { + let lock = r#" +version = 1 + +[[package]] +name = "proj" +version = "0.1.0" +source = { editable = "." 
} + +[[package]] +name = "evildep" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } + +[[package]] +name = "gitdep" +version = "1.2.3" +source = { git = "https://example.com/repo?rev=abc#abc" } +"#; + let pkgs = parse_uv_lock(lock).expect("parse uv.lock"); + let names: Vec<&str> = pkgs.iter().map(|p| p.name.as_str()).collect(); + assert_eq!(names, vec!["evildep", "gitdep"]); + assert_eq!(pkgs[0].version, "0.4.2"); + } + + #[test] + fn parse_uv_lock_rejects_invalid_toml() { + let err = parse_uv_lock("not = [valid").expect_err("invalid toml"); + assert!(err.contains("parse uv.lock"), "got: {err}"); + } +} From e93a284bcfd9be741b110840ec6a1751399e155c Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 19:17:14 +0200 Subject: [PATCH 43/59] Deduplicate PyPI name extraction --- src/precheck/parse.rs | 69 ++++++++++++++++++++++++++++++++++++++++++- src/precheck/tree.rs | 34 ++++++++++++--------- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index 30c31d5..e1477b4 100644 --- a/src/precheck/parse.rs +++ b/src/precheck/parse.rs @@ -403,7 +403,7 @@ fn parse_pypi_spec(raw: &str) -> InstallTarget { }; // Strip extras: `requests[security]` -> `requests`. - let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); + let name_no_extras = pypi_name_part(name_part, PypiNameCut::ParseNamePart); // Strip env markers: `package; python_version >= "3.7"`. let spec_no_marker = spec_part.split(';').next().unwrap_or(spec_part).trim(); @@ -432,6 +432,29 @@ fn parse_pypi_spec(raw: &str) -> InstallTarget { } } +#[derive(Debug, Clone, Copy)] +pub(super) enum PypiNameCut { + /// Existing `parse_pypi_spec` behavior: the caller already split at the + /// leftmost version operator, so only extras are removed from the name part. + ParseNamePart, + /// Existing requirements-line behavior: stop at extras, markers, + /// operators, or whitespace. 
+ RequirementLine, +} + +/// Bare PyPI name extraction only; callers normalize when they need a +/// comparison key. +pub(super) fn pypi_name_part(spec: &str, cut: PypiNameCut) -> &str { + let stop = |c: char| match cut { + PypiNameCut::ParseNamePart => c == '[', + PypiNameCut::RequirementLine => { + matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ') + } + }; + let cut = spec.find(stop).unwrap_or(spec.len()); + spec[..cut].trim() +} + #[cfg(test)] mod tests { use super::*; @@ -625,6 +648,50 @@ mod tests { "env marker must not leak into the spec: {:?}", t.kind ); + + let marker_only = parse_pypi_spec("pkg; python_version >= \"3.7\""); + assert_eq!(marker_only.name, "pkg; python_version"); + assert!( + matches!(marker_only.kind, TargetKind::Pypi(PypiSpec::Specifier(ref s)) if s == ">= \"3.7\""), + "got {:?}", + marker_only.kind + ); + } + + #[test] + fn pypi_name_part_strips_extras_markers_and_operators() { + assert_eq!( + pypi_name_part("requests", PypiNameCut::ParseNamePart), + "requests" + ); + assert_eq!( + pypi_name_part("requests[security]", PypiNameCut::ParseNamePart), + "requests" + ); + assert_eq!( + pypi_name_part("pkg; python_version ", PypiNameCut::ParseNamePart), + "pkg; python_version" + ); + assert_eq!( + pypi_name_part("requests[security]==2.31.0", PypiNameCut::RequirementLine), + "requests" + ); + assert_eq!( + pypi_name_part("Flask_Cors>=4.0", PypiNameCut::RequirementLine), + "Flask_Cors" + ); + assert_eq!( + pypi_name_part( + "pkg; python_version >= \"3.7\"", + PypiNameCut::RequirementLine + ), + "pkg" + ); + assert_eq!( + pypi_name_part("pkg ==1.0", PypiNameCut::RequirementLine), + "pkg" + ); + assert_eq!(pypi_name_part("", PypiNameCut::RequirementLine), ""); } #[test] diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index 03af40d..d07f316 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -155,7 +155,8 @@ fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result Result std::collections::HashSet { - let norm = 
|n: &str| PackageManager::Uv.normalize_name(n); +fn requested_names( + manager: PackageManager, + parsed: &super::parse::ParsedInstall, +) -> std::collections::HashSet { + let norm = |n: &str| manager.normalize_name(n); let mut out: std::collections::HashSet = parsed .targets .iter() @@ -180,12 +184,10 @@ fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::Ha if line.is_empty() || line.starts_with(['#', '-']) || line.contains("://") { continue; } - let name: String = line - .chars() - .take_while(|c| !matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ')) - .collect(); + let name = + super::parse::pypi_name_part(line, super::parse::PypiNameCut::RequirementLine); if !name.is_empty() { - out.insert(norm(&name)); + out.insert(norm(name)); } } } @@ -198,6 +200,7 @@ fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::Ha fn parse_compiled_requirements( out: &str, requested: &std::collections::HashSet, + manager: PackageManager, ) -> Result, String> { let mut pkgs = Vec::new(); for line in out.lines() { @@ -214,9 +217,10 @@ fn parse_compiled_requirements( )); }; // Strip extras: `celery[redis]==5.3.4`. 
- let name = name.split('[').next().unwrap_or(name).trim().to_string(); + let name = super::parse::pypi_name_part(name, super::parse::PypiNameCut::RequirementLine) + .to_string(); pkgs.push(TreePackage { - requested: requested.contains(&PackageManager::Uv.normalize_name(&name)), + requested: requested.contains(&manager.normalize_name(&name)), name, version: version.trim().to_string(), }); @@ -396,7 +400,8 @@ mod tests { fn parse_compiled_requirements_pins_extras_and_markers() { let requested = std::collections::HashSet::from(["flask-cors".to_string()]); let out = "Flask_Cors==4.0.0\ncelery[redis]==5.3.4\nwerkzeug==3.1.8 ; python_version >= \"3.9\"\n\n# comment\n--index-url https://example.com\n"; - let pkgs = parse_compiled_requirements(out, &requested).expect("parse pins"); + let pkgs = + parse_compiled_requirements(out, &requested, PackageManager::Uv).expect("parse pins"); assert_eq!( pkgs, vec![ @@ -422,9 +427,10 @@ mod tests { #[test] fn parse_compiled_requirements_rejects_non_pins() { let none = std::collections::HashSet::new(); - let err = parse_compiled_requirements("flask>=2.0\n", &none).expect_err("not a pin"); + let err = parse_compiled_requirements("flask>=2.0\n", &none, PackageManager::Uv) + .expect_err("not a pin"); assert!(err.contains("unexpected line"), "got: {err}"); - let err = parse_compiled_requirements("", &none).expect_err("empty"); + let err = parse_compiled_requirements("", &none, PackageManager::Uv).expect_err("empty"); assert!(err.contains("no packages"), "got: {err}"); } @@ -447,7 +453,7 @@ mod tests { }], requirements_files: vec![req], }; - let names = requested_names(&parsed); + let names = requested_names(PackageManager::Uv, &parsed); for name in ["celery", "flask-cors", "requests"] { assert!(names.contains(name), "missing {name}: {names:?}"); } From ed60f960e206d4508bc0fa23962f1b4467090d28 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 19:42:38 +0200 Subject: [PATCH 44/59] Consolidate install gate test harnesses --- 
tests/cli_bare_install.rs | 165 +++++++++++++------------- tests/cli_exec_fallback.rs | 61 +++------- tests/cli_install.rs | 171 +++++++++++---------------- tests/cli_refusal_context.rs | 12 +- tests/cli_uv_sync.rs | 95 +++++++-------- tests/cli_verdict.rs | 24 ++-- tests/common/mod.rs | 223 +++++++++++++++++++++++++++-------- 7 files changed, 407 insertions(+), 344 deletions(-) diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index c607a43..a5c6f73 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -15,83 +15,25 @@ mod common; -use common::{ - corgea_isolated, key, spawn_oldpkg_registry_stub, vulnerable_body, write_fake_recorder, - write_fake_tree_pm, NPM_LOCK, RESOLUTION_FAILS, -}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, vulnerable_body, GateHarness, NPM_LOCK, RESOLUTION_FAILS}; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; -use tempfile::TempDir; const PACKAGE_JSON: &str = r#"{"name":"proj","version":"1.0.0","dependencies":{"oldpkg":"1.0.0"}}"#; -fn vulnerable_evildep_body() -> String { - vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None) -} - -/// `corgea` wired to a fake package manager, the registry + vuln-api stubs, -/// a token, and a throwaway project dir as cwd. -struct BareHarness { - cmd: Command, - marker: PathBuf, - project: TempDir, - _home: TempDir, - _bin: TempDir, -} - -impl BareHarness { - /// `npm_payload`: `Some` wires a tree-aware fake npm with that canned - /// lockfile (or `RESOLUTION_FAILS`); `None` wires a plain recorder for - /// `binary`. `exit_code` is what the fake exits with on the exec'd - /// (non-tree) invocation. 
- fn new( - binary: &str, - checks: HashMap, - npm_payload: Option<&str>, - exit_code: i32, - ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let project = TempDir::new().expect("project dir"); - let marker = bin.path().join("pm-argv.txt"); - match npm_payload { - Some(payload) => write_fake_tree_pm(bin.path(), "npm", &marker, payload, exit_code), - None => write_fake_recorder(bin.path(), binary, &marker, exit_code), - } - let registry = spawn_oldpkg_registry_stub(); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); - cmd.env("PATH", bin.path()) - .env("CORGEA_NPM_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token") - .current_dir(project.path()); - Self { - cmd, - marker, - project, - _home: home, - _bin: bin, - } - } - - fn with_package_json(self) -> Self { - std::fs::write(self.project.path().join("package.json"), PACKAGE_JSON) - .expect("write package.json"); - self - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } -} - #[test] fn bare_npm_install_vulnerable_lockfile_blocks() { let mut checks = HashMap::new(); - checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); - let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(1), "vulnerable lockfile must block"); assert_eq!( @@ -118,7 +60,13 @@ fn bare_npm_install_vulnerable_lockfile_blocks() { #[test] fn bare_npm_install_clean_lockfile_proceeds() { - let mut h = 
BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0), "clean tree must proceed"); assert_eq!(h.recorded_argv().as_deref(), Some("install")); @@ -132,8 +80,17 @@ fn bare_npm_install_clean_lockfile_proceeds() { #[test] fn bare_npm_install_force_overrides_block() { let mut checks = HashMap::new(); - checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); - let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); let out = h .cmd .args(["npm", "--force", "install"]) @@ -150,8 +107,17 @@ fn bare_npm_install_force_overrides_block() { #[test] fn bare_npm_install_json_carries_tree_object() { let mut checks = HashMap::new(); - checks.insert(key("npm", "evildep", "0.4.2"), vulnerable_evildep_body()); - let mut h = BareHarness::new("npm", checks, Some(NPM_LOCK), 0).with_package_json(); + checks.insert( + key("npm", "evildep", "0.4.2"), + vulnerable_body("npm", "evildep", "0.4.2", "MAL-2024-0002", None), + ); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(checks) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); let out = h .cmd .args(["npm", "--json", "install"]) @@ -174,8 +140,13 @@ fn bare_npm_install_json_carries_tree_object() { fn 
bare_npm_resolution_failure_falls_back_with_warning() { // Fake npm exits 1 on `--package-lock-only`. Nothing named remains to // verify, so the install proceeds behind the loud fallback warning. - let mut h = - BareHarness::new("npm", HashMap::new(), Some(RESOLUTION_FAILS), 0).with_package_json(); + let mut h = GateHarness::new() + .fake_tree_pm("npm", RESOLUTION_FAILS, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0), "fallback must proceed"); assert_eq!(h.recorded_argv().as_deref(), Some("install")); @@ -189,7 +160,13 @@ fn bare_npm_resolution_failure_falls_back_with_warning() { #[test] fn bare_npm_without_package_json_passes_through() { // No package.json in cwd → nothing to resolve → straight exec, no gate. - let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 3); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 3) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .in_project_dir() + .build(); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(3), "npm's own exit code propagates"); assert_eq!(h.recorded_argv().as_deref(), Some("install")); @@ -204,7 +181,13 @@ fn bare_npm_without_package_json_passes_through() { #[test] fn bare_npm_tokenless_runs_public_tree_check() { // package.json present but no token → public mode still verdicts the tree. 
- let mut h = BareHarness::new("npm", HashMap::new(), Some(NPM_LOCK), 0).with_package_json(); + let mut h = GateHarness::new() + .fake_tree_pm("npm", NPM_LOCK, 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("package.json", PACKAGE_JSON) + .build(); h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); @@ -227,7 +210,13 @@ fn bare_ungated_managers_print_note_and_exec() { ("uv", &["uv", "pip", "install"][..], "pip install", 0), ]; for (binary, args, forwarded_argv, exit_code) in cases { - let mut h = BareHarness::new(binary, HashMap::new(), None, exit_code); + let mut h = GateHarness::new() + .fake_recorder(binary, exit_code) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .in_project_dir() + .build(); let out = h.cmd.args(args).output().expect("run corgea"); assert_eq!(out.status.code(), Some(exit_code), "{args:?}"); assert_eq!(h.recorded_argv().as_deref(), Some(forwarded_argv)); @@ -245,7 +234,13 @@ fn bare_ungated_managers_print_note_and_exec() { #[test] fn bare_yarn_note_prints_without_token_too() { - let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + let mut h = GateHarness::new() + .fake_recorder("yarn", 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .in_project_dir() + .build(); h.cmd.env_remove("CORGEA_TOKEN"); let out = h .cmd @@ -263,7 +258,13 @@ fn bare_yarn_note_prints_without_token_too() { #[test] fn yarn_named_target_does_not_print_bare_note() { // A named target takes the gated path: named-only warning, no bare note. 
- let mut h = BareHarness::new("yarn", HashMap::new(), None, 0); + let mut h = GateHarness::new() + .fake_recorder("yarn", 0) + .oldpkg_registry() + .vuln_checks(HashMap::new()) + .token("test-token") + .in_project_dir() + .build(); let out = h .cmd .args(["yarn", "add", "oldpkg@1.0.0"]) diff --git a/tests/cli_exec_fallback.rs b/tests/cli_exec_fallback.rs index b5946ca..87dcdbd 100644 --- a/tests/cli_exec_fallback.rs +++ b/tests/cli_exec_fallback.rs @@ -10,51 +10,14 @@ mod common; -use common::{corgea_isolated, spawn_oldpkg_registry_stub, write_fake_recorder}; -use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; -use tempfile::TempDir; - -/// Isolated `corgea` wired to the PyPI and vuln-api stubs, with `PATH` set -/// to a private temp dir containing only the named fake binaries. -struct FallbackHarness { - cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} - -impl FallbackHarness { - fn new(binaries: &[&str]) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - for binary in binaries { - write_fake_recorder(bin.path(), binary, &marker, 0); - } - let registry = spawn_oldpkg_registry_stub(); - let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); - Self { - cmd, - marker, - _home: home, - _bin: bin, - } - } - - /// The argv the fake package manager was invoked with, if it ran. 
- fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } -} +use common::GateHarness; #[test] fn pip_install_falls_back_to_pip3_when_pip_missing() { - let mut h = FallbackHarness::new(&["pip3"]); + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -76,7 +39,10 @@ fn pip_install_falls_back_to_pip3_when_pip_missing() { #[test] fn pip_passthrough_falls_back_to_pip3() { - let mut h = FallbackHarness::new(&["pip3"]); + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); let out = h.cmd.args(["pip", "list"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("list")); @@ -84,7 +50,7 @@ fn pip_passthrough_falls_back_to_pip3() { #[test] fn pip_missing_both_pip_and_pip3_exits_127_with_message() { - let mut h = FallbackHarness::new(&[]); + let mut h = GateHarness::new().oldpkg_registry().build(); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -101,7 +67,10 @@ fn pip_missing_both_pip_and_pip3_exits_127_with_message() { #[test] fn pip3_top_level_command_prints_pip_wrapper_suggestion() { - let mut h = FallbackHarness::new(&["pip3"]); + let mut h = GateHarness::new() + .fake_recorder("pip3", 0) + .oldpkg_registry() + .build(); let out = h .cmd .args(["pip3", "install", "oldpkg==1.0.0"]) @@ -127,7 +96,7 @@ fn pip3_top_level_command_prints_pip_wrapper_suggestion() { #[test] fn npm_missing_binary_error_names_binary_without_fallback() { - let mut h = FallbackHarness::new(&[]); + let mut h = GateHarness::new().oldpkg_registry().build(); let out = h.cmd.args(["npm", "list"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(127)); let stderr = String::from_utf8_lossy(&out.stderr); diff --git a/tests/cli_install.rs b/tests/cli_install.rs index 55f3485..b288d79 100644 --- a/tests/cli_install.rs +++ 
b/tests/cli_install.rs @@ -14,12 +14,9 @@ mod common; use common::{ - corgea_isolated, spawn_http_stub, write_fake_recorder, write_fake_tree_pm, write_script, - NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, OLDPKG_PYPI_JSON, RESOLUTION_FAILS, + spawn_http_stub, GateHarness, NOT_FOUND_JSON, OLDPKG_NPM_PACKUMENT, OLDPKG_PYPI_JSON, + RESOLUTION_FAILS, }; -use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use tempfile::TempDir; @@ -63,80 +60,48 @@ fn spawn_registry_stub() -> (String, Arc) { (base_url, hits) } -/// A ready-to-run wrapper invocation: isolated `corgea` command with the -/// registry stub wired in and a fake `binary` on a PATH of its own. -struct WrapperHarness { - cmd: Command, - marker: PathBuf, - registry_hits: Arc, - _home: TempDir, - _bin: TempDir, - _vuln_stub: corgea::vuln_api_stub::VulnApiStub, +fn wrapper(binary: &str, registry_env: &str, pm_exit_code: i32) -> GateHarness { + wrapper_with_hits(binary, registry_env, pm_exit_code).0 } -impl WrapperHarness { - /// `registry_env` is `CORGEA_PYPI_REGISTRY` or `CORGEA_NPM_REGISTRY`, - /// matching `binary`'s ecosystem. 
- fn new(binary: &str, registry_env: &str, pm_exit_code: i32) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - match binary { - "npm" | "pip" => { - write_fake_tree_pm(bin.path(), binary, &marker, RESOLUTION_FAILS, pm_exit_code) - } - _ => write_fake_recorder(bin.path(), binary, &marker, pm_exit_code), - } - let (base_url, registry_hits) = spawn_registry_stub(); - let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); - cmd.env("PATH", bin.path()) - .env(registry_env, &base_url) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); - Self { - cmd, - marker, - registry_hits, - _home: home, - _bin: bin, - _vuln_stub: vuln_stub, - } - } - - fn new_externally_managed_pip() -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - let fake_python = bin.path().join("python-managed"); - let python_script = format!( - "#!/bin/sh\nif [ \"$1\" = \"-c\" ]; then printf '1\\n'; exit 0; fi\nprintf '%s' \"$*\" > '{}'\nexit 0\n", - marker.display() - ); - write_script(bin.path(), "python-managed", &python_script); - write_script(bin.path(), "pip", &format!("#!{}\n", fake_python.display())); - let (base_url, registry_hits) = spawn_registry_stub(); - let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(HashMap::new(), HashMap::new()); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", &base_url) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); - Self { - cmd, - marker, - registry_hits, - _home: home, - _bin: bin, - _vuln_stub: vuln_stub, - } - } +fn wrapper_with_hits( + binary: &str, + registry_env: &str, + pm_exit_code: i32, +) -> (GateHarness, Arc) { + let (base_url, registry_hits) = spawn_registry_stub(); + let h = GateHarness::new(); + let h = match binary { + "npm" | "pip" => h.fake_tree_pm(binary, RESOLUTION_FAILS, 
pm_exit_code), + _ => h.fake_recorder(binary, pm_exit_code), + }; + ( + h.registry_env(registry_env, &base_url).build(), + registry_hits, + ) +} - /// The argv the fake package manager was invoked with, if it ran. - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } +fn externally_managed_pip() -> (GateHarness, Arc) { + let (base_url, registry_hits) = spawn_registry_stub(); + let h = GateHarness::new() + .script_with_paths("python-managed", |_, marker| { + format!( + "#!/bin/sh\nif [ \"$1\" = \"-c\" ]; then printf '1\\n'; exit 0; fi\nprintf '%s' \"$*\" > '{}'\nexit 0\n", + marker.display() + ) + }) + .script_with_paths("pip", |bin, _| { + format!("#!{}\n", bin.join("python-managed").display()) + }); + ( + h.registry_env("CORGEA_PYPI_REGISTRY", &base_url).build(), + registry_hits, + ) } #[test] fn pip_fresh_pin_blocks_without_running_install() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "install", "freshpkg==9.9.9"]) @@ -155,7 +120,7 @@ fn pip_fresh_pin_blocks_without_running_install() { #[test] fn pip_old_pin_runs_install_with_forwarded_args() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -175,7 +140,7 @@ fn pip_old_pin_runs_install_with_forwarded_args() { #[test] fn pip_no_fail_demotes_block_and_installs() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "--no-fail", "install", "freshpkg==9.9.9"]) @@ -198,7 +163,7 @@ fn pip_no_fail_demotes_block_and_installs() { #[test] fn pip_non_install_subcommand_passes_through_without_registry_hit() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("pip", 
"CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "list"]) @@ -207,7 +172,7 @@ fn pip_non_install_subcommand_passes_through_without_registry_hit() { assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("list")); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "passthrough must not touch the registry" ); @@ -215,7 +180,7 @@ fn pip_non_install_subcommand_passes_through_without_registry_hit() { #[test] fn pip_add_blocks_with_install_suggestion_without_running_pip() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "add", "oldpkg"]) @@ -225,7 +190,7 @@ fn pip_add_blocks_with_install_suggestion_without_running_pip() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "pip must not run"); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "invalid pip command must not touch the registry" ); @@ -242,7 +207,7 @@ fn pip_add_blocks_with_install_suggestion_without_running_pip() { #[test] fn externally_managed_pip_blocks_before_registry_checks() { - let mut h = WrapperHarness::new_externally_managed_pip(); + let (mut h, registry_hits) = externally_managed_pip(); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -252,7 +217,7 @@ fn externally_managed_pip_blocks_before_registry_checks() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "pip must not run"); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "externally-managed preflight must run before registry checks" ); @@ -276,7 +241,7 @@ fn externally_managed_pip_blocks_before_registry_checks() { #[test] fn pip_json_reports_fresh_pin_as_recent() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let mut h = wrapper("pip", 
"CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "--json", "install", "freshpkg==9.9.9"]) @@ -296,7 +261,7 @@ fn pip_resolution_error_prints_error_but_install_proceeds() { // `nosuchpkg` hits the stub's 404 route → an error outcome, which // warns but does not block in public mode (authenticated mode fails // closed — see cli_verdict.rs) — the install must still run. - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "install", "nosuchpkg==1.0.0"]) @@ -310,7 +275,7 @@ fn pip_resolution_error_prints_error_but_install_proceeds() { String::from_utf8_lossy(&out.stderr) ); assert!( - h.registry_hits.load(Ordering::SeqCst) >= 1, + registry_hits.load(Ordering::SeqCst) >= 1, "the 404 route must have been hit" ); assert_eq!( @@ -325,7 +290,7 @@ fn pip_resolution_error_prints_error_but_install_proceeds() { #[test] fn pip_mixed_fresh_and_old_pins_block_without_running_install() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["pip", "install", "freshpkg==9.9.9", "oldpkg==1.0.0"]) @@ -344,7 +309,7 @@ fn pip_mixed_fresh_and_old_pins_block_without_running_install() { #[test] fn npm_fresh_pin_blocks_without_running_install() { - let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); let out = h .cmd .args(["npm", "install", "freshpkg@9.9.9"]) @@ -361,7 +326,7 @@ fn npm_fresh_pin_blocks_without_running_install() { #[test] fn npm_old_pin_runs_install_with_forwarded_args() { - let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let mut h = wrapper("npm", "CORGEA_NPM_REGISTRY", 0); let out = h .cmd .args(["npm", "install", "oldpkg@1.0.0"]) @@ -379,7 +344,7 @@ fn npm_old_pin_runs_install_with_forwarded_args() { #[test] fn 
npm_in_pnpm_lock_project_blocks_with_pnpm_add_suggestion() { - let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("npm", "CORGEA_NPM_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write(project.path().join("package.json"), r#"{"name":"proj"}"#) .expect("write package.json"); @@ -399,7 +364,7 @@ fn npm_in_pnpm_lock_project_blocks_with_pnpm_add_suggestion() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "npm must not run"); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "wrong-manager guard must run before registry checks" ); @@ -416,7 +381,7 @@ fn npm_in_pnpm_lock_project_blocks_with_pnpm_add_suggestion() { #[test] fn package_manager_field_beats_missing_lockfile_for_node_guard() { - let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("npm", "CORGEA_NPM_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write( project.path().join("package.json"), @@ -433,7 +398,7 @@ fn package_manager_field_beats_missing_lockfile_for_node_guard() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "npm must not run"); - assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + assert_eq!(registry_hits.load(Ordering::SeqCst), 0); assert!( String::from_utf8_lossy(&out.stderr).contains("Did you mean `corgea pnpm add oldpkg`?"), "stderr: {}", @@ -443,7 +408,7 @@ fn package_manager_field_beats_missing_lockfile_for_node_guard() { #[test] fn conflicting_node_lockfiles_do_not_block_as_wrong_manager() { - let mut h = WrapperHarness::new("npm", "CORGEA_NPM_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("npm", "CORGEA_NPM_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write(project.path().join("package.json"), r#"{"name":"proj"}"#) .expect("write package.json"); @@ 
-470,14 +435,14 @@ fn conflicting_node_lockfiles_do_not_block_as_wrong_manager() { ); assert_eq!(h.recorded_argv().as_deref(), Some("install oldpkg@1.0.0")); assert!( - h.registry_hits.load(Ordering::SeqCst) >= 1, + registry_hits.load(Ordering::SeqCst) >= 1, "the normal install gate should still run" ); } #[test] fn pip_in_uv_lock_project_blocks_with_uv_add_suggestion() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write(project.path().join("uv.lock"), "version = 1\n").expect("write uv lock"); @@ -490,7 +455,7 @@ fn pip_in_uv_lock_project_blocks_with_uv_add_suggestion() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "pip must not run"); - assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + assert_eq!(registry_hits.load(Ordering::SeqCst), 0); let stderr = String::from_utf8_lossy(&out.stderr); assert!( stderr.contains("error: this project appears to use uv, but you ran pip."), @@ -504,7 +469,7 @@ fn pip_in_uv_lock_project_blocks_with_uv_add_suggestion() { #[test] fn pip_requirements_in_uv_project_suggests_uv_pip_install() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("pip", "CORGEA_PYPI_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write(project.path().join("uv.lock"), "version = 1\n").expect("write uv lock"); std::fs::write(project.path().join("requirements.txt"), "oldpkg==1.0.0\n") @@ -519,7 +484,7 @@ fn pip_requirements_in_uv_project_suggests_uv_pip_install() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "pip must not run"); - assert_eq!(h.registry_hits.load(Ordering::SeqCst), 0); + assert_eq!(registry_hits.load(Ordering::SeqCst), 0); assert!( String::from_utf8_lossy(&out.stderr) .contains("Did you mean `corgea uv pip install -r 
requirements.txt`?"), @@ -530,7 +495,7 @@ fn pip_requirements_in_uv_project_suggests_uv_pip_install() { #[test] fn uv_add_in_requirements_project_blocks_with_pip_install_suggestion() { - let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("uv", "CORGEA_PYPI_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write(project.path().join("requirements.txt"), "oldpkg==1.0.0\n") .expect("write requirements"); @@ -545,7 +510,7 @@ fn uv_add_in_requirements_project_blocks_with_pip_install_suggestion() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "uv must not run"); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "wrong-manager guard must run before registry checks" ); @@ -562,7 +527,7 @@ fn uv_add_in_requirements_project_blocks_with_pip_install_suggestion() { #[test] fn uv_install_blocks_with_uv_pip_install_suggestion_without_running_uv() { - let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("uv", "CORGEA_PYPI_REGISTRY", 0); let out = h .cmd .args(["uv", "install", "oldpkg"]) @@ -572,7 +537,7 @@ fn uv_install_blocks_with_uv_pip_install_suggestion_without_running_uv() { assert_eq!(out.status.code(), Some(1)); assert_eq!(h.recorded_argv(), None, "uv must not run"); assert_eq!( - h.registry_hits.load(Ordering::SeqCst), + registry_hits.load(Ordering::SeqCst), 0, "invalid uv command must not touch the registry" ); @@ -589,7 +554,7 @@ fn uv_install_blocks_with_uv_pip_install_suggestion_without_running_uv() { #[test] fn uv_add_in_pyproject_with_requirements_does_not_guess_pip() { - let mut h = WrapperHarness::new("uv", "CORGEA_PYPI_REGISTRY", 0); + let (mut h, registry_hits) = wrapper_with_hits("uv", "CORGEA_PYPI_REGISTRY", 0); let project = TempDir::new().expect("project dir"); std::fs::write( project.path().join("pyproject.toml"), @@ -615,14 +580,14 
@@ fn uv_add_in_pyproject_with_requirements_does_not_guess_pip() { ); assert_eq!(h.recorded_argv().as_deref(), Some("add oldpkg")); assert!( - h.registry_hits.load(Ordering::SeqCst) >= 1, + registry_hits.load(Ordering::SeqCst) >= 1, "the normal uv add gate should still run" ); } #[test] fn wrapper_forwards_package_manager_exit_code() { - let mut h = WrapperHarness::new("pip", "CORGEA_PYPI_REGISTRY", 7); + let mut h = wrapper("pip", "CORGEA_PYPI_REGISTRY", 7); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index a14daaa..61f9b4c 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -25,10 +25,6 @@ const TREE_REFUSAL: &str = "Refusing to run install: your existing dependency tr /// Refusal when a named target carries a blocking verdict. const GENERIC_REFUSAL: &str = "Refusing to run install. Pass --force to proceed despite findings."; -fn vulnerable_body(name: &str, version: &str) -> String { - common::vulnerable_body("pypi", name, version, "MAL-2024-0002", None) -} - fn harness(checks: HashMap, statuses: HashMap) -> TreeHarness { TreeHarness::new("pip", checks, statuses, TREE_REPORT) } @@ -48,7 +44,7 @@ fn named_install_with_transitive_vulnerable_keeps_generic_refusal() { let mut checks = HashMap::new(); checks.insert( key("pypi", "evildep", "0.4.2"), - vulnerable_body("evildep", "0.4.2"), + common::vulnerable_body("pypi", "evildep", "0.4.2", "MAL-2024-0002", None), ); let mut h = harness(checks, HashMap::new()); let out = run_install(&mut h); @@ -83,7 +79,7 @@ fn requirements_only_install_with_vulnerable_transitive_keeps_generic_refusal() let mut checks = HashMap::new(); checks.insert( key("pypi", "evildep", "0.4.2"), - vulnerable_body("evildep", "0.4.2"), + common::vulnerable_body("pypi", "evildep", "0.4.2", "MAL-2024-0002", None), ); let mut h = harness(checks, HashMap::new()); // `pip install -r reqs.txt` with no named targets — the canned tree @@ 
-120,7 +116,7 @@ fn named_vulnerable_keeps_generic_refusal() { let mut checks = HashMap::new(); checks.insert( key("pypi", "oldpkg", "1.0.0"), - vulnerable_body("oldpkg", "1.0.0"), + common::vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0002", None), ); let mut h = harness(checks, HashMap::new()); let out = run_install(&mut h); @@ -154,7 +150,7 @@ fn named_unverifiable_with_transitive_vulnerable_keeps_generic_refusal() { let mut checks = HashMap::new(); checks.insert( key("pypi", "evildep", "0.4.2"), - vulnerable_body("evildep", "0.4.2"), + common::vulnerable_body("pypi", "evildep", "0.4.2", "MAL-2024-0002", None), ); let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); diff --git a/tests/cli_uv_sync.rs b/tests/cli_uv_sync.rs index c8c54b2..6a6c10c 100644 --- a/tests/cli_uv_sync.rs +++ b/tests/cli_uv_sync.rs @@ -11,12 +11,9 @@ mod common; -use common::{corgea_isolated, key, vulnerable_body, write_fake_recorder}; -use corgea::vuln_api_stub::{self, PackageKey}; +use common::{key, vulnerable_body, GateHarness}; +use corgea::vuln_api_stub::PackageKey; use std::collections::HashMap; -use std::path::PathBuf; -use std::process::Command; -use tempfile::TempDir; /// `proj` is the project itself (editable — skipped); `evildep` is the one /// index-sourced pin the gate must verdict. 
@@ -34,45 +31,6 @@ version = "0.4.2" source = { registry = "https://pypi.org/simple" } "#; -struct SyncHarness { - cmd: Command, - marker: PathBuf, - project: TempDir, - _home: TempDir, - _bin: TempDir, -} - -impl SyncHarness { - fn new(checks: HashMap) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let project = TempDir::new().expect("project dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_recorder(bin.path(), "uv", &marker, 0); - let vuln_stub = vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); - cmd.env("PATH", bin.path()) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token") - .current_dir(project.path()); - Self { - cmd, - marker, - project, - _home: home, - _bin: bin, - } - } - - fn with_uv_lock(self, content: &str) -> Self { - std::fs::write(self.project.path().join("uv.lock"), content).expect("write uv.lock"); - self - } - - fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() - } -} - fn vulnerable_evildep_checks() -> HashMap { let mut checks = HashMap::new(); checks.insert( @@ -84,7 +42,12 @@ fn vulnerable_evildep_checks() -> HashMap { #[test] fn uv_sync_vulnerable_lockfile_blocks() { - let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(vulnerable_evildep_checks()) + .token("test-token") + .with_project_file("uv.lock", UV_LOCK) + .build(); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(1), "vulnerable lock must block"); assert_eq!( @@ -108,7 +71,12 @@ fn uv_sync_vulnerable_lockfile_blocks() { #[test] fn uv_sync_clean_lockfile_proceeds() { - let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("uv.lock", 
UV_LOCK) + .build(); let out = h .cmd .args(["uv", "sync", "--frozen"]) @@ -129,7 +97,12 @@ fn uv_sync_clean_lockfile_proceeds() { #[test] fn uv_sync_force_overrides_block() { - let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(vulnerable_evildep_checks()) + .token("test-token") + .with_project_file("uv.lock", UV_LOCK) + .build(); let out = h .cmd .args(["uv", "--force", "sync"]) @@ -145,7 +118,12 @@ fn uv_sync_force_overrides_block() { #[test] fn uv_sync_without_lockfile_execs_with_note() { - let mut h = SyncHarness::new(HashMap::new()); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(HashMap::new()) + .token("test-token") + .in_project_dir() + .build(); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("sync")); @@ -158,7 +136,12 @@ fn uv_sync_without_lockfile_execs_with_note() { #[test] fn uv_sync_malformed_lockfile_fails_closed() { - let mut h = SyncHarness::new(HashMap::new()).with_uv_lock("not = [valid"); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("uv.lock", "not = [valid") + .build(); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(1), "unparseable lock must block"); assert_eq!(h.recorded_argv(), None); @@ -172,7 +155,12 @@ fn uv_sync_malformed_lockfile_fails_closed() { #[test] fn uv_sync_tokenless_runs_public_lock_check() { - let mut h = SyncHarness::new(HashMap::new()).with_uv_lock(UV_LOCK); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(HashMap::new()) + .token("test-token") + .with_project_file("uv.lock", UV_LOCK) + .build(); h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); 
assert_eq!(out.status.code(), Some(0)); @@ -188,7 +176,12 @@ fn uv_sync_tokenless_runs_public_lock_check() { #[test] fn uv_lock_stays_passthrough() { // `uv lock` installs nothing; the gate applies to the sync that follows. - let mut h = SyncHarness::new(vulnerable_evildep_checks()).with_uv_lock(UV_LOCK); + let mut h = GateHarness::new() + .fake_recorder("uv", 0) + .vuln_checks(vulnerable_evildep_checks()) + .token("test-token") + .with_project_file("uv.lock", UV_LOCK) + .build(); let out = h.cmd.args(["uv", "lock"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("lock")); diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index 16e46a0..c09c0f2 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -15,14 +15,13 @@ use common::{key, vulnerable_body, PipHarness}; use corgea::vuln_api_stub::{header_value, spawn_capturing_vuln_api_stub}; use std::collections::HashMap; -fn vulnerable_oldpkg_body() -> String { - vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")) -} - #[test] fn vulnerable_pin_blocks_without_running_install() { let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd @@ -72,7 +71,10 @@ fn alternate_pypi_spelling_hits_canonical_verdict() { #[test] fn force_overrides_vulnerable_block_and_propagates_exit_code() { let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 7); let out = h .cmd @@ -143,7 +145,10 @@ fn 
verdict_503_fails_closed() { fn tokenless_public_check_blocks_vulnerable_pin() { // No token still runs public CVE checks and blocks a vulnerable verdict. let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); let mut h = PipHarness::new(checks, HashMap::new(), None, 0); let out = h .cmd @@ -288,7 +293,10 @@ fn outage_noise_collapses_above_three_unverifiable() { #[test] fn json_carries_verdict_object_and_mode() { let mut checks = HashMap::new(); - checks.insert(key("pypi", "oldpkg", "1.0.0"), vulnerable_oldpkg_body()); + checks.insert( + key("pypi", "oldpkg", "1.0.0"), + vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), + ); let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 2ccad96..1243a48 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -247,17 +247,146 @@ pub fn write_fake_tree_pm( write_script(dir, binary, &script); } -/// `corgea` wired to the wildcard pypi registry stub, a report-less fake pip -/// (recording its argv to a marker), and a vuln-api stub. +/// One configurable harness behind every gate test: isolated `corgea`, a +/// private PATH of fake package managers, optional registry stubs, the +/// vuln-api stub, optional token, and an optional throwaway project cwd. 
#[cfg(unix)] #[allow(dead_code)] -pub struct PipHarness { +pub struct GateHarness { pub cmd: Command, marker: PathBuf, + project: Option, + checks: HashMap, + statuses: HashMap, _home: TempDir, _bin: TempDir, + _vuln_stub: Option, } +#[cfg(unix)] +#[allow(dead_code)] +impl GateHarness { + pub fn new() -> Self { + let (mut cmd, home) = corgea_isolated(); + let bin = TempDir::new().expect("temp bin dir"); + let marker = bin.path().join("pm-argv.txt"); + cmd.env("PATH", bin.path()); + Self { + cmd, + marker, + project: None, + checks: HashMap::new(), + statuses: HashMap::new(), + _home: home, + _bin: bin, + _vuln_stub: None, + } + } + + /// Tree-aware fake manager: emits `payload` on its tree flag, records + /// argv and exits `exit_code` otherwise. + pub fn fake_tree_pm(self, binary: &str, payload: &str, exit_code: i32) -> Self { + write_fake_tree_pm(self._bin.path(), binary, &self.marker, payload, exit_code); + self + } + + /// Plain argv recorder. Call repeatedly for multiple binaries; call + /// never for an empty PATH. + pub fn fake_recorder(self, binary: &str, exit_code: i32) -> Self { + write_fake_recorder(self._bin.path(), binary, &self.marker, exit_code); + self + } + + /// Raw script escape hatch. + pub fn script(self, binary: &str, script: &str) -> Self { + write_script(self._bin.path(), binary, script); + self + } + + /// Raw script escape hatch for scripts that need the temp bin dir or + /// marker path. + pub fn script_with_paths(self, binary: &str, make_script: F) -> Self + where + F: FnOnce(&std::path::Path, &std::path::Path) -> String, + { + let script = make_script(self._bin.path(), &self.marker); + write_script(self._bin.path(), binary, &script); + self + } + + /// oldpkg stub on both registry env vars; only the exercised ecosystem + /// dials it. 
+ pub fn oldpkg_registry(mut self) -> Self { + let url = spawn_oldpkg_registry_stub(); + self.cmd + .env("CORGEA_PYPI_REGISTRY", &url) + .env("CORGEA_NPM_REGISTRY", &url); + self + } + + pub fn wildcard_pypi_registry(mut self) -> Self { + let url = spawn_wildcard_pypi_stub(); + self.cmd.env("CORGEA_PYPI_REGISTRY", &url); + self + } + + pub fn registry_env(mut self, var: &str, url: &str) -> Self { + self.cmd.env(var, url); + self + } + + pub fn vuln_checks(mut self, checks: HashMap) -> Self { + self.checks = checks; + self + } + + pub fn vuln_statuses(mut self, statuses: HashMap) -> Self { + self.statuses = statuses; + self + } + + pub fn token(mut self, token: &str) -> Self { + self.cmd.env("CORGEA_TOKEN", token); + self + } + + pub fn in_project_dir(mut self) -> Self { + let project = TempDir::new().expect("project dir"); + self.cmd.current_dir(project.path()); + self.project = Some(project); + self + } + + pub fn with_project_file(mut self, name: &str, body: &str) -> Self { + if self.project.is_none() { + self = self.in_project_dir(); + } + let dir = self.project.as_ref().unwrap().path(); + std::fs::write(dir.join(name), body).expect("write project file"); + self + } + + pub fn build(mut self) -> Self { + let stub = corgea::vuln_api_stub::spawn_with_statuses( + std::mem::take(&mut self.checks), + std::mem::take(&mut self.statuses), + ); + self.cmd.env("CORGEA_VULN_API_URL", &stub.base_url); + self._vuln_stub = Some(stub); + self + } + + pub fn recorded_argv(&self) -> Option { + std::fs::read_to_string(&self.marker).ok() + } +} + +/// `corgea` wired to the wildcard pypi registry stub, a report-less fake pip +/// (recording its argv to a marker), and a vuln-api stub. 
+#[cfg(unix)] +#[allow(dead_code)] +pub struct PipHarness(GateHarness); + #[cfg(unix)] #[allow(dead_code)] impl PipHarness { @@ -268,30 +397,33 @@ impl PipHarness { token: Option<&str>, pip_exit_code: i32, ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); // RESOLUTION_FAILS models an old pip with no `--report`: the tree // dry-run exits 2, so these tests exercise the named-only fallback. - write_fake_tree_pm(bin.path(), "pip", &marker, RESOLUTION_FAILS, pip_exit_code); - let registry = spawn_wildcard_pypi_stub(); - let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url); + let mut h = GateHarness::new() + .fake_tree_pm("pip", RESOLUTION_FAILS, pip_exit_code) + .wildcard_pypi_registry() + .vuln_checks(checks) + .vuln_statuses(statuses); if let Some(t) = token { - cmd.env("CORGEA_TOKEN", t); - } - Self { - cmd, - marker, - _home: home, - _bin: bin, + h = h.token(t); } + Self(h.build()) } +} - pub fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() +#[cfg(unix)] +impl std::ops::Deref for PipHarness { + type Target = GateHarness; + + fn deref(&self) -> &GateHarness { + &self.0 + } +} + +#[cfg(unix)] +impl std::ops::DerefMut for PipHarness { + fn deref_mut(&mut self) -> &mut GateHarness { + &mut self.0 } } @@ -300,12 +432,7 @@ impl PipHarness { /// stub, and a token. 
#[cfg(unix)] #[allow(dead_code)] -pub struct TreeHarness { - pub cmd: Command, - marker: PathBuf, - _home: TempDir, - _bin: TempDir, -} +pub struct TreeHarness(GateHarness); #[cfg(unix)] #[allow(dead_code)] @@ -316,26 +443,30 @@ impl TreeHarness { statuses: HashMap, payload: &str, ) -> Self { - let (mut cmd, home) = corgea_isolated(); - let bin = TempDir::new().expect("temp bin dir"); - let marker = bin.path().join("pm-argv.txt"); - write_fake_tree_pm(bin.path(), binary, &marker, payload, 0); - let registry = spawn_oldpkg_registry_stub(); - let vuln_stub = corgea::vuln_api_stub::spawn_with_statuses(checks, statuses); - cmd.env("PATH", bin.path()) - .env("CORGEA_PYPI_REGISTRY", ®istry) - .env("CORGEA_NPM_REGISTRY", ®istry) - .env("CORGEA_VULN_API_URL", &vuln_stub.base_url) - .env("CORGEA_TOKEN", "test-token"); - Self { - cmd, - marker, - _home: home, - _bin: bin, - } + Self( + GateHarness::new() + .fake_tree_pm(binary, payload, 0) + .oldpkg_registry() + .vuln_checks(checks) + .vuln_statuses(statuses) + .token("test-token") + .build(), + ) } +} - pub fn recorded_argv(&self) -> Option { - std::fs::read_to_string(&self.marker).ok() +#[cfg(unix)] +impl std::ops::Deref for TreeHarness { + type Target = GateHarness; + + fn deref(&self) -> &GateHarness { + &self.0 + } +} + +#[cfg(unix)] +impl std::ops::DerefMut for TreeHarness { + fn deref_mut(&mut self) -> &mut GateHarness { + &mut self.0 } } From a9951d7d5a6fec13808c40bba01f6c01af02f692 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 20:43:29 +0200 Subject: [PATCH 45/59] Refine install gate verdict handling --- Cargo.lock | 31 +++++ Cargo.toml | 1 + src/precheck/mod.rs | 46 +++---- src/precheck/parse.rs | 80 +++--------- src/precheck/render.rs | 190 ++++------------------------ src/precheck/tree.rs | 38 +++--- src/precheck/uv.rs | 5 +- src/precheck/verdict.rs | 238 +++++++++++++++++++++++++++++++++-- src/verify_deps/registry.rs | 224 +++++++++++++-------------------- src/vuln_api/mod.rs | 75 +++++++---- 
src/vuln_api_stub/mod.rs | 39 +++--- tests/cli_bare_install.rs | 4 - tests/cli_provenance.rs | 16 +-- tests/cli_refusal_context.rs | 8 +- tests/cli_remediation.rs | 12 +- tests/cli_tree.rs | 16 +-- tests/cli_uv_sync.rs | 2 - tests/cli_verdict.rs | 32 ++--- tests/common/mod.rs | 122 +++++------------- 19 files changed, 565 insertions(+), 614 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2b933e..53fec52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,18 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "async-compression" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -287,6 +299,23 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "compression-codecs" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -1627,6 +1656,7 @@ version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64", "bytes", "cookie", @@ -1653,6 +1683,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-util", "tower", "tower-http", "tower-service", diff 
--git a/Cargo.toml b/Cargo.toml index afaf048..f4e4818 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ dirs = "5.0.1" reqwest = { version = "0.12.23", default-features = false, features = [ "blocking", "cookies", + "gzip", "json", "multipart", "native-tls", diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 29ce33a..9c8e650 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -63,11 +63,13 @@ impl PackageManager { } } - /// vuln-api ecosystem path segment for this manager's registry. - pub fn ecosystem(self) -> &'static str { + /// vuln-api ecosystem for this manager's registry. + pub fn ecosystem(self) -> crate::vuln_api::Ecosystem { match self { - PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => "npm", - PackageManager::Pip | PackageManager::Uv => "pypi", + PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => { + crate::vuln_api::Ecosystem::Npm + } + PackageManager::Pip | PackageManager::Uv => crate::vuln_api::Ecosystem::Pypi, } } @@ -428,9 +430,9 @@ fn report_and_exec( render::print_text(report); } render::warn_public_lookup_failures(report, opts); - if verdict::should_block_install(report, opts) { + if let Some(reason) = verdict::block_reason(report, opts) { if !opts.json { - render::print_refusal(report, opts); + render::print_refusal(reason); } return 1; } @@ -466,11 +468,7 @@ fn run_parsed_install( let now = Utc::now(); let (mut outcomes, tree_resolution) = std::thread::scope(|s| { let tree = tree_eligible.then(|| s.spawn(|| tree::resolve_tree(manager, rest, &parsed))); - let outcomes: Vec<_> = parsed - .targets - .iter() - .map(|target| verdict::verify_one(target, &opts, &now)) - .collect(); + let outcomes = verdict::verify_all(&parsed.targets, &opts, &now); ( outcomes, tree.map(|handle| handle.join().expect("tree resolution thread panicked")), @@ -528,14 +526,9 @@ fn run_tree_pass( outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let set = match resolution { - 
Ok(Some(set)) => set, - Ok(None) => { - run_verdict_pass(manager, outcomes, opts); - return TreeReport::NamedOnly { - reason: format!("{} has no safe dry-run", manager.binary_name()), - }; - } + let no_dry_run = || format!("{} has no safe dry-run", manager.binary_name()); + let set = match resolution.and_then(|opt| opt.ok_or_else(no_dry_run)) { + Ok(set) => set, Err(reason) => { run_verdict_pass(manager, outcomes, opts); return TreeReport::NamedOnly { reason }; @@ -579,7 +572,7 @@ fn run_tree_pass( .verdict .as_ref() .expect("tree pass requires verdict config"); - let results = verdict::verdict_pool(jobs, cfg, manager, verdict::VERDICT_CONCURRENCY); + let results = verdict::verdict_pool(jobs, cfg, manager); let transitive = verdict::apply_verdicts(manager, results, outcomes, &direct_deps); TreeReport::Full { resolved_count, @@ -612,7 +605,7 @@ fn run_verdict_pass( }) .collect(); - let results = verdict::verdict_pool(jobs, cfg, manager, verdict::VERDICT_CONCURRENCY); + let results = verdict::verdict_pool(jobs, cfg, manager); let leftovers = verdict::apply_verdicts(manager, results, outcomes, &Default::default()); debug_assert!( leftovers.is_empty(), @@ -693,11 +686,12 @@ mod tests { #[test] fn ecosystem_mapping() { - assert_eq!(PackageManager::Pip.ecosystem(), "pypi"); - assert_eq!(PackageManager::Uv.ecosystem(), "pypi"); - assert_eq!(PackageManager::Npm.ecosystem(), "npm"); - assert_eq!(PackageManager::Yarn.ecosystem(), "npm"); - assert_eq!(PackageManager::Pnpm.ecosystem(), "npm"); + use crate::vuln_api::Ecosystem; + assert_eq!(PackageManager::Pip.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Uv.ecosystem(), Ecosystem::Pypi); + assert_eq!(PackageManager::Npm.ecosystem(), Ecosystem::Npm); + assert_eq!(PackageManager::Yarn.ecosystem(), Ecosystem::Npm); + assert_eq!(PackageManager::Pnpm.ecosystem(), Ecosystem::Npm); } #[test] diff --git a/src/precheck/parse.rs b/src/precheck/parse.rs index e1477b4..4977beb 100644 --- a/src/precheck/parse.rs +++ 
b/src/precheck/parse.rs @@ -382,20 +382,11 @@ fn parse_pypi_spec(raw: &str) -> InstallTarget { }; } - // Find the first specifier operator (`==`, `>=`, `<=`, `!=`, `~=`, - // `>`, `<`). PEP 440 also allows `===` (arbitrary equality). - // Find the leftmost specifier operator. On ties, prefer the - // longer operator (e.g. `==` over `=`). + // Split at the leftmost specifier operator (`==`, `>=`, `<=`, `!=`, + // `~=`, `>`, `<`; PEP 440 also allows `===`). Only the index matters — + // the operator itself stays with the spec part. let separators = ["===", "==", ">=", "<=", "!=", "~=", ">", "<"]; - let mut split_at: Option = None; - for sep in &separators { - if let Some(idx) = trimmed.find(sep) { - split_at = match split_at { - Some(prev) if prev <= idx => Some(prev), - _ => Some(idx), - }; - } - } + let split_at = separators.iter().filter_map(|sep| trimmed.find(sep)).min(); let (name_part, spec_part): (&str, &str) = match split_at { Some(idx) => (&trimmed[..idx], &trimmed[idx..]), @@ -403,7 +394,7 @@ fn parse_pypi_spec(raw: &str) -> InstallTarget { }; // Strip extras: `requests[security]` -> `requests`. - let name_no_extras = pypi_name_part(name_part, PypiNameCut::ParseNamePart); + let name_no_extras = name_part.split('[').next().unwrap_or(name_part).trim(); // Strip env markers: `package; python_version >= "3.7"`. let spec_no_marker = spec_part.split(';').next().unwrap_or(spec_part).trim(); @@ -432,25 +423,10 @@ fn parse_pypi_spec(raw: &str) -> InstallTarget { } } -#[derive(Debug, Clone, Copy)] -pub(super) enum PypiNameCut { - /// Existing `parse_pypi_spec` behavior: the caller already split at the - /// leftmost version operator, so only extras are removed from the name part. - ParseNamePart, - /// Existing requirements-line behavior: stop at extras, markers, - /// operators, or whitespace. - RequirementLine, -} - -/// Bare PyPI name extraction only; callers normalize when they need a -/// comparison key. 
-pub(super) fn pypi_name_part(spec: &str, cut: PypiNameCut) -> &str { - let stop = |c: char| match cut { - PypiNameCut::ParseNamePart => c == '[', - PypiNameCut::RequirementLine => { - matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' ') - } - }; +/// Bare PyPI name from a requirement line: stop at extras, operators, +/// markers, or whitespace. Callers normalize when they need a comparison key. +pub(super) fn pypi_name_part(spec: &str) -> &str { + let stop = |c: char| matches!(c, '[' | '<' | '>' | '=' | '!' | '~' | ';' | ' '); let cut = spec.find(stop).unwrap_or(spec.len()); spec[..cut].trim() } @@ -660,38 +636,12 @@ mod tests { #[test] fn pypi_name_part_strips_extras_markers_and_operators() { - assert_eq!( - pypi_name_part("requests", PypiNameCut::ParseNamePart), - "requests" - ); - assert_eq!( - pypi_name_part("requests[security]", PypiNameCut::ParseNamePart), - "requests" - ); - assert_eq!( - pypi_name_part("pkg; python_version ", PypiNameCut::ParseNamePart), - "pkg; python_version" - ); - assert_eq!( - pypi_name_part("requests[security]==2.31.0", PypiNameCut::RequirementLine), - "requests" - ); - assert_eq!( - pypi_name_part("Flask_Cors>=4.0", PypiNameCut::RequirementLine), - "Flask_Cors" - ); - assert_eq!( - pypi_name_part( - "pkg; python_version >= \"3.7\"", - PypiNameCut::RequirementLine - ), - "pkg" - ); - assert_eq!( - pypi_name_part("pkg ==1.0", PypiNameCut::RequirementLine), - "pkg" - ); - assert_eq!(pypi_name_part("", PypiNameCut::RequirementLine), ""); + assert_eq!(pypi_name_part("requests"), "requests"); + assert_eq!(pypi_name_part("requests[security]==2.31.0"), "requests"); + assert_eq!(pypi_name_part("Flask_Cors>=4.0"), "Flask_Cors"); + assert_eq!(pypi_name_part("pkg; python_version >= \"3.7\""), "pkg"); + assert_eq!(pypi_name_part("pkg ==1.0"), "pkg"); + assert_eq!(pypi_name_part(""), ""); } #[test] diff --git a/src/precheck/render.rs b/src/precheck/render.rs index 5920267..a68f12d 100644 --- a/src/precheck/render.rs +++ 
b/src/precheck/render.rs @@ -27,61 +27,21 @@ pub(super) fn bare_install_note(manager: PackageManager, subcommand_label: &str) } } -/// The refusal line on stderr. When vulnerable findings exist but none sit on -/// a named target — and no named target is unverifiable either — the block is -/// entirely the existing tree's doing, so say that instead of implying the -/// package the user typed is at fault. Messaging only; the block decision -/// stays with `should_block_install`. -pub(super) fn print_refusal(report: &PrecheckReport, opts: &PrecheckOptions) { - if refusal_blames_existing_tree(report, opts) { - eprintln!( +/// The refusal line on stderr. Messaging only; the block decision and the +/// choice of escape hatch live in `verdict::block_reason`. +pub(super) fn print_refusal(reason: super::verdict::BlockReason) { + use super::verdict::BlockReason; + match reason { + BlockReason::ExistingTree => eprintln!( "Refusing to run install: your existing dependency tree has known-vulnerable packages (none were added by this command). Fix them or pass --force." - ); - } else if report.vulnerable_count() > 0 - || (super::verdict::authenticated_verdict(opts) && report.unverifiable_count() > 0) - || (super::verdict::authenticated_verdict(opts) && report.error_count() > 0) - { - eprintln!("Refusing to run install. Pass --force to proceed despite findings."); - } else { - eprintln!("Refusing to run install. Pass --no-fail to proceed anyway."); - } -} - -/// True when the block is entirely the existing tree's doing: vulnerable -/// findings exist, none sit on a named target (or block as unverifiable -/// there), and every *blocking* tree finding — vulnerable or unverifiable, -/// since `should_block_install` refuses on both — genuinely predates this -/// command. 
A `Requested` finding (pip `-r`) is added by this command and -/// renders as `(from requirements)`; a `Transitive` finding on any install -/// that names targets or requirements files is being pulled in by them -/// right now. Only a truly bare install (`report.bare_install`) or -/// manifest-declared `PreExisting` findings may blame the existing tree. -fn refusal_blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { - let fail_closed = super::verdict::authenticated_verdict(opts); - let named_findings = report.named_vulnerable_count() - + if fail_closed { - report.named_unverifiable_count() - } else { - 0 - }; - if report.vulnerable_count() == 0 || named_findings > 0 { - return false; + ), + BlockReason::Findings => { + eprintln!("Refusing to run install. Pass --force to proceed despite findings.") + } + BlockReason::RecencyOnly => { + eprintln!("Refusing to run install. Pass --no-fail to proceed anyway.") + } } - let Some(TreeReport::Full { transitive, .. }) = &report.tree else { - return false; - }; - transitive - .iter() - .filter(|t| { - matches!(t.verdict, VerdictStatus::Vulnerable(_)) - || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) - }) - .all(|t| match t.origin { - // A locked pin predates the sync command that installs it. 
- TreeOrigin::PreExisting | TreeOrigin::Locked => true, - TreeOrigin::Requested => false, - TreeOrigin::Transitive => report.bare_install, - }) } /// Print the "requirements files are not recency-checked" note when the @@ -127,21 +87,18 @@ fn highest_fix(mut fixes: Vec<&str>, all_must_parse: bool) -> Option { [] => None, [only] => Some((*only).to_string()), many => { - let mut best: Option<(semver::Version, &str)> = None; + let mut parsed = Vec::with_capacity(many.len()); for raw in many { - let v = - match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) - { - Ok(v) => v, - Err(_) if all_must_parse => return None, - Err(_) => continue, - }; - match &best { - Some((cur, _)) if cur >= &v => {} - _ => best = Some((v, raw)), + match semver::Version::parse(&verify_deps::registry::normalize_for_semver(raw)) { + Ok(v) => parsed.push((v, *raw)), + Err(_) if all_must_parse => return None, + Err(_) => {} } } - best.map(|(_, raw)| (*raw).to_string()) + parsed + .into_iter() + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.to_string()) } } } @@ -656,107 +613,4 @@ mod tests { Some("1.10.0".to_string()) ); } - - /// The existing-tree refusal fires only when every vulnerable finding - /// predates the command: a `Requested` finding (pip `-r`) is added by - /// this command, and a `Transitive` finding is being pulled in right - /// now unless the install is truly bare. `bare_install` is the explicit - /// discriminator — a requirements-only install also has no named - /// outcomes, but its resolved set is the command's doing. - #[test] - fn refusal_blame_respects_finding_origin() { - let tree_vulnerable = |origin| TreeOutcome { - name: "dep".to_string(), - version: "1.0.0".to_string(), - verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), - origin, - }; - // (origin, named outcomes present, bare_install, expected). - // (origin, named=false, bare=false) is the requirements-only shape. 
- let cases = [ - (TreeOrigin::PreExisting, false, true, true), - (TreeOrigin::PreExisting, false, false, true), - (TreeOrigin::PreExisting, true, false, true), - (TreeOrigin::Transitive, false, true, true), - (TreeOrigin::Transitive, false, false, false), - (TreeOrigin::Transitive, true, false, false), - (TreeOrigin::Requested, false, true, false), - (TreeOrigin::Requested, false, false, false), - (TreeOrigin::Requested, true, false, false), - ]; - for (origin, with_named, bare_install, blames_tree) in cases { - let outcomes = if with_named { - vec![resolved_outcome("cleanpkg", "1.0.0", false)] - } else { - vec![] - }; - let mut report = report_with(outcomes); - report.bare_install = bare_install; - report.tree = Some(TreeReport::Full { - resolved_count: 1, - transitive: vec![tree_vulnerable(origin)], - }); - assert_eq!( - refusal_blames_existing_tree(&report, &authenticated_opts(false, false)), - blames_tree, - "origin {origin:?}, with_named {with_named}, bare {bare_install}" - ); - } - } - - /// Unverifiable tree findings block too (`should_block_install`), so - /// they must pass the same origin test before the refusal may blame the - /// existing tree: a command-added unverifiable transitive alongside a - /// pre-existing vulnerable dep keeps the generic refusal on a named - /// install, while on a bare install everything still predates the - /// command. 
- #[test] - fn refusal_blame_considers_unverifiable_tree_findings() { - let tree_finding = |name: &str, verdict, origin| TreeOutcome { - name: name.to_string(), - version: "1.0.0".to_string(), - verdict, - origin, - }; - let mixed_tree = || { - Some(TreeReport::Full { - resolved_count: 2, - transitive: vec![ - tree_finding( - "stickydep", - VerdictStatus::Vulnerable(vec![vm("A-1", None)]), - TreeOrigin::PreExisting, - ), - tree_finding( - "newdep", - VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), - TreeOrigin::Transitive, - ), - ], - }) - }; - - // Named install: the unverifiable transitive is being added by this - // command, so "none were added by this command" would lie. - let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); - report.tree = mixed_tree(); - assert!(!refusal_blames_existing_tree( - &report, - &authenticated_opts(false, false) - )); - assert!(refusal_blames_existing_tree( - &report, - &public_opts(false, false) - )); - - // Bare install: nothing named, everything resolved predates the - // command — the mixed findings still blame the existing tree. - let mut report = report_with(vec![]); - report.bare_install = true; - report.tree = mixed_tree(); - assert!(refusal_blames_existing_tree( - &report, - &authenticated_opts(false, false) - )); - } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index d07f316..b946788 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -155,20 +155,16 @@ fn resolve_uv_tree(parsed: &super::parse::ParsedInstall) -> Result std::collections::HashSet { - let norm = |n: &str| manager.normalize_name(n); +/// PEP 503-normalized names the user asked for — named CLI targets plus +/// entries of `-r` files — so tree findings label "(from requirements)" like +/// pip's `requested` report flag. Best-effort line parse; anything unparsed +/// just labels "(transitive)". 
+fn requested_names(parsed: &super::parse::ParsedInstall) -> std::collections::HashSet { + let norm = crate::deps::ecosystems::pypi::normalize_pypi_name; let mut out: std::collections::HashSet = parsed .targets .iter() @@ -184,8 +180,7 @@ fn requested_names( if line.is_empty() || line.starts_with(['#', '-']) || line.contains("://") { continue; } - let name = - super::parse::pypi_name_part(line, super::parse::PypiNameCut::RequirementLine); + let name = super::parse::pypi_name_part(line); if !name.is_empty() { out.insert(norm(name)); } @@ -200,7 +195,6 @@ fn requested_names( fn parse_compiled_requirements( out: &str, requested: &std::collections::HashSet, - manager: PackageManager, ) -> Result, String> { let mut pkgs = Vec::new(); for line in out.lines() { @@ -217,10 +211,10 @@ fn parse_compiled_requirements( )); }; // Strip extras: `celery[redis]==5.3.4`. - let name = super::parse::pypi_name_part(name, super::parse::PypiNameCut::RequirementLine) - .to_string(); + let name = super::parse::pypi_name_part(name).to_string(); pkgs.push(TreePackage { - requested: requested.contains(&manager.normalize_name(&name)), + requested: requested + .contains(&crate::deps::ecosystems::pypi::normalize_pypi_name(&name)), name, version: version.trim().to_string(), }); @@ -400,8 +394,7 @@ mod tests { fn parse_compiled_requirements_pins_extras_and_markers() { let requested = std::collections::HashSet::from(["flask-cors".to_string()]); let out = "Flask_Cors==4.0.0\ncelery[redis]==5.3.4\nwerkzeug==3.1.8 ; python_version >= \"3.9\"\n\n# comment\n--index-url https://example.com\n"; - let pkgs = - parse_compiled_requirements(out, &requested, PackageManager::Uv).expect("parse pins"); + let pkgs = parse_compiled_requirements(out, &requested).expect("parse pins"); assert_eq!( pkgs, vec![ @@ -427,10 +420,9 @@ mod tests { #[test] fn parse_compiled_requirements_rejects_non_pins() { let none = std::collections::HashSet::new(); - let err = parse_compiled_requirements("flask>=2.0\n", &none, 
PackageManager::Uv) - .expect_err("not a pin"); + let err = parse_compiled_requirements("flask>=2.0\n", &none).expect_err("not a pin"); assert!(err.contains("unexpected line"), "got: {err}"); - let err = parse_compiled_requirements("", &none, PackageManager::Uv).expect_err("empty"); + let err = parse_compiled_requirements("", &none).expect_err("empty"); assert!(err.contains("no packages"), "got: {err}"); } @@ -453,7 +445,7 @@ mod tests { }], requirements_files: vec![req], }; - let names = requested_names(PackageManager::Uv, &parsed); + let names = requested_names(&parsed); for name in ["celery", "flask-cors", "requests"] { assert!(names.contains(name), "missing {name}: {names:?}"); } diff --git a/src/precheck/uv.rs b/src/precheck/uv.rs index 2efe8cb..58857b0 100644 --- a/src/precheck/uv.rs +++ b/src/precheck/uv.rs @@ -83,7 +83,7 @@ fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32 } Err(e) => { // The single documented bypass of the "all blocking goes through - // `verdict::should_block_install`" invariant: an unparsable + // `verdict::block_reason`" invariant: an unparsable // uv.lock means there is no report to feed the predicate, so the // gate refuses directly (--force above is the only escape). eprintln!("error: cannot verify 'uv sync': {e} (pass --force to proceed unchecked)"); @@ -92,8 +92,7 @@ fn run_uv_sync(cmd: &[String], opts: PrecheckOptions, exec: impl FnOnce() -> i32 }; let resolved_count = jobs.len(); - let results = - verdict::verdict_pool(jobs, cfg, PackageManager::Uv, verdict::VERDICT_CONCURRENCY); + let results = verdict::verdict_pool(jobs, cfg, PackageManager::Uv); let transitive = results .into_iter() .map(|(pkg, verdict)| TreeOutcome { diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs index f2c9efb..86b8d10 100644 --- a/src/precheck/verdict.rs +++ b/src/precheck/verdict.rs @@ -1,22 +1,22 @@ //! Verdict pass: bounded vuln-api worker pool, result matching, and the -//! 
single block predicate (`should_block_install`). +//! single block predicate (`block_reason`). use std::time::Duration; use super::{ tree, InstallTarget, PackageManager, PrecheckOptions, PrecheckReport, TargetKind, - TargetOutcome, TreeOrigin, TreeOutcome, VerdictConfig, VerdictStatus, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictStatus, }; /// Above this many verdict jobs, print a stderr progress line so a big tree /// pass doesn't look hung. const VERDICT_PROGRESS_THRESHOLD: usize = 8; -/// Max parallel vuln-api verdict requests. -pub(super) const VERDICT_CONCURRENCY: usize = 8; +/// Max parallel vuln-api / registry requests. +const VERDICT_CONCURRENCY: usize = 8; /// Bounded worker pool over the verdict jobs. On client/request failure every -/// job comes back `Unverifiable`; `should_block_install` decides whether that +/// job comes back `Unverifiable`; `block_reason` decides whether that /// fails closed for the selected mode. /// Plain work queue, no new crates; `reqwest::blocking::Client` is /// `Send + Sync`. Result order is not preserved; callers match results back @@ -25,6 +25,14 @@ pub(super) fn verdict_pool( jobs: Vec, cfg: &VerdictConfig, manager: PackageManager, +) -> Vec<(tree::TreePackage, VerdictStatus)> { + verdict_pool_with(jobs, cfg, manager, VERDICT_CONCURRENCY) +} + +fn verdict_pool_with( + jobs: Vec, + cfg: &VerdictConfig, + manager: PackageManager, concurrency: usize, ) -> Vec<(tree::TreePackage, VerdictStatus)> { use std::collections::VecDeque; @@ -139,21 +147,120 @@ pub(super) fn public_verdict(opts: &PrecheckOptions) -> bool { .is_some_and(|cfg| cfg.mode.is_public()) } -pub(super) fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { +/// Why the gate refuses to run the install. The single owner of both the +/// block decision and the escape hatch the refusal advertises — +/// `render::print_refusal` only maps variants to text. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum BlockReason { + /// Every blocking finding predates this command (existing tree only). + /// `--force` is the escape. + ExistingTree, + /// Vulnerable findings, or unverifiable/error findings in fail-closed + /// (authenticated) mode. `--force` is the escape. + Findings, + /// Only the recency threshold fired. `--no-fail` is the escape. + RecencyOnly, +} + +pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> Option { if opts.force { - return false; + return None; } // A resolution error means no verdict was obtained for that target, so // in authenticated mode it fails closed like `Unverifiable` — otherwise a // registry outage silently bypasses the gate. let fail_closed = authenticated_verdict(opts); - report.vulnerable_count() > 0 + if report.vulnerable_count() > 0 || (fail_closed && report.unverifiable_count() > 0) || (fail_closed && report.error_count() > 0) - || (!opts.no_fail && report.recent_count() > 0) + { + return Some(if blames_existing_tree(report, opts) { + BlockReason::ExistingTree + } else { + BlockReason::Findings + }); + } + if !opts.no_fail && report.recent_count() > 0 { + return Some(BlockReason::RecencyOnly); + } + None } -pub(super) fn verify_one( +/// True when the block is entirely the existing tree's doing: vulnerable +/// findings exist, none sit on a named target (or block as unverifiable +/// there), and every *blocking* tree finding — vulnerable or unverifiable, +/// since `block_reason` refuses on both — genuinely predates this +/// command. A `Requested` finding (pip `-r`) is added by this command and +/// renders as `(from requirements)`; a `Transitive` finding on any install +/// that names targets or requirements files is being pulled in by them +/// right now. Only a truly bare install (`report.bare_install`) or +/// manifest-declared `PreExisting` findings may blame the existing tree. 
+fn blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + let fail_closed = authenticated_verdict(opts); + let named_findings = report.named_vulnerable_count() + + if fail_closed { + report.named_unverifiable_count() + } else { + 0 + }; + if report.vulnerable_count() == 0 || named_findings > 0 { + return false; + } + let Some(TreeReport::Full { transitive, .. }) = &report.tree else { + return false; + }; + transitive + .iter() + .filter(|t| { + matches!(t.verdict, VerdictStatus::Vulnerable(_)) + || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) + }) + .all(|t| match t.origin { + // A locked pin predates the sync command that installs it. + TreeOrigin::PreExisting | TreeOrigin::Locked => true, + TreeOrigin::Requested => false, + TreeOrigin::Transitive => report.bare_install, + }) +} + +/// Resolve every named target against its registry through a bounded worker +/// pool — each lookup is an independent blocking HTTP GET on the gate's +/// critical path, so they must not run serially. Order is preserved: +/// outcome `i` belongs to `targets[i]`. 
+pub(super) fn verify_all( + targets: &[InstallTarget], + opts: &PrecheckOptions, + now: &chrono::DateTime, +) -> Vec { + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Mutex; + + if targets.len() <= 1 { + return targets.iter().map(|t| verify_one(t, opts, now)).collect(); + } + let next = AtomicUsize::new(0); + let results: Mutex>> = + Mutex::new(targets.iter().map(|_| None).collect()); + let workers = VERDICT_CONCURRENCY.min(targets.len()); + std::thread::scope(|s| { + for _ in 0..workers { + s.spawn(|| loop { + let i = next.fetch_add(1, Ordering::Relaxed); + let Some(target) = targets.get(i) else { break }; + let outcome = verify_one(target, opts, now); + results.lock().unwrap()[i] = Some(outcome); + }); + } + }); + results + .into_inner() + .unwrap() + .into_iter() + .map(|o| o.expect("verify_all worker filled every slot")) + .collect() +} + +fn verify_one( target: &InstallTarget, opts: &PrecheckOptions, now: &chrono::DateTime, @@ -201,10 +308,15 @@ mod tests { use super::super::test_support::*; use super::super::{ run_verdict_pass, tree, InstallTarget, PackageManager, PrecheckOptions, TargetKind, - TargetOutcome, TreeOrigin, TreeOutcome, VerdictConfig, VerdictMode, VerdictStatus, + TargetOutcome, TreeOrigin, TreeOutcome, TreeReport, VerdictConfig, VerdictMode, + VerdictStatus, }; use super::*; + fn should_block_install(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { + block_reason(report, opts).is_some() + } + /// Predicate matrix: force ⇒ never block; vulnerable blocks in every /// verdict mode; unverifiable/error findings block only in authenticated /// mode; recency keeps its task-2 --no-fail demotion. 
@@ -290,7 +402,7 @@ mod tests { let mut named = resolved_outcome("pkg", "1.0.0", false); set_verdict(&mut named, VerdictStatus::Clean); let mut report = report_with(vec![named]); - report.tree = Some(super::super::TreeReport::Full { + report.tree = Some(TreeReport::Full { resolved_count: 2, transitive: vec![TreeOutcome { name: "evildep".to_string(), @@ -309,6 +421,106 @@ mod tests { assert!(!should_block_install(&report, &opts(true))); } + /// The existing-tree refusal fires only when every vulnerable finding + /// predates the command: a `Requested` finding (pip `-r`) is added by + /// this command, and a `Transitive` finding is being pulled in right + /// now unless the install is truly bare. `bare_install` is the explicit + /// discriminator — a requirements-only install also has no named + /// outcomes, but its resolved set is the command's doing. + #[test] + fn refusal_blame_respects_finding_origin() { + let tree_vulnerable = |origin| TreeOutcome { + name: "dep".to_string(), + version: "1.0.0".to_string(), + verdict: VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + origin, + }; + // (origin, named outcomes present, bare_install, expected). + // (origin, named=false, bare=false) is the requirements-only shape. 
+ let cases = [ + (TreeOrigin::PreExisting, false, true, true), + (TreeOrigin::PreExisting, false, false, true), + (TreeOrigin::PreExisting, true, false, true), + (TreeOrigin::Transitive, false, true, true), + (TreeOrigin::Transitive, false, false, false), + (TreeOrigin::Transitive, true, false, false), + (TreeOrigin::Requested, false, true, false), + (TreeOrigin::Requested, false, false, false), + (TreeOrigin::Requested, true, false, false), + ]; + for (origin, with_named, bare_install, blames_tree) in cases { + let outcomes = if with_named { + vec![resolved_outcome("cleanpkg", "1.0.0", false)] + } else { + vec![] + }; + let mut report = report_with(outcomes); + report.bare_install = bare_install; + report.tree = Some(TreeReport::Full { + resolved_count: 1, + transitive: vec![tree_vulnerable(origin)], + }); + assert_eq!( + blames_existing_tree(&report, &authenticated_opts(false, false)), + blames_tree, + "origin {origin:?}, with_named {with_named}, bare {bare_install}" + ); + } + } + + /// Unverifiable tree findings block too (`block_reason`), so they must + /// pass the same origin test before the refusal may blame the existing + /// tree: a command-added unverifiable transitive alongside a + /// pre-existing vulnerable dep keeps the generic refusal on a named + /// install, while on a bare install everything still predates the + /// command. 
+ #[test] + fn refusal_blame_considers_unverifiable_tree_findings() { + let tree_finding = |name: &str, verdict, origin| TreeOutcome { + name: name.to_string(), + version: "1.0.0".to_string(), + verdict, + origin, + }; + let mixed_tree = || { + Some(TreeReport::Full { + resolved_count: 2, + transitive: vec![ + tree_finding( + "stickydep", + VerdictStatus::Vulnerable(vec![vm("A-1", None)]), + TreeOrigin::PreExisting, + ), + tree_finding( + "newdep", + VerdictStatus::Unverifiable("vuln-api unavailable".to_string()), + TreeOrigin::Transitive, + ), + ], + }) + }; + + // Named install: the unverifiable transitive is being added by this + // command, so "none were added by this command" would lie. + let mut report = report_with(vec![resolved_outcome("cleanpkg", "1.0.0", false)]); + report.tree = mixed_tree(); + assert!(!blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + assert!(blames_existing_tree(&report, &public_opts(false, false))); + + // Bare install: nothing named, everything resolved predates the + // command — the mixed findings still blame the existing tree. + let mut report = report_with(vec![]); + report.bare_install = true; + report.tree = mixed_tree(); + assert!(blames_existing_tree( + &report, + &authenticated_opts(false, false) + )); + } + /// Verdict pass against an in-process stub: vulnerable body → Vulnerable /// with matches; 503 override → Unverifiable; no VerdictConfig → outcomes /// keep NotChecked. 
@@ -401,7 +613,7 @@ mod tests { .collect(); for concurrency in [1usize, 8] { - let results = verdict_pool(jobs.clone(), &cfg, PackageManager::Pip, concurrency); + let results = verdict_pool_with(jobs.clone(), &cfg, PackageManager::Pip, concurrency); assert_eq!( results.len(), 6, diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index bcc965b..7b6012d 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -33,6 +33,42 @@ fn http_client() -> &'static reqwest::blocking::Client { }) } +/// Shared fetch/parse boilerplate for registry metadata GETs: 404 → "not +/// found", other non-success → status error, then parse the JSON body. +/// `label` names the registry in error messages ("npm registry" / "PyPI"). +fn fetch_registry_json( + url: &str, + label: &str, + name: &str, + base: &str, +) -> Result { + let resp = http_client() + .get(url) + .header("Accept", "application/json") + .send() + .map_err(|e| format!("{} request failed: {}", label, e))?; + + let status = resp.status(); + if status == reqwest::StatusCode::NOT_FOUND { + return Err(format!( + "package '{}' not found on {} ({})", + name, label, base + )); + } + if !status.is_success() { + return Err(format!( + "{} returned status {} for '{}'", + label, status, name + )); + } + + let body = resp + .text() + .map_err(|e| format!("failed to read {} response: {}", label, e))?; + serde_json::from_str(&body) + .map_err(|e| format!("failed to parse {} response for '{}': {}", label, name, e)) +} + /// URL-encode an npm package name. Scoped names contain `@` and `/`, /// the latter must be encoded as `%2f` for the package metadata URL. /// Also used by `vuln_api` for its npm path segments. 
@@ -129,38 +165,7 @@ pub fn npm_resolve( .unwrap_or(DEFAULT_NPM_REGISTRY) .trim_end_matches('/'); let url = format!("{}/{}", base, encode_npm_name(name)); - - let client = http_client(); - let resp = client - .get(&url) - .header("Accept", "application/json") - .send() - .map_err(|e| format!("npm registry request failed: {}", e))?; - - let status = resp.status(); - if status == reqwest::StatusCode::NOT_FOUND { - return Err(format!( - "package '{}' not found on npm registry ({})", - name, base - )); - } - if !status.is_success() { - return Err(format!( - "npm registry returned status {} for '{}'", - status, name - )); - } - - let body = resp - .text() - .map_err(|e| format!("failed to read npm registry response: {}", e))?; - - let meta: NpmFullMetadata = serde_json::from_str(&body).map_err(|e| { - format!( - "failed to parse npm registry response for '{}': {}", - name, e - ) - })?; + let meta: NpmFullMetadata = fetch_registry_json(&url, "npm registry", name, base)?; let resolved_version = match spec { NpmSpec::Latest => meta.dist_tags.get("latest").cloned().ok_or_else(|| { @@ -241,25 +246,12 @@ fn npm_pick_highest_matching( ) -> Option { let req = parse_npm_range(range)?; let range_has_prerelease = range.contains('-'); - - let mut best: Option<(semver::Version, String)> = None; - for raw in versions.keys() { - let v = match semver::Version::parse(raw) { - Ok(v) => v, - Err(_) => continue, - }; - if !v.pre.is_empty() && !range_has_prerelease { - continue; - } - if !req.matches(&v) { - continue; - } - match &best { - Some((cur, _)) if cur >= &v => {} - _ => best = Some((v, raw.clone())), - } - } - best.map(|(_, raw)| raw) + versions + .keys() + .filter_map(|raw| semver::Version::parse(raw).ok().map(|v| (v, raw))) + .filter(|(v, _)| (v.pre.is_empty() || range_has_prerelease) && req.matches(v)) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.clone()) } /// PyPI version specifier used by install wrappers. 
We parse a @@ -298,28 +290,7 @@ pub fn pypi_resolve( .unwrap_or(DEFAULT_PYPI_REGISTRY) .trim_end_matches('/'); let url = format!("{}/pypi/{}/json", base, urlencoding::encode(name)); - - let client = http_client(); - let resp = client - .get(&url) - .header("Accept", "application/json") - .send() - .map_err(|e| format!("PyPI request failed: {}", e))?; - - let status = resp.status(); - if status == reqwest::StatusCode::NOT_FOUND { - return Err(format!("package '{}' not found on PyPI ({})", name, base)); - } - if !status.is_success() { - return Err(format!("PyPI returned status {} for '{}'", status, name)); - } - - let body = resp - .text() - .map_err(|e| format!("failed to read PyPI response: {}", e))?; - - let meta: PypiInfoResponse = serde_json::from_str(&body) - .map_err(|e| format!("failed to parse PyPI response for '{}': {}", name, e))?; + let meta: PypiInfoResponse = fetch_registry_json(&url, "PyPI", name, base)?; let candidates = collect_pypi_candidates(&meta); // A yanked release resolves only via an exact pin (PEP 592), matching @@ -387,21 +358,15 @@ fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { if files.is_empty() { continue; } - let mut earliest: Option> = None; - for f in files { - let raw = f - .upload_time_iso_8601 - .as_deref() - .or(f.upload_time.as_deref()); - if let Some(raw) = raw { - if let Ok(dt) = parse_iso8601(raw) { - earliest = match earliest { - Some(prev) if prev <= dt => Some(prev), - _ => Some(dt), - }; - } - } - } + let earliest = files + .iter() + .filter_map(|f| { + f.upload_time_iso_8601 + .as_deref() + .or(f.upload_time.as_deref()) + }) + .filter_map(|raw| parse_iso8601(raw).ok()) + .min(); if let Some(dt) = earliest { out.push(PypiCandidate { version: ver.clone(), @@ -417,23 +382,17 @@ fn collect_pypi_candidates(meta: &PypiInfoResponse) -> Vec { /// best-effort PEP 440 ordering. Falls back to the entry with the /// latest upload time if no candidate parses as semver. 
fn pick_latest_stable(candidates: &[PypiCandidate]) -> Option<&PypiCandidate> { - let mut best_semver: Option<(semver::Version, &PypiCandidate)> = None; - for c in candidates { - let normalized = normalize_for_semver(&c.version); - if let Ok(v) = semver::Version::parse(&normalized) { - if !v.pre.is_empty() { - continue; - } - match &best_semver { - Some((cur, _)) if cur >= &v => {} - _ => best_semver = Some((v, c)), - } - } - } - if let Some((_, picked)) = best_semver { - return Some(picked); - } - candidates.iter().max_by_key(|c| c.uploaded) + candidates + .iter() + .filter_map(|c| { + semver::Version::parse(&normalize_for_semver(&c.version)) + .ok() + .filter(|v| v.pre.is_empty()) + .map(|v| (v, c)) + }) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, c)| c) + .or_else(|| candidates.iter().max_by_key(|c| c.uploaded)) } /// Best-effort PEP 440 → semver: PyPI versions are usually `X.Y.Z` or @@ -494,41 +453,28 @@ fn pypi_resolve_specifier( requirements.push((op, v)); } - let mut best: Option<(semver::Version, String)> = None; - for c in candidates { - let raw = &c.version; - let v = match semver::Version::parse(&normalize_for_semver(raw)) { - Ok(v) => v, - Err(_) => continue, - }; - if !v.pre.is_empty() { - continue; - } - let satisfies = requirements.iter().all(|(op, want)| match *op { - "==" => &v == want, - ">=" => &v >= want, - "<=" => &v <= want, - "!=" => &v != want, - ">" => &v > want, - "<" => &v < want, - "~=" => { - if &v < want { - return false; - } - let upper = semver::Version::new(want.major, want.minor + 1, 0); - v < upper - } + let satisfies = |v: &semver::Version| { + requirements.iter().all(|(op, want)| match *op { + "==" => v == want, + ">=" => v >= want, + "<=" => v <= want, + "!=" => v != want, + ">" => v > want, + "<" => v < want, + "~=" => *v >= *want && *v < semver::Version::new(want.major, want.minor + 1, 0), _ => false, - }); - if !satisfies { - continue; - } - match &best { - Some((cur, _)) if cur >= &v => {} - _ => best = Some((v, 
raw.clone())), - } - } - Ok(best.map(|(_, raw)| raw)) + }) + }; + Ok(candidates + .iter() + .filter_map(|c| { + semver::Version::parse(&normalize_for_semver(&c.version)) + .ok() + .filter(|v| v.pre.is_empty() && satisfies(v)) + .map(|v| (v, &c.version)) + }) + .max_by(|(a, _), (b, _)| a.cmp(b)) + .map(|(_, raw)| raw.clone())) } #[cfg(test)] diff --git a/src/vuln_api/mod.rs b/src/vuln_api/mod.rs index 7d13571..d43af0a 100644 --- a/src/vuln_api/mod.rs +++ b/src/vuln_api/mod.rs @@ -25,6 +25,24 @@ const REQUEST_TIMEOUT: Duration = Duration::from_secs(30); /// Cloudflare HTML before it gets ugly. const ERROR_BODY_SNIPPET_LEN: usize = 300; +/// Registry ecosystem a package check targets. Typed so the URL path +/// segment and the per-ecosystem name encoding can't drift apart on a +/// string spelling. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Ecosystem { + Npm, + Pypi, +} + +impl Ecosystem { + pub fn path_segment(self) -> &'static str { + match self { + Ecosystem::Npm => "npm", + Ecosystem::Pypi => "pypi", + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct VulnCheckResponse { pub ecosystem: String, @@ -84,11 +102,10 @@ pub fn auth_header(token: &str) -> (&'static str, String) { /// Encode package name for the vuln-api path segment. /// npm scoped names: `@scope/pkg` → `@scope%2fpkg`. 
-fn encode_package_name(ecosystem: &str, name: &str) -> String { - if ecosystem.eq_ignore_ascii_case("npm") { - crate::verify_deps::registry::encode_npm_name(name) - } else { - urlencoding::encode(name).into_owned() +fn encode_package_name(ecosystem: Ecosystem, name: &str) -> String { + match ecosystem { + Ecosystem::Npm => crate::verify_deps::registry::encode_npm_name(name), + Ecosystem::Pypi => urlencoding::encode(name).into_owned(), } } @@ -186,7 +203,7 @@ pub fn check_package_version( client: &reqwest::blocking::Client, base_url: &str, token: Option<&str>, - ecosystem: &str, + ecosystem: Ecosystem, name: &str, version: &str, ) -> Result> { @@ -195,7 +212,10 @@ pub fn check_package_version( let encoded_version = urlencoding::encode(version); let url = format!( "{}/v1/packages/{}/{}/versions/{}/check", - base, ecosystem, encoded_name, encoded_version + base, + ecosystem.path_segment(), + encoded_name, + encoded_version ); debug(&format!("Sending vuln-api request to URL: {}", url)); @@ -217,7 +237,7 @@ pub fn check_package_version( } 404 => { return Ok(VulnCheckResponse { - ecosystem: ecosystem.to_string(), + ecosystem: ecosystem.path_segment().to_string(), package_name: name.to_string(), version: version.to_string(), is_vulnerable: false, @@ -249,10 +269,15 @@ pub fn check_package_version( // Confused-deputy guard: refuse to attribute advisories to a different // (name, version, ecosystem) than what we asked about. The server is // allowed to be silent on identity, but if it answers, it must match. 
- if !parsed.ecosystem.is_empty() && !parsed.ecosystem.eq_ignore_ascii_case(ecosystem) { + if !parsed.ecosystem.is_empty() + && !parsed + .ecosystem + .eq_ignore_ascii_case(ecosystem.path_segment()) + { return Err(format!( "vuln-api response ecosystem '{}' does not match request '{}'", - parsed.ecosystem, ecosystem + parsed.ecosystem, + ecosystem.path_segment() ) .into()); } @@ -306,7 +331,7 @@ mod tests { &client, &stub.base_url, Some("test-token"), - "npm", + Ecosystem::Npm, "lodash", "4.17.20", ) @@ -317,8 +342,15 @@ mod tests { fn captured_request(auth_token: Option<&str>) -> String { let (base_url, requests) = spawn_capturing_vuln_api_stub(); let client = http_client().expect("test client"); - check_package_version(&client, &base_url, auth_token, "npm", "lodash", "4.17.20") - .expect("captured request should succeed"); + check_package_version( + &client, + &base_url, + auth_token, + Ecosystem::Npm, + "lodash", + "4.17.20", + ) + .expect("captured request should succeed"); let requests = requests.lock().unwrap(); requests[0].clone() } @@ -406,7 +438,7 @@ mod tests { &client, &stub.base_url, Some("test-token"), - "npm", + Ecosystem::Npm, "lodash", "4.17.20", ) @@ -471,19 +503,16 @@ mod tests { #[test] fn encode_package_name_scoped_npm() { - assert_eq!(encode_package_name("npm", "@types/node"), "@types%2fnode"); - assert_eq!(encode_package_name("npm", "lodash"), "lodash"); + assert_eq!( + encode_package_name(Ecosystem::Npm, "@types/node"), + "@types%2fnode" + ); + assert_eq!(encode_package_name(Ecosystem::Npm, "lodash"), "lodash"); } #[test] fn encode_package_name_pypi() { - assert_eq!(encode_package_name("PyPI", "requests"), "requests"); - } - - #[test] - fn encode_package_name_npm_case_insensitive() { - // Defends against vuln_api_ecosystem() casing changes. 
- assert_eq!(encode_package_name("NPM", "@types/node"), "@types%2fnode"); + assert_eq!(encode_package_name(Ecosystem::Pypi, "requests"), "requests"); } #[test] diff --git a/src/vuln_api_stub/mod.rs b/src/vuln_api_stub/mod.rs index 421cd0e..3231c01 100644 --- a/src/vuln_api_stub/mod.rs +++ b/src/vuln_api_stub/mod.rs @@ -84,17 +84,27 @@ pub fn spawn_capturing_vuln_api_stub() -> (String, std::sync::Arc String { + format!( + "HTTP/1.1 {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\nConnection: close\r\n\r\n{}", + status_line, + extra_headers, + body.len(), + body + ) +} + /// The value of header `name` in a raw captured HTTP request, if present. pub fn header_value(request: &str, name: &str) -> Option { request @@ -168,17 +178,14 @@ fn handle_connection( None => (400, r#"{"error":"bad request"}"#.to_string(), false), }; - let status_text = status_text(status_code); - // `Connection: close` is load-bearing: the stub serves one response per - // connection, so without it reqwest pools the socket and a second request - // (the gate's tree pass makes several per run) races the close and fails. 
- let response = format!( - "HTTP/1.1 {} {}\r\nContent-Type: application/json\r\n{}Content-Length: {}\r\nConnection: close\r\n\r\n{}", - status_code, - status_text, - if retry_after { "Retry-After: 1\r\n" } else { "" }, - response_body.len(), - response_body + let response = http_response( + &format!("{} {}", status_code, status_text(status_code)), + if retry_after { + "Retry-After: 1\r\n" + } else { + "" + }, + &response_body, ); let _ = stream.write_all(response.as_bytes()); } diff --git a/tests/cli_bare_install.rs b/tests/cli_bare_install.rs index a5c6f73..b2e485b 100644 --- a/tests/cli_bare_install.rs +++ b/tests/cli_bare_install.rs @@ -185,10 +185,8 @@ fn bare_npm_tokenless_runs_public_tree_check() { .fake_tree_pm("npm", NPM_LOCK, 0) .oldpkg_registry() .vuln_checks(HashMap::new()) - .token("test-token") .with_project_file("package.json", PACKAGE_JSON) .build(); - h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["npm", "install"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("install")); @@ -238,10 +236,8 @@ fn bare_yarn_note_prints_without_token_too() { .fake_recorder("yarn", 0) .oldpkg_registry() .vuln_checks(HashMap::new()) - .token("test-token") .in_project_dir() .build(); - h.cmd.env_remove("CORGEA_TOKEN"); let out = h .cmd .args(["yarn", "install"]) diff --git a/tests/cli_provenance.rs b/tests/cli_provenance.rs index 6b2ecc1..0819f48 100644 --- a/tests/cli_provenance.rs +++ b/tests/cli_provenance.rs @@ -13,7 +13,7 @@ mod common; -use common::{key, TreeHarness, NPM_LOCK}; +use common::{key, tree_harness, NPM_LOCK}; use std::collections::HashMap; use tempfile::TempDir; @@ -54,7 +54,7 @@ fn pip_requirements_finding_labeled_from_requirements() { key("pypi", "reqpkg", "6.0.0"), vulnerable_body("pypi", "reqpkg", "6.0.0", None), ); - let mut h = TreeHarness::new("pip", checks, HashMap::new(), PIP_REQ_REPORT); + let mut h = tree_harness("pip", checks, HashMap::new(), PIP_REQ_REPORT); 
let out = h .cmd .args(["pip", "install", "-r", "reqs.txt"]) @@ -82,7 +82,7 @@ fn npm_preexisting_direct_dep_labeled_with_fix_hint() { key("npm", "evildep", "0.4.2"), vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -119,7 +119,7 @@ fn npm_preexisting_fix_hint_keeps_hedge_when_fix_is_partial() { "vulnerable_version_range":null,"fixed_version":null}]}"# .to_string(), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -157,7 +157,7 @@ fn preexisting_vulnerable_with_unverifiable_transitive_keeps_generic_refusal() { ); let mut statuses = HashMap::new(); statuses.insert(key("npm", "newdep", "2.0.0"), 503u16); - let mut h = TreeHarness::new("npm", checks, statuses, LOCK_WITH_NEWDEP); + let mut h = tree_harness("npm", checks, statuses, LOCK_WITH_NEWDEP); h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd @@ -185,7 +185,7 @@ fn npm_preexisting_without_fix_has_no_hint() { key("npm", "evildep", "0.4.2"), vulnerable_body("npm", "evildep", "0.4.2", None), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) @@ -209,7 +209,7 @@ fn pip_json_carries_origin_per_tree_entry() { // All-clean run mixing origins: the named `oldpkg` matches its outcome, // `reqpkg` (requested) and `evildep` (transitive) land in `tree.transitive` // with their origins. 
- let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), PIP_MIXED_REPORT); + let mut h = tree_harness("pip", HashMap::new(), HashMap::new(), PIP_MIXED_REPORT); let out = h .cmd .args([ @@ -249,7 +249,7 @@ fn npm_json_carries_preexisting_origin() { key("npm", "evildep", "0.4.2"), vulnerable_body("npm", "evildep", "0.4.2", Some("1.2.2")), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) diff --git a/tests/cli_refusal_context.rs b/tests/cli_refusal_context.rs index 61f9b4c..b27a9b4 100644 --- a/tests/cli_refusal_context.rs +++ b/tests/cli_refusal_context.rs @@ -15,7 +15,7 @@ mod common; -use common::{key, TreeHarness, TREE_REPORT}; +use common::{key, tree_harness, GateHarness, TREE_REPORT}; use corgea::vuln_api_stub::PackageKey; use std::collections::HashMap; use tempfile::TempDir; @@ -25,11 +25,11 @@ const TREE_REFUSAL: &str = "Refusing to run install: your existing dependency tr /// Refusal when a named target carries a blocking verdict. const GENERIC_REFUSAL: &str = "Refusing to run install. Pass --force to proceed despite findings."; -fn harness(checks: HashMap, statuses: HashMap) -> TreeHarness { - TreeHarness::new("pip", checks, statuses, TREE_REPORT) +fn harness(checks: HashMap, statuses: HashMap) -> GateHarness { + tree_harness("pip", checks, statuses, TREE_REPORT) } -fn run_install(h: &mut TreeHarness) -> std::process::Output { +fn run_install(h: &mut GateHarness) -> std::process::Output { h.cmd .args(["pip", "install", "oldpkg==1.0.0"]) .output() diff --git a/tests/cli_remediation.rs b/tests/cli_remediation.rs index a266192..ac1ff6c 100644 --- a/tests/cli_remediation.rs +++ b/tests/cli_remediation.rs @@ -3,7 +3,7 @@ //! covering every advisory. When any advisory has no known fix, no steer //! prints and JSON `remediation` is null. //! -//! 
Uses the shared `common::PipHarness` (pypi stub published 2020 so recency +//! Uses the shared `common::pip_harness` (pypi stub published 2020 so recency //! never blocks, a fake pip recording its argv, the in-crate vuln-api stub, //! and a set token) — every block here is the verdict's doing. @@ -11,7 +11,7 @@ mod common; -use common::{key, vulnerable_body, PipHarness}; +use common::{key, pip_harness, vulnerable_body}; use std::collections::HashMap; fn fixed_body() -> String { @@ -26,7 +26,7 @@ fn no_fix_body() -> String { fn fixed_match_blocks_and_names_safe_version() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -50,7 +50,7 @@ fn fixed_match_blocks_and_names_safe_version() { fn no_fix_match_reports_no_fixed_version_known() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -77,7 +77,7 @@ fn no_fix_match_reports_no_fixed_version_known() { fn json_remediation_carries_safe_version() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), fixed_body()); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -97,7 +97,7 @@ fn json_remediation_carries_safe_version() { fn json_remediation_null_when_no_fix() { let mut checks = HashMap::new(); checks.insert(key("pypi", "oldpkg", "1.0.0"), no_fix_body()); - let mut h = PipHarness::new(checks, HashMap::new(), 
Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) diff --git a/tests/cli_tree.rs b/tests/cli_tree.rs index 13d7704..5465d12 100644 --- a/tests/cli_tree.rs +++ b/tests/cli_tree.rs @@ -13,7 +13,7 @@ mod common; use common::{ - key, vulnerable_body, TreeHarness, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, UV_COMPILED, + key, tree_harness, vulnerable_body, NPM_LOCK, RESOLUTION_FAILS, TREE_REPORT, UV_COMPILED, }; use std::collections::HashMap; use tempfile::TempDir; @@ -48,7 +48,7 @@ fn transitive_vulnerable_blocks_install() { for (binary, eco, payload, args) in cases { let mut checks = HashMap::new(); checks.insert(key(eco, "evildep", "0.4.2"), vulnerable_evildep_body(eco)); - let mut h = TreeHarness::new(binary, checks, HashMap::new(), payload); + let mut h = tree_harness(binary, checks, HashMap::new(), payload); let out = h.cmd.args(args).output().expect("run corgea"); assert_eq!( out.status.code(), @@ -80,7 +80,7 @@ fn uv_requirements_file_install_is_tree_gated() { key("pypi", "evildep", "0.4.2"), vulnerable_evildep_body("pypi"), ); - let mut h = TreeHarness::new("uv", checks, HashMap::new(), UV_COMPILED); + let mut h = tree_harness("uv", checks, HashMap::new(), UV_COMPILED); let out = h .cmd .current_dir(cwd.path()) @@ -110,7 +110,7 @@ fn tree_pass_runs_via_pip3_when_pip_is_absent() { key("pypi", "evildep", "0.4.2"), vulnerable_evildep_body("pypi"), ); - let mut h = TreeHarness::new("pip3", checks, HashMap::new(), TREE_REPORT); + let mut h = tree_harness("pip3", checks, HashMap::new(), TREE_REPORT); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -151,7 +151,7 @@ fn resolution_failure_falls_back_with_loud_warning() { ), ]; for (binary, args, forwarded_argv) in cases { - let mut h = TreeHarness::new(binary, HashMap::new(), HashMap::new(), RESOLUTION_FAILS); + let mut h = tree_harness(binary, HashMap::new(), HashMap::new(), 
RESOLUTION_FAILS); let out = h.cmd.args(args).output().expect("run corgea"); assert_eq!( out.status.code(), @@ -174,7 +174,7 @@ fn pip_json_carries_tree_object() { key("pypi", "evildep", "0.4.2"), vulnerable_evildep_body("pypi"), ); - let mut h = TreeHarness::new("pip", checks, HashMap::new(), TREE_REPORT); + let mut h = tree_harness("pip", checks, HashMap::new(), TREE_REPORT); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -196,7 +196,7 @@ fn pip_json_carries_tree_object() { #[test] fn pip_clean_tree_proceeds() { // Stub default-clean (no overrides), so every resolved package is clean. - let mut h = TreeHarness::new("pip", HashMap::new(), HashMap::new(), TREE_REPORT); + let mut h = tree_harness("pip", HashMap::new(), HashMap::new(), TREE_REPORT); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -228,7 +228,7 @@ fn npm_does_not_touch_project_lockfile() { key("npm", "evildep", "0.4.2"), vulnerable_evildep_body("npm"), ); - let mut h = TreeHarness::new("npm", checks, HashMap::new(), NPM_LOCK); + let mut h = tree_harness("npm", checks, HashMap::new(), NPM_LOCK); let out = h .cmd .current_dir(project.path()) diff --git a/tests/cli_uv_sync.rs b/tests/cli_uv_sync.rs index 6a6c10c..478609e 100644 --- a/tests/cli_uv_sync.rs +++ b/tests/cli_uv_sync.rs @@ -158,10 +158,8 @@ fn uv_sync_tokenless_runs_public_lock_check() { let mut h = GateHarness::new() .fake_recorder("uv", 0) .vuln_checks(HashMap::new()) - .token("test-token") .with_project_file("uv.lock", UV_LOCK) .build(); - h.cmd.env_remove("CORGEA_TOKEN"); let out = h.cmd.args(["uv", "sync"]).output().expect("run corgea"); assert_eq!(out.status.code(), Some(0)); assert_eq!(h.recorded_argv().as_deref(), Some("sync")); diff --git a/tests/cli_verdict.rs b/tests/cli_verdict.rs index c09c0f2..2b4489e 100644 --- a/tests/cli_verdict.rs +++ b/tests/cli_verdict.rs @@ -3,7 +3,7 @@ //! //! Composes the `cli_install.rs` harness pattern (fake package manager on a //! 
private PATH + local pypi registry stub) with the in-crate vuln-api stub — -//! the shared `common::PipHarness`. `oldpkg==1.0.0` is published in 2020, so +//! the shared `common::pip_harness`. `oldpkg==1.0.0` is published in 2020, so //! recency never blocks here — every block in this file is the verdict's //! doing. @@ -11,7 +11,7 @@ mod common; -use common::{key, vulnerable_body, PipHarness}; +use common::{key, pip_harness, vulnerable_body}; use corgea::vuln_api_stub::{header_value, spawn_capturing_vuln_api_stub}; use std::collections::HashMap; @@ -22,7 +22,7 @@ fn vulnerable_pin_blocks_without_running_install() { key("pypi", "oldpkg", "1.0.0"), vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), ); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -52,7 +52,7 @@ fn alternate_pypi_spelling_hits_canonical_verdict() { key("pypi", "flask-cors", "1.0.0"), vulnerable_body("pypi", "flask-cors", "1.0.0", "GHSA-TEST-0001", None), ); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "Flask_Cors==1.0.0"]) @@ -75,7 +75,7 @@ fn force_overrides_vulnerable_block_and_propagates_exit_code() { key("pypi", "oldpkg", "1.0.0"), vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), ); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 7); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 7); let out = h .cmd .args(["pip", "--force", "install", "oldpkg==1.0.0"]) @@ -99,7 +99,7 @@ fn resolution_error_fails_closed_when_authenticated() { // The wildcard registry stub only knows version 1.0.0, so `==2.0.0` // is a resolution error: no verdict was obtained, and authenticated // mode must block — 
otherwise a registry outage bypasses the gate. - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd @@ -124,7 +124,7 @@ fn resolution_error_fails_closed_when_authenticated() { fn verdict_503_fails_closed() { let mut statuses = HashMap::new(); statuses.insert(key("pypi", "oldpkg", "1.0.0"), 503u16); - let mut h = PipHarness::new(HashMap::new(), statuses, Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), statuses, Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h .cmd @@ -149,7 +149,7 @@ fn tokenless_public_check_blocks_vulnerable_pin() { key("pypi", "oldpkg", "1.0.0"), vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), ); - let mut h = PipHarness::new(checks, HashMap::new(), None, 0); + let mut h = pip_harness(checks, HashMap::new(), None, 0); let out = h .cmd .args(["pip", "install", "oldpkg==1.0.0"]) @@ -177,7 +177,7 @@ fn tokenless_public_check_blocks_vulnerable_pin() { #[test] fn tokenless_vuln_api_outage_warns_but_installs() { - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), None, 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), None, 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); let out = h .cmd @@ -200,7 +200,7 @@ fn tokenless_vuln_api_outage_warns_but_installs() { #[test] fn progress_line_prints_only_above_eight_verdict_jobs() { // Nine resolvable named targets → 9 verdict jobs (> 8) → progress line. 
- let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("test-token"), 0); let mut args = vec!["pip".to_string(), "install".to_string()]; args.extend((1..=9).map(|i| format!("pkg{i}==1.0.0"))); let out = h.cmd.args(&args).output().expect("run corgea"); @@ -212,7 +212,7 @@ fn progress_line_prints_only_above_eight_verdict_jobs() { ); // Two jobs → quiet. - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "install", "pkg1==1.0.0", "pkg2==1.0.0"]) @@ -231,7 +231,7 @@ fn outage_noise_collapses_above_three_unverifiable() { // vuln-api refuses connections: every check fails with the same // error-prefix (only the per-package URL differs). Four findings → // one collapsed line; counts and fail-closed exit code unchanged. - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h @@ -263,7 +263,7 @@ fn outage_noise_collapses_above_three_unverifiable() { ); // Three findings stay per-line — no collapse at the threshold. 
- let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("test-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", "http://127.0.0.1:1"); h.cmd.env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); let out = h @@ -297,7 +297,7 @@ fn json_carries_verdict_object_and_mode() { key("pypi", "oldpkg", "1.0.0"), vulnerable_body("pypi", "oldpkg", "1.0.0", "MAL-2024-0001", Some("2.0.0")), ); - let mut h = PipHarness::new(checks, HashMap::new(), Some("test-token"), 0); + let mut h = pip_harness(checks, HashMap::new(), Some("test-token"), 0); let out = h .cmd .args(["pip", "--json", "install", "oldpkg==1.0.0"]) @@ -323,7 +323,7 @@ fn json_carries_verdict_object_and_mode() { #[test] fn custom_vuln_api_url_with_token_does_not_send_token_by_default() { let (base_url, requests) = spawn_capturing_vuln_api_stub(); - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); h.cmd.env("CORGEA_VULN_API_URL", &base_url); let out = h .cmd @@ -340,7 +340,7 @@ fn custom_vuln_api_url_with_token_does_not_send_token_by_default() { #[test] fn custom_vuln_api_url_sends_token_only_with_opt_in() { let (base_url, requests) = spawn_capturing_vuln_api_stub(); - let mut h = PipHarness::new(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); + let mut h = pip_harness(HashMap::new(), HashMap::new(), Some("opaque-token"), 0); h.cmd .env("CORGEA_VULN_API_URL", &base_url) .env("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL", "1"); diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 1243a48..a025f3e 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -97,8 +97,7 @@ pub const UV_COMPILED: &str = "oldpkg==1.0.0\nevildep==0.4.2\n"; /// Spawn a one-response-per-connection HTTP stub on an ephemeral 127.0.0.1 /// port; `route` maps a request path to `(status line, body)`. Returns the -/// base URL. 
`Connection: close` is load-bearing — without it reqwest pools -/// the socket and a second request races the close and fails. +/// base URL. #[allow(dead_code)] pub fn spawn_http_stub(route: F) -> String where @@ -120,12 +119,7 @@ where .and_then(|l| l.split_whitespace().nth(1)) .unwrap_or(""); let (status, body) = route(path); - let response = format!( - "HTTP/1.1 {}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", - status, - body.len(), - body - ); + let response = corgea::vuln_api_stub::http_response(status, "", &body); let _ = stream.write_all(response.as_bytes()); } }); @@ -297,12 +291,6 @@ impl GateHarness { self } - /// Raw script escape hatch. - pub fn script(self, binary: &str, script: &str) -> Self { - write_script(self._bin.path(), binary, script); - self - } - /// Raw script escape hatch for scripts that need the temp bin dir or /// marker path. pub fn script_with_paths(self, binary: &str, make_script: F) -> Self @@ -383,48 +371,26 @@ impl GateHarness { /// `corgea` wired to the wildcard pypi registry stub, a report-less fake pip /// (recording its argv to a marker), and a vuln-api stub. +/// `token: None` exercises public mode (no CORGEA_TOKEN set). #[cfg(unix)] #[allow(dead_code)] -pub struct PipHarness(GateHarness); - -#[cfg(unix)] -#[allow(dead_code)] -impl PipHarness { - /// `token: None` exercises public mode (no CORGEA_TOKEN set). - pub fn new( - checks: HashMap, - statuses: HashMap, - token: Option<&str>, - pip_exit_code: i32, - ) -> Self { - // RESOLUTION_FAILS models an old pip with no `--report`: the tree - // dry-run exits 2, so these tests exercise the named-only fallback. 
- let mut h = GateHarness::new() - .fake_tree_pm("pip", RESOLUTION_FAILS, pip_exit_code) - .wildcard_pypi_registry() - .vuln_checks(checks) - .vuln_statuses(statuses); - if let Some(t) = token { - h = h.token(t); - } - Self(h.build()) - } -} - -#[cfg(unix)] -impl std::ops::Deref for PipHarness { - type Target = GateHarness; - - fn deref(&self) -> &GateHarness { - &self.0 - } -} - -#[cfg(unix)] -impl std::ops::DerefMut for PipHarness { - fn deref_mut(&mut self) -> &mut GateHarness { - &mut self.0 +pub fn pip_harness( + checks: HashMap, + statuses: HashMap, + token: Option<&str>, + pip_exit_code: i32, +) -> GateHarness { + // RESOLUTION_FAILS models an old pip with no `--report`: the tree + // dry-run exits 2, so these tests exercise the named-only fallback. + let mut h = GateHarness::new() + .fake_tree_pm("pip", RESOLUTION_FAILS, pip_exit_code) + .wildcard_pypi_registry() + .vuln_checks(checks) + .vuln_statuses(statuses); + if let Some(t) = token { + h = h.token(t); } + h.build() } /// `corgea` wired to the oldpkg registry stub, a tree-aware fake `binary` @@ -432,41 +398,17 @@ impl std::ops::DerefMut for PipHarness { /// stub, and a token. 
#[cfg(unix)] #[allow(dead_code)] -pub struct TreeHarness(GateHarness); - -#[cfg(unix)] -#[allow(dead_code)] -impl TreeHarness { - pub fn new( - binary: &str, - checks: HashMap, - statuses: HashMap, - payload: &str, - ) -> Self { - Self( - GateHarness::new() - .fake_tree_pm(binary, payload, 0) - .oldpkg_registry() - .vuln_checks(checks) - .vuln_statuses(statuses) - .token("test-token") - .build(), - ) - } -} - -#[cfg(unix)] -impl std::ops::Deref for TreeHarness { - type Target = GateHarness; - - fn deref(&self) -> &GateHarness { - &self.0 - } -} - -#[cfg(unix)] -impl std::ops::DerefMut for TreeHarness { - fn deref_mut(&mut self) -> &mut GateHarness { - &mut self.0 - } +pub fn tree_harness( + binary: &str, + checks: HashMap, + statuses: HashMap, + payload: &str, +) -> GateHarness { + GateHarness::new() + .fake_tree_pm(binary, payload, 0) + .oldpkg_registry() + .vuln_checks(checks) + .vuln_statuses(statuses) + .token("test-token") + .build() } From 7547a9e4f9aeac37044b022f3ea55263c800a4c7 Mon Sep 17 00:00:00 2001 From: juangaitanv Date: Thu, 11 Jun 2026 21:19:32 +0200 Subject: [PATCH 46/59] Consolidate install gate vuln API handling --- src/config.rs | 36 +++----- src/main.rs | 2 +- src/precheck/mod.rs | 128 +++++++++++++++------------ src/precheck/render.rs | 6 +- src/precheck/tree.rs | 17 ++-- src/precheck/verdict.rs | 170 +++++++++++++++--------------------- src/utils/api.rs | 6 +- src/verify_deps/registry.rs | 8 +- src/vuln_api/mod.rs | 58 +++++++----- src/vuln_api_stub/mod.rs | 94 +++++++++++--------- tests/cli_provenance.rs | 38 ++++---- tests/common/mod.rs | 28 ++---- 12 files changed, 283 insertions(+), 308 deletions(-) diff --git a/src/config.rs b/src/config.rs index 5b6a18f..2c9287c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -102,47 +102,39 @@ impl Config { self.debug } +} - /// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, - /// then the public default. 
- pub fn get_vuln_api_url(&self) -> String { - crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") - .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) - .trim() - .trim_end_matches('/') - .to_string() - } +/// Base URL for the vuln-api service: `CORGEA_VULN_API_URL` env var, +/// then the public default. Pure env/constant — no config file field. +pub fn vuln_api_url() -> String { + crate::utils::generic::get_env_var_if_exists("CORGEA_VULN_API_URL") + .unwrap_or_else(|| DEFAULT_VULN_API_URL.to_string()) + .trim() + .trim_end_matches('/') + .to_string() } #[cfg(test)] mod tests { use super::*; - fn test_config() -> Config { - Config { - url: "https://www.corgea.app".to_string(), - debug: 0, - token: "".to_string(), - } - } - - /// All `get_vuln_api_url` cases in one test fn: the env-var cases + /// All `vuln_api_url` cases in one test fn: the env-var cases /// mutate process-global state, so they must not run concurrently /// with each other under the parallel test harness. #[test] - fn get_vuln_api_url_resolution_order() { + fn vuln_api_url_resolution_order() { env::remove_var("CORGEA_VULN_API_URL"); // Default when the env var is unset. - assert_eq!(test_config().get_vuln_api_url(), DEFAULT_VULN_API_URL); + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); // Env var wins; whitespace and trailing slash trimmed. env::set_var("CORGEA_VULN_API_URL", " https://env.example.com/ "); - assert_eq!(test_config().get_vuln_api_url(), "https://env.example.com"); + assert_eq!(vuln_api_url(), "https://env.example.com"); // Empty / whitespace-only env var is treated as unset. 
env::set_var("CORGEA_VULN_API_URL", " "); - assert_eq!(test_config().get_vuln_api_url(), DEFAULT_VULN_API_URL); + assert_eq!(vuln_api_url(), DEFAULT_VULN_API_URL); env::remove_var("CORGEA_VULN_API_URL"); } } diff --git a/src/main.rs b/src/main.rs index 49d5963..6d5f0c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -272,7 +272,7 @@ fn install_wrap_options( ) -> corgea::precheck::PrecheckOptions { let token = config.get_token(); let token = token.trim(); - let base_url = config.get_vuln_api_url(); + let base_url = config::vuln_api_url(); let custom_vuln_api_url = base_url != config::DEFAULT_VULN_API_URL; let send_token_to_custom = utils::generic::get_env_var_if_exists("CORGEA_VULN_API_SEND_TOKEN_TO_CUSTOM_URL") diff --git a/src/precheck/mod.rs b/src/precheck/mod.rs index 9c8e650..b358245 100644 --- a/src/precheck/mod.rs +++ b/src/precheck/mod.rs @@ -13,12 +13,11 @@ //! *without* running the install. Use `--no-fail` to demote this to a //! warning (the install runs anyway). -pub mod parse; -pub mod tree; - mod detect; mod exec; +mod parse; mod render; +mod tree; mod uv; mod verdict; @@ -73,19 +72,15 @@ impl PackageManager { } } - /// Canonical package name for dedup/matching across spec spellings: - /// PEP 503 for pypi (shared with `deps`), verbatim for npm. + /// Canonical package name for dedup/matching across spec spellings — + /// the ecosystem's rule (`vuln_api::Ecosystem::normalize_name`). /// - /// Invariant: names are normalized at comparison/request time - /// (`verdict::verdict_pool` / `verdict::apply_verdicts` / tree dedup), - /// never at parse time — parsers and resolvers carry raw names. + /// Invariant: request-time normalization is owned by the vuln-api + /// client (`vuln_api::check_package_version`); comparison sites + /// (`verdict::apply_verdicts` / tree dedup) normalize here. Parsers + /// and resolvers carry raw names. 
pub fn normalize_name(self, name: &str) -> String { - match self { - PackageManager::Pip | PackageManager::Uv => { - crate::deps::ecosystems::pypi::normalize_pypi_name(name) - } - PackageManager::Npm | PackageManager::Yarn | PackageManager::Pnpm => name.to_string(), - } + self.ecosystem().normalize_name(name) } } @@ -105,14 +100,6 @@ impl VerdictMode { VerdictMode::Authenticated { token } => Some(token.as_str()), } } - - fn is_authenticated(&self) -> bool { - matches!(self, VerdictMode::Authenticated { .. }) - } - - fn is_public(&self) -> bool { - matches!(self, VerdictMode::Public) - } } /// Connection details for the vuln-api verdict pass. @@ -141,6 +128,20 @@ pub enum VerdictStatus { NotChecked, } +impl VerdictStatus { + /// Whether this verdict blocks the install: vulnerable always; + /// unverifiable only when the mode fails closed (authenticated). + /// The single definition of "blocking finding", shared by + /// `verdict::block_reason` and the refusal-blame test. + fn blocks(&self, fail_closed: bool) -> bool { + match self { + VerdictStatus::Vulnerable(_) => true, + VerdictStatus::Unverifiable(_) => fail_closed, + VerdictStatus::Clean | VerdictStatus::NotChecked => false, + } + } +} + #[derive(Debug, Clone)] pub struct PrecheckOptions { pub threshold: Duration, @@ -296,40 +297,40 @@ impl PrecheckReport { pub fn recent_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Resolved { age, .. } if self.is_recent(*age))) } - pub fn vulnerable_count(&self) -> usize { - self.named_vulnerable_count() + self.tree_vulnerable_count() + /// Every verdict in the report: named (resolved) outcomes, then + /// transitive tree findings. + fn verdicts(&self) -> impl Iterator { + self.named_verdicts().chain(self.tree_verdicts()) } - pub fn unverifiable_count(&self) -> usize { - self.named_unverifiable_count() + self.tree_unverifiable_count() + /// Verdicts on the named targets this command adds. 
+ fn named_verdicts(&self) -> impl Iterator { + self.outcomes.iter().filter_map(|o| match o { + TargetOutcome::Resolved { verdict, .. } => Some(verdict), + _ => None, + }) } - /// Vulnerable findings among the named targets this command adds. - pub fn named_vulnerable_count(&self) -> usize { - self.named_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + /// Verdicts beyond the named targets (the resolved tree). + fn tree_verdicts(&self) -> impl Iterator { + match &self.tree { + Some(TreeReport::Full { transitive, .. }) => transitive.as_slice(), + Some(TreeReport::NamedOnly { .. }) | None => &[], + } + .iter() + .map(|o| &o.verdict) } - /// Unverifiable findings among the named targets this command adds. - pub fn named_unverifiable_count(&self) -> usize { - self.named_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) + pub fn vulnerable_count(&self) -> usize { + count_vulnerable(self.verdicts()) } - /// Count named (resolved) outcomes whose verdict matches `pred`. - fn named_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { - self.count(|o| matches!(o, TargetOutcome::Resolved { verdict, .. } if pred(verdict))) + pub fn unverifiable_count(&self) -> usize { + count_unverifiable(self.verdicts()) } /// Vulnerable findings beyond the named targets (the resolved tree). pub fn tree_vulnerable_count(&self) -> usize { - self.tree_finding_count(|v| matches!(v, VerdictStatus::Vulnerable(_))) + count_vulnerable(self.tree_verdicts()) } /// Unverifiable findings beyond the named targets (the resolved tree). pub fn tree_unverifiable_count(&self) -> usize { - self.tree_finding_count(|v| matches!(v, VerdictStatus::Unverifiable(_))) - } - /// Count transitive tree findings whose verdict matches `pred`. - fn tree_finding_count(&self, pred: impl Fn(&VerdictStatus) -> bool) -> usize { - match &self.tree { - Some(TreeReport::Full { transitive, .. 
}) => { - transitive.iter().filter(|o| pred(&o.verdict)).count() - } - Some(TreeReport::NamedOnly { .. }) | None => 0, - } + count_unverifiable(self.tree_verdicts()) } pub fn skipped_count(&self) -> usize { self.count(|o| matches!(o, TargetOutcome::Skipped { .. })) @@ -339,6 +340,18 @@ impl PrecheckReport { } } +fn count_vulnerable<'a>(verdicts: impl Iterator) -> usize { + verdicts + .filter(|v| matches!(v, VerdictStatus::Vulnerable(_))) + .count() +} + +fn count_unverifiable<'a>(verdicts: impl Iterator) -> usize { + verdicts + .filter(|v| matches!(v, VerdictStatus::Unverifiable(_))) + .count() +} + /// Canonical entry for ecosystem commands (`corgea npm install …`). /// /// `cmd` is everything after the ecosystem name, e.g. @@ -496,7 +509,7 @@ fn run_parsed_install( if opts .verdict .as_ref() - .is_some_and(|cfg| cfg.mode.is_public() && cfg.public_login_hint) + .is_some_and(|cfg| matches!(cfg.mode, VerdictMode::Public) && cfg.public_login_hint) { eprintln!( "warning: using public CVE checks; login enables authenticated enforcement and private Corgea intelligence." @@ -522,12 +535,11 @@ fn run_parsed_install( /// Only called when `opts.verdict.is_some()`. fn run_tree_pass( manager: PackageManager, - resolution: Result>, String>, + resolution: Result, String>, outcomes: &mut [TargetOutcome], opts: &PrecheckOptions, ) -> TreeReport { - let no_dry_run = || format!("{} has no safe dry-run", manager.binary_name()); - let set = match resolution.and_then(|opt| opt.ok_or_else(no_dry_run)) { + let set = match resolution { Ok(set) => set, Err(reason) => { run_verdict_pass(manager, outcomes, opts); @@ -591,8 +603,8 @@ fn run_verdict_pass( ) { let Some(cfg) = &opts.verdict else { return }; - // One job per resolved target; jobs are 1:1 with outcomes, so - // `apply_verdicts` matches everything and returns no leftovers. + // One job per resolved target, in outcome order; the pool preserves + // order, so verdicts zip straight back onto the resolved outcomes. 
let jobs: Vec = outcomes .iter() .filter_map(|o| match o { @@ -605,12 +617,12 @@ fn run_verdict_pass( }) .collect(); - let results = verdict::verdict_pool(jobs, cfg, manager); - let leftovers = verdict::apply_verdicts(manager, results, outcomes, &Default::default()); - debug_assert!( - leftovers.is_empty(), - "named verdict pass left tree leftovers" - ); + let mut results = verdict::verdict_pool(jobs, cfg, manager).into_iter(); + for o in outcomes.iter_mut() { + if let TargetOutcome::Resolved { verdict, .. } = o { + *verdict = results.next().expect("one verdict per resolved outcome").1; + } + } } #[cfg(test)] diff --git a/src/precheck/render.rs b/src/precheck/render.rs index a68f12d..65cd91f 100644 --- a/src/precheck/render.rs +++ b/src/precheck/render.rs @@ -62,7 +62,11 @@ pub(super) fn requirements_note(parsed: &parse::ParsedInstall) { } pub(super) fn warn_public_lookup_failures(report: &PrecheckReport, opts: &PrecheckOptions) { - if super::verdict::public_verdict(opts) && report.unverifiable_count() > 0 { + let public = opts + .verdict + .as_ref() + .is_some_and(|cfg| matches!(cfg.mode, VerdictMode::Public)); + if public && report.unverifiable_count() > 0 { eprintln!("warning: CVE check unavailable; continuing because public mode is fail-open."); } } diff --git a/src/precheck/tree.rs b/src/precheck/tree.rs index b946788..5bdccf6 100644 --- a/src/precheck/tree.rs +++ b/src/precheck/tree.rs @@ -30,19 +30,20 @@ pub fn covers_input(manager: PackageManager, parsed: &super::parse::ParsedInstal || (manager == PackageManager::Npm && std::path::Path::new("package.json").exists()) } -/// `Ok(None)`: manager has no safe dry-run — named-only with warning. -/// `Err(reason)`: dry-run attempted and failed — named-only, warning carries reason. +/// `Err(reason)`: no safe dry-run for this manager, or the dry-run failed — +/// the caller falls back to named-only and its warning carries `reason`. 
pub fn resolve_tree( manager: PackageManager, install_args: &[String], parsed: &super::parse::ParsedInstall, -) -> Result>, String> { +) -> Result, String> { match manager { - PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args).map(Some), - PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args).map(Some), - PackageManager::Uv => resolve_uv_tree(parsed).map(Some), - // yarn/pnpm have no safe dry-run for installs. - PackageManager::Yarn | PackageManager::Pnpm => Ok(None), + PackageManager::Pip => resolve_pip_tree(manager.binary_name(), install_args), + PackageManager::Npm => resolve_npm_tree(manager.binary_name(), install_args), + PackageManager::Uv => resolve_uv_tree(parsed), + PackageManager::Yarn | PackageManager::Pnpm => { + Err(format!("{} has no safe dry-run", manager.binary_name())) + } } } diff --git a/src/precheck/verdict.rs b/src/precheck/verdict.rs index 86b8d10..6d10f8d 100644 --- a/src/precheck/verdict.rs +++ b/src/precheck/verdict.rs @@ -17,10 +17,8 @@ const VERDICT_CONCURRENCY: usize = 8; /// Bounded worker pool over the verdict jobs. On client/request failure every /// job comes back `Unverifiable`; `block_reason` decides whether that -/// fails closed for the selected mode. -/// Plain work queue, no new crates; `reqwest::blocking::Client` is -/// `Send + Sync`. Result order is not preserved; callers match results back -/// by `(name, version)`. +/// fails closed for the selected mode. Order is preserved: result `i` +/// belongs to job `i`. 
pub(super) fn verdict_pool( jobs: Vec, cfg: &VerdictConfig, @@ -35,9 +33,6 @@ fn verdict_pool_with( manager: PackageManager, concurrency: usize, ) -> Vec<(tree::TreePackage, VerdictStatus)> { - use std::collections::VecDeque; - use std::sync::Mutex; - let client = match crate::vuln_api::http_client() { Ok(c) => c, Err(e) => { @@ -53,35 +48,59 @@ fn verdict_pool_with( } let ecosystem = manager.ecosystem(); - let workers = concurrency.min(jobs.len()).max(1); - let queue = Mutex::new(VecDeque::from(jobs)); - let results = Mutex::new(Vec::new()); + let verdicts = pooled_map( + &jobs, + concurrency, + |job| match crate::vuln_api::check_package_version( + &client, + &cfg.base_url, + cfg.mode.auth_token(), + ecosystem, + &job.name, + &job.version, + ) { + Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), + Ok(_) => VerdictStatus::Clean, + Err(e) => VerdictStatus::Unverifiable(e.to_string()), + }, + ); + jobs.into_iter().zip(verdicts).collect() +} + +/// Order-preserving bounded worker pool: `results[i]` is `f(&items[i])`. +/// Each call is an independent blocking HTTP request on the gate's critical +/// path, so they must not run serially. Plain work-stealing over an index, +/// no new crates; single-item lists skip the thread machinery. +fn pooled_map( + items: &[T], + concurrency: usize, + f: impl Fn(&T) -> R + Sync, +) -> Vec { + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Mutex; + + if items.len() <= 1 { + return items.iter().map(&f).collect(); + } + let next = AtomicUsize::new(0); + let results: Mutex>> = Mutex::new(items.iter().map(|_| None).collect()); + let workers = concurrency.clamp(1, items.len()); std::thread::scope(|s| { for _ in 0..workers { s.spawn(|| loop { - let Some(job) = queue.lock().unwrap().pop_front() else { - break; - }; - // vuln-api advisories are keyed by canonical names; an - // alternate spelling (PEP 503: `Flask_Cors` ≡ `flask-cors`) - // would miss and read as clean. 
- let verdict = match crate::vuln_api::check_package_version( - &client, - &cfg.base_url, - cfg.mode.auth_token(), - ecosystem, - &manager.normalize_name(&job.name), - &job.version, - ) { - Ok(resp) if resp.is_vulnerable => VerdictStatus::Vulnerable(resp.matches), - Ok(_) => VerdictStatus::Clean, - Err(e) => VerdictStatus::Unverifiable(e.to_string()), - }; - results.lock().unwrap().push((job, verdict)); + let i = next.fetch_add(1, Ordering::Relaxed); + let Some(item) = items.get(i) else { break }; + let result = f(item); + results.lock().unwrap()[i] = Some(result); }); } }); - results.into_inner().unwrap() + results + .into_inner() + .unwrap() + .into_iter() + .map(|r| r.expect("pooled_map worker filled every slot")) + .collect() } /// Assign pooled verdicts onto matching named outcomes (by normalized @@ -135,16 +154,11 @@ pub(super) fn apply_verdicts( transitive } +/// Authenticated mode fails closed: lookup errors block instead of warning. pub(super) fn authenticated_verdict(opts: &PrecheckOptions) -> bool { opts.verdict .as_ref() - .is_some_and(|cfg| cfg.mode.is_authenticated()) -} - -pub(super) fn public_verdict(opts: &PrecheckOptions) -> bool { - opts.verdict - .as_ref() - .is_some_and(|cfg| cfg.mode.is_public()) + .is_some_and(|cfg| cfg.mode.auth_token().is_some()) } /// Why the gate refuses to run the install. The single owner of both the @@ -170,9 +184,7 @@ pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> O // in authenticated mode it fails closed like `Unverifiable` — otherwise a // registry outage silently bypasses the gate. 
let fail_closed = authenticated_verdict(opts); - if report.vulnerable_count() > 0 - || (fail_closed && report.unverifiable_count() > 0) - || (fail_closed && report.error_count() > 0) + if report.verdicts().any(|v| v.blocks(fail_closed)) || (fail_closed && report.error_count() > 0) { return Some(if blames_existing_tree(report, opts) { BlockReason::ExistingTree @@ -187,23 +199,18 @@ pub(super) fn block_reason(report: &PrecheckReport, opts: &PrecheckOptions) -> O } /// True when the block is entirely the existing tree's doing: vulnerable -/// findings exist, none sit on a named target (or block as unverifiable -/// there), and every *blocking* tree finding — vulnerable or unverifiable, -/// since `block_reason` refuses on both — genuinely predates this -/// command. A `Requested` finding (pip `-r`) is added by this command and -/// renders as `(from requirements)`; a `Transitive` finding on any install -/// that names targets or requirements files is being pulled in by them -/// right now. Only a truly bare install (`report.bare_install`) or -/// manifest-declared `PreExisting` findings may blame the existing tree. +/// findings exist, no named target blocks, and every *blocking* tree +/// finding (`VerdictStatus::blocks`, same predicate `block_reason` refuses +/// on) genuinely predates this command. A `Requested` finding (pip `-r`) +/// is added by this command and renders as `(from requirements)`; a +/// `Transitive` finding on any install that names targets or requirements +/// files is being pulled in by them right now. Only a truly bare install +/// (`report.bare_install`) or manifest-declared `PreExisting` findings may +/// blame the existing tree. 
fn blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool { let fail_closed = authenticated_verdict(opts); - let named_findings = report.named_vulnerable_count() - + if fail_closed { - report.named_unverifiable_count() - } else { - 0 - }; - if report.vulnerable_count() == 0 || named_findings > 0 { + let named_blocks = report.named_verdicts().any(|v| v.blocks(fail_closed)); + if report.vulnerable_count() == 0 || named_blocks { return false; } let Some(TreeReport::Full { transitive, .. }) = &report.tree else { @@ -211,10 +218,7 @@ fn blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool }; transitive .iter() - .filter(|t| { - matches!(t.verdict, VerdictStatus::Vulnerable(_)) - || (fail_closed && matches!(t.verdict, VerdictStatus::Unverifiable(_))) - }) + .filter(|t| t.verdict.blocks(fail_closed)) .all(|t| match t.origin { // A locked pin predates the sync command that installs it. TreeOrigin::PreExisting | TreeOrigin::Locked => true, @@ -223,41 +227,14 @@ fn blames_existing_tree(report: &PrecheckReport, opts: &PrecheckOptions) -> bool }) } -/// Resolve every named target against its registry through a bounded worker -/// pool — each lookup is an independent blocking HTTP GET on the gate's -/// critical path, so they must not run serially. Order is preserved: -/// outcome `i` belongs to `targets[i]`. +/// Resolve every named target against its registry through the bounded +/// worker pool. Order is preserved: outcome `i` belongs to `targets[i]`. 
pub(super) fn verify_all( targets: &[InstallTarget], opts: &PrecheckOptions, now: &chrono::DateTime, ) -> Vec { - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Mutex; - - if targets.len() <= 1 { - return targets.iter().map(|t| verify_one(t, opts, now)).collect(); - } - let next = AtomicUsize::new(0); - let results: Mutex>> = - Mutex::new(targets.iter().map(|_| None).collect()); - let workers = VERDICT_CONCURRENCY.min(targets.len()); - std::thread::scope(|s| { - for _ in 0..workers { - s.spawn(|| loop { - let i = next.fetch_add(1, Ordering::Relaxed); - let Some(target) = targets.get(i) else { break }; - let outcome = verify_one(target, opts, now); - results.lock().unwrap()[i] = Some(outcome); - }); - } - }); - results - .into_inner() - .unwrap() - .into_iter() - .map(|o| o.expect("verify_all worker filled every slot")) - .collect() + pooled_map(targets, VERDICT_CONCURRENCY, |t| verify_one(t, opts, now)) } fn verify_one( @@ -528,14 +505,11 @@ mod tests { fn verdict_pass_maps_stub_responses() { use std::collections::HashMap; - let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); + let key = |name: &str| crate::vuln_api_stub::key("pypi", name, "1.0.0"); let mut checks = HashMap::new(); checks.insert( key("evil"), - r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string(), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), ); checks.insert(key("flaky"), "{}".to_string()); let mut statuses = HashMap::new(); @@ -584,14 +558,10 @@ mod tests { fn verdict_pool_returns_all_results() { use std::collections::HashMap; - let key = |name: &str| ("pypi".to_string(), name.to_string(), "1.0.0".to_string()); let mut checks = HashMap::new(); checks.insert( - key("evil"), - 
r#"{"ecosystem":"pypi","package_name":"evil","version":"1.0.0","is_vulnerable":true, - "matches":[{"advisory_id":"MAL-2024-0001","severity_level":"critical","tier":1, - "vulnerable_version_range":null,"fixed_version":null}]}"# - .to_string(), + crate::vuln_api_stub::key("pypi", "evil", "1.0.0"), + crate::vuln_api_stub::vulnerable_body("pypi", "evil", "1.0.0", "MAL-2024-0001", None), ); let stub = crate::vuln_api_stub::spawn_with_statuses(checks, HashMap::new()); diff --git a/src/utils/api.rs b/src/utils/api.rs index 32805ca..cf68176 100644 --- a/src/utils/api.rs +++ b/src/utils/api.rs @@ -1,6 +1,6 @@ use crate::log::debug; use crate::utils; -use corgea::vuln_api::auth_header; +use corgea::vuln_api::{auth_header, source as get_source}; use reqwest::header::HeaderMap; use reqwest::StatusCode; use reqwest::{ @@ -19,10 +19,6 @@ use std::path::Path; const CHUNK_SIZE: usize = 50 * 1024 * 1024; // 50 MB const API_BASE: &str = "/api/v1"; -fn get_source() -> String { - std::env::var("CORGEA_SOURCE").unwrap_or_else(|_| "cli".to_string()) -} - fn auth_headers(token: &str) -> HeaderMap { let mut headers = HeaderMap::new(); let (name, value) = auth_header(token); diff --git a/src/verify_deps/registry.rs b/src/verify_deps/registry.rs index 7b6012d..d20f63a 100644 --- a/src/verify_deps/registry.rs +++ b/src/verify_deps/registry.rs @@ -18,8 +18,10 @@ const DEFAULT_PYPI_REGISTRY: &str = "https://pypi.org"; const REQUEST_TIMEOUT: Duration = Duration::from_secs(20); -fn user_agent() -> String { - format!("corgea-cli/{} (deps)", env!("CARGO_PKG_VERSION")) +/// `corgea-cli/ (