diff --git a/Cargo.toml b/Cargo.toml index d60edad..2f232c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,3 +45,6 @@ serde_yaml_ng = "0.10" [target.'cfg(not(target_os = "windows"))'.dependencies] openssl = { version = "0.10", features = ["vendored"] } + +[dev-dependencies] +tempfile = "3.12.0" diff --git a/src/main.rs b/src/main.rs index 442c5a1..597875f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -120,6 +120,12 @@ enum Commands { )] target: Option, + #[arg( + long, + help = "Exclude files matching glob patterns from the scan. Accepts comma-separated glob patterns. Examples: 'tests/**', 'src/**/*.test.ts,**/*.spec.js', '*.md'." + )] + exclude: Option, + #[arg( long, help = "The name of the Corgea project. Defaults to git repository name if found, otherwise to the current directory name." @@ -353,6 +359,7 @@ fn main() { out_format, out_file, target, + exclude, project_name, }) => { verify_token_and_exit_when_fail(&corgea_config); @@ -438,6 +445,11 @@ fn main() { ::log::warn!("\nWarning: you didn't specify an only policy scan, so all other types of scans will run as well."); } } + if exclude.is_some() && *scanner != Scanner::Blast { + ::log::error!("exclude is only supported with blast scanner."); + std::process::exit(1); + } + match scanner { Scanner::Snyk => scan::run_snyk(&corgea_config, project_name.clone()), Scanner::Semgrep => scan::run_semgrep(&corgea_config, project_name.clone()), @@ -451,6 +463,7 @@ fn main() { out_format.clone(), out_file.clone(), target.clone(), + exclude.clone(), project_name.clone(), ), } diff --git a/src/scan.rs b/src/scan.rs index 8c669ca..af1fd4a 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -322,22 +322,18 @@ pub fn upload_scan( Ok(res) => { if !res.status().is_success() { true - } else { - if let Some(server_offset) = res.headers().get("Upload-Offset") { - let expected_offset = offset + chunk.len(); - if let Ok(server_offset_str) = server_offset.to_str() { - if let Ok(server_offset_val) = server_offset_str.parse::() { - if server_offset_val != expected_offset { - log::error!( - "Upload offset mismatch on chunk {}/{}: server has {} bytes but expected {}. \ - This may indicate that chunks are being routed to different server instances. \ - Please contact support.", - index + 1, total_chunks, server_offset_val, expected_offset - ); - true - } else { - false - } + } else if let Some(server_offset) = res.headers().get("Upload-Offset") { + let expected_offset = offset + chunk.len(); + if let Ok(server_offset_str) = server_offset.to_str() { + if let Ok(server_offset_val) = server_offset_str.parse::() { + if server_offset_val != expected_offset { + log::error!( + "Upload offset mismatch on chunk {}/{}: server has {} bytes but expected {}. \ + This may indicate that chunks are being routed to different server instances. \ + Please contact support.", + index + 1, total_chunks, server_offset_val, expected_offset + ); + true } else { false } @@ -347,6 +343,8 @@ pub fn upload_scan( } else { false } + } else { + false } } Err(_) => true, diff --git a/src/scanners/blast.rs b/src/scanners/blast.rs index 9ea207d..25f33ca 100644 --- a/src/scanners/blast.rs +++ b/src/scanners/blast.rs @@ -20,6 +20,7 @@ pub fn run( out_format: Option, out_file: Option, target: Option, + exclude: Option, project_name: Option, ) { // Validate that only_uncommitted and target are not used together @@ -86,8 +87,12 @@ pub fn run( target.as_deref() }; + if target_str.is_none() && exclude.is_some() { + println!("Excluding files matching: {}", exclude.as_deref().unwrap()); + } + if let Some(target_value) = target_str { - match targets::resolve_targets(target_value) { + match targets::resolve_targets_with_exclude(target_value, exclude.as_deref()) { Ok(result) => { if result.files.is_empty() { *stop_signal.lock().unwrap() = true; @@ -159,7 +164,7 @@ pub fn run( } } - match utils::generic::create_zip_from_target(target_str, &zip_path, None) { + match utils::generic::create_zip_from_target(target_str, &zip_path, None, exclude.as_deref()) { Ok(added_files) => { if added_files.is_empty() { *stop_signal.lock().unwrap() = true; diff --git a/src/targets.rs b/src/targets.rs index 96efe65..09dc283 100644 --- a/src/targets.rs +++ b/src/targets.rs @@ -18,7 +18,10 @@ pub struct TargetSegmentResult { pub error: Option, } -pub fn resolve_targets(target_value: &str) -> Result { +pub fn resolve_targets_with_exclude( + target_value: &str, + exclude: Option<&str>, +) -> Result { let segments: Vec = target_value .split(',') .map(|s| s.trim().to_string()) @@ -40,6 +43,8 @@ pub fn resolve_targets(target_value: &str) -> Result Result { + if is_excluded_by_glob(&normalized, &repo_root, &exclude_glob_set) { + continue; + } if seen_files.insert(normalized.clone()) { all_files.push(normalized); } @@ -101,6 +109,63 @@ pub fn resolve_targets(target_value: &str) -> Result) -> Result, String> { + let exclude_str = match exclude { + Some(s) if !s.trim().is_empty() => s, + _ => return Ok(None), + }; + + let patterns: Vec<&str> = exclude_str + .split(',') + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .collect(); + if patterns.is_empty() { + return Ok(None); + } + + let mut builder = GlobSetBuilder::new(); + for pattern in &patterns { + let glob = Glob::new(pattern) + .map_err(|e| format!("Invalid exclude glob pattern '{}': {}", pattern, e))?; + builder.add(glob); + } + let glob_set = builder + .build() + .map_err(|e| format!("Failed to build exclude glob set: {}", e))?; + Ok(Some(glob_set)) +} + +fn is_excluded_by_glob( + file: &Path, + repo_root: &Path, + exclude_glob_set: &Option, +) -> bool { + let glob_set = match exclude_glob_set { + Some(gs) => gs, + None => return false, + }; + + if let Ok(relative) = file.strip_prefix(repo_root) { + return glob_set.is_match(relative); + } + glob_set.is_match(file) +} + +pub fn build_user_exclude_glob_set( + exclude: Option<&str>, +) -> Result, String> { + build_exclude_glob_set(exclude) +} + +pub fn is_file_excluded( + file: &Path, + base_dir: &Path, + exclude_glob_set: &Option, +) -> bool { + is_excluded_by_glob(file, base_dir, exclude_glob_set) +} + fn resolve_segment(segment: &str, repo_root: &Path) -> Result, String> { if segment == "-" { return read_stdin_files(false); @@ -480,3 +545,137 @@ fn find_repo_root() -> Result { fn is_git_repo(dir: &Path) -> bool { Repository::discover(dir).is_ok() } + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_test_dir() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + let base = dir.path(); + + Repository::init(base).unwrap(); + + fs::create_dir_all(base.join("src")).unwrap(); + fs::create_dir_all(base.join("tests")).unwrap(); + fs::create_dir_all(base.join("docs")).unwrap(); + + fs::write(base.join("src/main.rs"), "fn main() {}").unwrap(); + fs::write(base.join("src/lib.rs"), "pub fn hello() {}").unwrap(); + fs::write(base.join("tests/test_main.rs"), "// test").unwrap(); + fs::write(base.join("docs/readme.md"), "# readme").unwrap(); + fs::write(base.join("config.toml"), "[config]").unwrap(); + + dir + } + + #[test] + fn build_exclude_glob_set_returns_none_for_none() { + let result = build_exclude_glob_set(None).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn build_exclude_glob_set_returns_none_for_empty() { + let result = build_exclude_glob_set(Some("")).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn build_exclude_glob_set_returns_some_for_valid_pattern() { + let result = build_exclude_glob_set(Some("tests/**")).unwrap(); + assert!(result.is_some()); + } + + #[test] + fn build_exclude_glob_set_handles_comma_separated() { + let result = build_exclude_glob_set(Some("tests/**,docs/**")).unwrap(); + assert!(result.is_some()); + let gs = result.unwrap(); + assert!(gs.is_match("tests/foo.rs")); + assert!(gs.is_match("docs/readme.md")); + assert!(!gs.is_match("src/main.rs")); + } + + #[test] + fn build_exclude_glob_set_returns_error_for_invalid() { + let result = build_exclude_glob_set(Some("[invalid")); + assert!(result.is_err()); + } + + #[test] + fn is_excluded_by_glob_matches_relative_path() { + let gs = build_exclude_glob_set(Some("tests/**")).unwrap(); + let repo_root = Path::new("/repo"); + let file = Path::new("/repo/tests/test_main.rs"); + assert!(is_excluded_by_glob(file, repo_root, &gs)); + } + + #[test] + fn is_excluded_by_glob_does_not_match_non_excluded() { + let gs = build_exclude_glob_set(Some("tests/**")).unwrap(); + let repo_root = Path::new("/repo"); + let file = Path::new("/repo/src/main.rs"); + assert!(!is_excluded_by_glob(file, repo_root, &gs)); + } + + #[test] + fn is_excluded_by_glob_returns_false_for_none() { + let gs: Option = None; + let file = Path::new("/repo/tests/test_main.rs"); + assert!(!is_excluded_by_glob(file, Path::new("/repo"), &gs)); + } + + #[test] + fn is_excluded_by_glob_wildcard_extension() { + let gs = build_exclude_glob_set(Some("**/*.md")).unwrap(); + let repo_root = Path::new("/repo"); + assert!(is_excluded_by_glob( + Path::new("/repo/docs/readme.md"), + repo_root, + &gs + )); + assert!(!is_excluded_by_glob( + Path::new("/repo/src/main.rs"), + repo_root, + &gs + )); + } + + #[test] + fn is_excluded_filters_directory_files_correctly() { + let dir = setup_test_dir(); + let base = dir.path(); + let gs = build_exclude_glob_set(Some("tests/**,**/*.md")).unwrap(); + + assert!(!is_excluded_by_glob(&base.join("src/main.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("src/lib.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("config.toml"), base, &gs)); + assert!(is_excluded_by_glob( + &base.join("tests/test_main.rs"), + base, + &gs + )); + assert!(is_excluded_by_glob(&base.join("docs/readme.md"), base, &gs)); + } + + #[test] + fn is_excluded_with_none_includes_all() { + let dir = setup_test_dir(); + let base = dir.path(); + let gs: Option = None; + + assert!(!is_excluded_by_glob(&base.join("src/main.rs"), base, &gs)); + assert!(!is_excluded_by_glob( + &base.join("tests/test_main.rs"), + base, + &gs + )); + assert!(!is_excluded_by_glob( + &base.join("docs/readme.md"), + base, + &gs + )); + } +} diff --git a/src/utils/generic.rs b/src/utils/generic.rs index 29f2e0b..f07d013 100644 --- a/src/utils/generic.rs +++ b/src/utils/generic.rs @@ -36,10 +36,12 @@ const DEFAULT_EXCLUDE_GLOBS: &[&str] = &[ /// - If `target` is `None`, performs a full repository scan (equivalent to scanning all files). /// - If `target` is `Some(target_str)`, resolves the target using the targets module and creates zip from those files. /// The target string can be a comma-separated list of files, directories, globs, or git selectors. +/// - `user_exclude` is an optional comma-separated list of glob patterns from `--exclude`. pub fn create_zip_from_target>( target: Option<&str>, output_zip: P, exclude_globs: Option<&[&str]>, + user_exclude: Option<&str>, ) -> Result, Box> { let exclude_globs = exclude_globs.unwrap_or(DEFAULT_EXCLUDE_GLOBS); @@ -49,9 +51,12 @@ pub fn create_zip_from_target>( } let glob_set = glob_builder.build()?; + let user_exclude_glob_set = crate::targets::build_user_exclude_glob_set(user_exclude) + .map_err(|e| format!("Failed to build exclude patterns: {}", e))?; + let files_to_zip: Vec<(PathBuf, PathBuf)> = if let Some(target_str) = target { let current_dir = env::current_dir()?; - let result = crate::targets::resolve_targets(target_str) + let result = crate::targets::resolve_targets_with_exclude(target_str, user_exclude) .map_err(|e| format!("Failed to resolve targets: {}", e))?; result @@ -78,6 +83,15 @@ pub fn create_zip_from_target>( if path.is_file() || path.is_dir() { let relative_path = path.strip_prefix(directory)?; + if path.is_file() + && crate::targets::is_file_excluded( + relative_path, + Path::new(""), + &user_exclude_glob_set, + ) + { + continue; + } files.push((path.to_path_buf(), relative_path.to_path_buf())); } } @@ -342,7 +356,7 @@ mod tests { // which would exclude *everything*. The filter + warn path under test // is identical either way. let excludes: &[&str] = &["**/node_modules/**"]; - let added = create_zip_from_target(Some(&target), &output_zip, Some(excludes)) + let added = create_zip_from_target(Some(&target), &output_zip, Some(excludes), None) .expect("zip creation should succeed"); assert!(