Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ GuardDog's behavior can be customized using environment variables:
| `GUARDDOG_VERIFY_EXHAUSTIVE_DEPENDENCIES` | Analyze all possible versions of dependencies (`true`/`false`) | `false` |
| `GUARDDOG_TOP_PACKAGES_CACHE_LOCATION` | Location of the top packages cache directory | `guarddog/analyzer/metadata/resources` |
| `GUARDDOG_YARA_EXT_EXCLUDE` | Comma-separated list of file extensions to exclude from YARA scanning | `ini,md,rst,txt,lock,json,yaml,yml,toml,xml,html,csv,sql,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,changelog,readme,makefile,dockerfile,pkg-info,d.ts` |
| `GUARDDOG_YARA_PATH_EXCLUDE_REGEX` | Regex pattern for files to exclude from YARA scanning (searched anywhere in the file path relative to the scan root; anchor with `^`/`$` if needed) | _empty_ (disabled) |

#### Archive Extraction Security Limits

Expand Down
23 changes: 22 additions & 1 deletion guarddog/analyzer/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import re
import yara # type: ignore

from collections import defaultdict
Expand All @@ -17,7 +18,7 @@
validate_identifies,
validate_mitre_tactics,
)
from guarddog.utils.config import YARA_EXT_EXCLUDE
from guarddog.utils.config import YARA_EXT_EXCLUDE, YARA_PATH_EXCLUDE_REGEX
from guarddog.ecosystems import ECOSYSTEM, LANGUAGE

SOURCECODE_RULES_PATH = os.path.join(os.path.dirname(__file__), "sourcecode")
Expand Down Expand Up @@ -378,6 +379,22 @@ def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
log.debug("No yara rules to run")
return {"results": results, "errors": errors, "issues": issues}

regex_exclude = None
if YARA_PATH_EXCLUDE_REGEX:
try:
regex_exclude = re.compile(YARA_PATH_EXCLUDE_REGEX)
except re.error as e:
return {
"results": results,
"errors": {
"rules-all": (
"failed to run rule: invalid "
f"GUARDDOG_YARA_PATH_EXCLUDE_REGEX: {str(e)}"
)
},
"issues": issues,
}

import time

# Get rule metadata to access max_hits
Expand Down Expand Up @@ -425,6 +442,10 @@ def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
scan_file_target_abspath, path
)

# Skip files matching global exclusion regex
if regex_exclude and regex_exclude.search(scan_file_target_relpath):
continue

# Check path_include patterns if specified (takes precedence)
if path_include:
patterns = [p.strip() for p in path_include.split(",")]
Expand Down
9 changes: 9 additions & 0 deletions guarddog/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@
"pptx,xls,xlsx,odt,changelog,readme,makefile,dockerfile,pkg-info,d.ts",
).split(",")

"""
This parameter specifies a regex pattern used to exclude files from YARA scanning.
The pattern is read from the GUARDDOG_YARA_PATH_EXCLUDE_REGEX environment variable
and is applied to each file path relative to the scan root.
- Default: empty (disabled)
"""
YARA_PATH_EXCLUDE_REGEX: str = os.getenv("GUARDDOG_YARA_PATH_EXCLUDE_REGEX", "")

"""
This parameter specifies the maximum uncompressed size allowed for archive extraction
- Default: 2 GB in bytes
Expand Down
44 changes: 44 additions & 0 deletions tests/core/test_sourcecode_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from guarddog import ecosystems
from guarddog.analyzer.analyzer import Analyzer
import guarddog.analyzer.analyzer as analyzer_module
from guarddog.ecosystems import LANGUAGE

pypi_analyzer = Analyzer(ecosystem=ecosystems.ECOSYSTEM.PYPI)
Expand Down Expand Up @@ -69,6 +70,49 @@ def test_get_snippet_file_not_found():
assert snippet == ""


def test_analyze_yara_excludes_files_matching_regex():
    """Files matching the path-exclusion regex must never reach the YARA matcher."""

    class RecordingRules:
        """Stand-in for compiled YARA rules that records every path passed to match()."""

        def __init__(self):
            self.scanned_files = []

        def match(self, file_path):
            self.scanned_files.append(file_path)
            return []

    analyzer = Analyzer(ecosystem=ecosystems.ECOSYSTEM.PYPI)
    rule = next(iter(analyzer.yara_ruleset))
    recorder = RecordingRules()

    # One fake directory listing: a file to keep and a file the regex should drop.
    fake_tree = [("/tmp/pkg", [], ["keep.py", "bundle.min.js"])]

    # Extension-based exclusion is emptied so only the regex can filter files.
    with patch.object(analyzer_module, "YARA_EXT_EXCLUDE", []), \
            patch.object(analyzer_module, "YARA_PATH_EXCLUDE_REGEX", r"\.min\.js$"), \
            patch.object(analyzer_module.yara, "compile", return_value=recorder), \
            patch.object(analyzer_module.os, "walk", return_value=fake_tree):
        analyzer.analyze_yara("/tmp/pkg", {rule})

    scanned = recorder.scanned_files
    assert "/tmp/pkg/keep.py" in scanned
    assert "/tmp/pkg/bundle.min.js" not in scanned


def test_analyze_yara_returns_error_for_invalid_exclude_regex():
    """An uncompilable exclusion regex is reported under "rules-all" without compiling YARA rules."""
    analyzer = Analyzer(ecosystem=ecosystems.ECOSYSTEM.PYPI)
    rule = next(iter(analyzer.yara_ruleset))

    # "(" is an unterminated group, so compiling it as a regex fails.
    bad_regex = patch.object(analyzer_module, "YARA_PATH_EXCLUDE_REGEX", "(")
    compile_spy = patch.object(analyzer_module.yara, "compile")

    with bad_regex, compile_spy as yara_compile:
        result = analyzer.analyze_yara("/tmp/pkg", {rule})

    errors = result["errors"]
    assert "rules-all" in errors
    assert "GUARDDOG_YARA_PATH_EXCLUDE_REGEX" in errors["rules-all"]
    yara_compile.assert_not_called()


# Comment filtering tests


Expand Down