From 4b63483fc424cc832d903ea1bba41d17be01e4d0 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 7 Jun 2024 14:11:27 -0500 Subject: [PATCH 01/21] Add ability to detect SPDXIDs by reserved keyname Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 67 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index b640db5..d32365a 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -3,9 +3,9 @@ package identifier import ( + "bufio" "fmt" "io/fs" - "io/ioutil" "os" "path/filepath" "regexp" @@ -113,7 +113,21 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li return IdentifierResults{}, nil } - b, err := ioutil.ReadFile(filePath) + licenseMatches, err := findSPDXIdentifierInFile(filePath, 10) + if err != nil { + return IdentifierResults{}, err + } + if len(licenseMatches) > 0 { + fmt.Printf("matches[0]: %v\n", licenseMatches[0]) + var results IdentifierResults + spdxId := licenseMatches[0].LicenseId + sliceMatches := []Match{licenseMatches[0].Match} + results.Matches = make(map[string][]Match) + results.Matches[spdxId] = sliceMatches + return results, nil + } + + b, err := os.ReadFile(filePath) if err != nil { return IdentifierResults{}, err } @@ -124,6 +138,55 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li return result, err } +const SPDX_ID_KEY = "SPDX-License-Identifier:" + +var LEN_SPDX_ID_KEY = len(SPDX_ID_KEY) +var SPDX_ID_KEY_BYTES = []byte(SPDX_ID_KEY) + +func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []licenseMatch, err error) { + var file *os.File + // Note: parent function has already verified the file exists + // TODO: this function should perhaps accept a file handle and allow the parent to open and provide it + file, err = os.Open(filePath) + if err != nil { + Logger.Errorf("cannot open file: %s", filePath) + return + } + defer 
file.Close() + + fileReader := bufio.NewReader(file) + fileScanner := bufio.NewScanner(fileReader) + + fileScanner.Split(bufio.ScanLines) + var foundLine string + for i := 0; i < maxLines; i++ { + fileScanner.Scan() + if strings.Contains(fileScanner.Text(), SPDX_ID_KEY) { + foundLine = fileScanner.Text() + fmt.Println("SPDX Found: " + foundLine) + break + } + } + if foundLine != "" { + idx := strings.Index(foundLine, SPDX_ID_KEY) + fmt.Printf("idx: %v\n", idx) + spdxIdPlus := foundLine[idx:] + fmt.Printf("idx: %s\n", spdxIdPlus) + var match licenseMatch + match.LicenseId = spdxIdPlus + licenseMatches = append(licenseMatches, match) + // type licenseMatch struct { + // LicenseId string + // Match Match + // } + // type Match struct { + // Begins int + // Ends int + // } + } + return +} + func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) { var lfs []string From f3ffd317019bb2593c5ae19aec7537ddc036379c Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 7 Jun 2024 17:00:10 -0500 Subject: [PATCH 02/21] Support match pattern in command options Signed-off-by: Matt Rutkowski --- cmd/root.go | 8 +++++--- identifier/identifier.go | 38 ++++++++++++++++++++++++++------------ normalizer/normalizer.go | 4 ++-- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 85acc29..43bf50b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,15 +5,16 @@ package cmd import ( "errors" "fmt" + "os" + "sort" + "time" + "github.com/CycloneDX/license-scanner/configurer" "github.com/CycloneDX/license-scanner/debugger" "github.com/CycloneDX/license-scanner/identifier" "github.com/CycloneDX/license-scanner/importer" "github.com/CycloneDX/license-scanner/licenses" "github.com/CycloneDX/sbom-utility/log" - "os" - "sort" - "time" "github.com/spf13/cobra" "github.com/spf13/cobra/doc" @@ -173,6 +174,7 @@ func findLicensesInDirectory(cfg *viper.Viper) error { options 
:= identifier.Options{ ForceResult: true, + Patterns: identifier.SUPPORTED_MATCH_PATTERNS, Enhancements: identifier.Enhancements{ AddNotes: "", AddTextBlocks: true, diff --git a/identifier/identifier.go b/identifier/identifier.go index d32365a..a3abf7d 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -26,9 +26,20 @@ var ( nonAlphaRE = regexp.MustCompile(`^[^A-Za-z0-9]*$`) ) +const ( + MATCH_PATTERN_SPDX_ID = "spdx-id" + MATCH_PATTERN_ALIAS = "alias" + MATCH_PATTERN_URL = "url" + MATCH_PATTERN_PRIMARY = "primary" + MATCH_PATTERN_ASSOCIATED = "associated" +) + +var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} + type Options struct { ForceResult bool OmitBlocks bool + Patterns []string Enhancements Enhancements } @@ -113,18 +124,21 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li return IdentifierResults{}, nil } - licenseMatches, err := findSPDXIdentifierInFile(filePath, 10) - if err != nil { - return IdentifierResults{}, err - } - if len(licenseMatches) > 0 { - fmt.Printf("matches[0]: %v\n", licenseMatches[0]) - var results IdentifierResults - spdxId := licenseMatches[0].LicenseId - sliceMatches := []Match{licenseMatches[0].Match} - results.Matches = make(map[string][]Match) - results.Matches[spdxId] = sliceMatches - return results, nil + if slices.Contains(options.Patterns, MATCH_PATTERN_SPDX_ID) { + Logger.Infof("Pattern: %s\n", MATCH_PATTERN_SPDX_ID) + licenseMatches, err := findSPDXIdentifierInFile(filePath, 10) + if err != nil { + return IdentifierResults{}, err + } + if len(licenseMatches) > 0 { + fmt.Printf("matches[0]: %v\n", licenseMatches[0]) + var results IdentifierResults + spdxId := licenseMatches[0].LicenseId + sliceMatches := []Match{licenseMatches[0].Match} + results.Matches = make(map[string][]Match) + results.Matches[spdxId] = sliceMatches + return results, nil + } } b, err := 
os.ReadFile(filePath) diff --git a/normalizer/normalizer.go b/normalizer/normalizer.go index 38839eb..1d769d2 100644 --- a/normalizer/normalizer.go +++ b/normalizer/normalizer.go @@ -145,8 +145,8 @@ func NewNormalizationData(originalText string, isTemplate bool) *NormalizationDa func (n *NormalizationData) NormalizeText() error { // verify that the original text is a string with a length of at least one. if len(n.OriginalText) < 1 { - Logger.Error("Invalid text") - return fmt.Errorf("failed to normalize data: invalid input text with length %d", len(n.OriginalText)) + err := Logger.Errorf("failed to normalize data: invalid input text with length %d", len(n.OriginalText)) + return err } // Check if the text contains control characters indicative of binary or non-text files. From 639ae2f55d24210f512fbf07c75be7732085a148 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 7 Jun 2024 17:16:17 -0500 Subject: [PATCH 03/21] Add pattern flag support to root command Signed-off-by: Matt Rutkowski --- configurer/configurer.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configurer/configurer.go b/configurer/configurer.go index f118ad8..deb8ac1 100644 --- a/configurer/configurer.go +++ b/configurer/configurer.go @@ -35,6 +35,7 @@ const ( SpdxPathFlag = "spdxPath" CustomFlag = "custom" CustomPathFlag = "customPath" + PatternsFlag = "patterns" ) var ( @@ -147,4 +148,5 @@ func AddDefaultFlags(flagSet *pflag.FlagSet) { flagSet.String(SpdxPathFlag, "", "Path to external SPDX templates to use") flagSet.String(CustomFlag, DefaultResource, "Custom templates to use") flagSet.String(CustomPathFlag, "", "Path to external custom templates to use") + flagSet.StringP(PatternsFlag, "", "", "List of pattern matching functions to execute") } From 4d4d9de4b546b506acbb3d8a3a5eb16bfa0534ec Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Mon, 10 Jun 2024 13:03:46 -0500 Subject: [PATCH 04/21] Add help for the --patterns flag Signed-off-by: Matt Rutkowski --- cmd/root.go | 2 +- 
configurer/configurer.go | 17 +++++++++++++++-- identifier/identifier.go | 21 +++++++++++---------- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 43bf50b..8f5cc24 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -174,7 +174,7 @@ func findLicensesInDirectory(cfg *viper.Viper) error { options := identifier.Options{ ForceResult: true, - Patterns: identifier.SUPPORTED_MATCH_PATTERNS, + Patterns: configurer.SUPPORTED_MATCH_PATTERNS, Enhancements: identifier.Enhancements{ AddNotes: "", AddTextBlocks: true, diff --git a/configurer/configurer.go b/configurer/configurer.go index deb8ac1..288d9e9 100644 --- a/configurer/configurer.go +++ b/configurer/configurer.go @@ -8,9 +8,9 @@ import ( "path" "path/filepath" "runtime" + "strings" "github.com/spf13/pflag" - "github.com/spf13/viper" ) @@ -38,6 +38,16 @@ const ( PatternsFlag = "patterns" ) +const ( + MATCH_PATTERN_SPDX_ID = "spdx-id" + MATCH_PATTERN_ALIAS = "alias" + MATCH_PATTERN_URL = "url" + MATCH_PATTERN_PRIMARY = "primary" + MATCH_PATTERN_ASSOCIATED = "associated" +) + +var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} + var ( _, thisFile, _, _ = runtime.Caller(0) // Dirs/files are relative to this file thisDir = filepath.Dir(thisFile) @@ -148,5 +158,8 @@ func AddDefaultFlags(flagSet *pflag.FlagSet) { flagSet.String(SpdxPathFlag, "", "Path to external SPDX templates to use") flagSet.String(CustomFlag, DefaultResource, "Custom templates to use") flagSet.String(CustomPathFlag, "", "Path to external custom templates to use") - flagSet.StringP(PatternsFlag, "", "", "List of pattern matching functions to execute") + + help_msg_pattern := fmt.Sprintf("Comma-separated list of license pattern-matching functions to execute. 
One or more of: [%v]; defaults to all patterns.", + strings.Join(SUPPORTED_MATCH_PATTERNS, ", ")) + flagSet.StringP(PatternsFlag, "", "", help_msg_pattern) } diff --git a/identifier/identifier.go b/identifier/identifier.go index a3abf7d..4b5fe0a 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -17,6 +17,7 @@ import ( "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" + "github.com/CycloneDX/license-scanner/configurer" "github.com/CycloneDX/license-scanner/licenses" "github.com/CycloneDX/license-scanner/normalizer" ) @@ -26,15 +27,15 @@ var ( nonAlphaRE = regexp.MustCompile(`^[^A-Za-z0-9]*$`) ) -const ( - MATCH_PATTERN_SPDX_ID = "spdx-id" - MATCH_PATTERN_ALIAS = "alias" - MATCH_PATTERN_URL = "url" - MATCH_PATTERN_PRIMARY = "primary" - MATCH_PATTERN_ASSOCIATED = "associated" -) +// const ( +// MATCH_PATTERN_SPDX_ID = "spdx-id" +// MATCH_PATTERN_ALIAS = "alias" +// MATCH_PATTERN_URL = "url" +// MATCH_PATTERN_PRIMARY = "primary" +// MATCH_PATTERN_ASSOCIATED = "associated" +// ) -var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} +// var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} type Options struct { ForceResult bool @@ -124,8 +125,8 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li return IdentifierResults{}, nil } - if slices.Contains(options.Patterns, MATCH_PATTERN_SPDX_ID) { - Logger.Infof("Pattern: %s\n", MATCH_PATTERN_SPDX_ID) + if slices.Contains(options.Patterns, configurer.MATCH_PATTERN_SPDX_ID) { + Logger.Infof("Pattern: %s\n", configurer.MATCH_PATTERN_SPDX_ID) licenseMatches, err := findSPDXIdentifierInFile(filePath, 10) if err != nil { return IdentifierResults{}, err From 7d720e6db2a6375158fe4fd9f725b9d978e1c232 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 17:57:47 -0500 
Subject: [PATCH 05/21] Allow SPDX Identifier matches to be additive to all subsequent find functions Signed-off-by: Matt Rutkowski --- api/scanner/scan.go | 7 +- cmd/root.go | 92 ++++++++++++++++++------ identifier/identifier.go | 150 +++++++++++++++++++++------------------ 3 files changed, 154 insertions(+), 95 deletions(-) diff --git a/api/scanner/scan.go b/api/scanner/scan.go index cfdc95b..ffe63b2 100644 --- a/api/scanner/scan.go +++ b/api/scanner/scan.go @@ -145,14 +145,15 @@ func (s *ScanSpec) ScanLicenseText(licenseLibrary *licenses.LicenseLibrary, resu // find the licenses in the normalized text and return a list of SPDX IDs // in case of an error, return as much as we have along with an error - results, err := identifier.Identify(identifier.Options{}, licenseLibrary, normalizedData) + identifierResults := identifier.IdentifierResults{} + err := identifier.Identify(&identifierResults, identifier.Options{}, licenseLibrary, normalizedData) if err != nil { r.Error = err return r } // if the results are empty, add unknown as the SPDX ID - if len(results.Matches) == 0 { + if len(identifierResults.Matches) == 0 { // Add NOASSERTION to the LicenseChoice of the SPDX Name for this scan r.CycloneDXLicenses = append(r.CycloneDXLicenses, cyclonedx.LicenseChoice{ License: &cyclonedx.License{ @@ -161,7 +162,7 @@ func (s *ScanSpec) ScanLicenseText(licenseLibrary *licenses.LicenseLibrary, resu }) } else { // iterate over the list of matches and maintain the unique list of SPDX IDs in the result - for id := range results.Matches { + for id := range identifierResults.Matches { // Add an SPDX ID from the match // update the LicenseChoice to include each new match diff --git a/cmd/root.go b/cmd/root.go index 15e684a..ae33711 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -7,6 +7,7 @@ import ( "fmt" "os" "sort" + "strings" "time" "github.com/CycloneDX/license-scanner/configurer" @@ -161,6 +162,32 @@ func listLicenses(cfg *viper.Viper) error { return nil } +func 
getCommandLineOptions(cfg *viper.Viper) (options identifier.Options) { + options = identifier.Options{ + ForceResult: true, + // Default to all pattern matching functions + Patterns: configurer.SUPPORTED_MATCH_PATTERNS, + Enhancements: identifier.Enhancements{ + AddNotes: "", + AddTextBlocks: true, + FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), + FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), + FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), + }, + } + + // Parse out patterns into easy-to-test map + tmpPatterns := cfg.GetString(configurer.PatternsFlag) + if tmpPatterns != "" { + options.Patterns = strings.Split(cfg.GetString(configurer.PatternsFlag), ",") + } + options.PatternMap = make(map[string]bool) + for _, pattern := range options.Patterns { + options.PatternMap[pattern] = true + } + return +} + func findLicensesInDirectory(cfg *viper.Viper) error { d := cfg.GetString(configurer.DirFlag) @@ -172,17 +199,31 @@ func findLicensesInDirectory(cfg *viper.Viper) error { return err } - options := identifier.Options{ - ForceResult: true, - Patterns: configurer.SUPPORTED_MATCH_PATTERNS, - Enhancements: identifier.Enhancements{ - AddNotes: "", - AddTextBlocks: true, - FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - }, - } + // options := identifier.Options{ + // ForceResult: true, + // // Default to all pattern matching functions + // Patterns: configurer.SUPPORTED_MATCH_PATTERNS, + // Enhancements: identifier.Enhancements{ + // AddNotes: "", + // AddTextBlocks: true, + // FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), + // FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), + // FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), + // }, + // } + + // // Parse out patterns into easy-to-test map + // tmpPatterns := cfg.GetString(configurer.PatternsFlag) + // if tmpPatterns == "" { + // options.Patterns = 
configurer.SUPPORTED_MATCH_PATTERNS + // } else { + // options.Patterns = strings.Split(cfg.GetString(configurer.PatternsFlag), ",") + // } + // options.PatternMap = make(map[string]bool) + // for _, pattern := range options.Patterns { + // options.PatternMap[pattern] = true + // } + options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInDirectory(d, options, licenseLibrary) if err != nil { @@ -241,16 +282,25 @@ func findLicensesInFile(cfg *viper.Viper, f string) error { return err } - options := identifier.Options{ - ForceResult: true, - Enhancements: identifier.Enhancements{ - AddNotes: "", - AddTextBlocks: true, - FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - }, - } + // options := identifier.Options{ + // ForceResult: true, + // // Default to all pattern matching functions + // Patterns: configurer.SUPPORTED_MATCH_PATTERNS, + // Enhancements: identifier.Enhancements{ + // AddNotes: "", + // AddTextBlocks: true, + // FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), + // FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), + // FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), + // }, + // } + + // // Parse out patterns into easy-to-test map + // patterns := strings.Split(cfg.GetString(configurer.PatternsFlag), ",") + // for _, pattern := range patterns { + // options.PatternMap[pattern] = true + // } + options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInFile(f, options, licenseLibrary) if err != nil { diff --git a/identifier/identifier.go b/identifier/identifier.go index 4b5fe0a..698d83d 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -27,20 +27,11 @@ var ( nonAlphaRE = regexp.MustCompile(`^[^A-Za-z0-9]*$`) ) -// const ( -// MATCH_PATTERN_SPDX_ID = "spdx-id" -// MATCH_PATTERN_ALIAS = "alias" -// MATCH_PATTERN_URL = "url" -// MATCH_PATTERN_PRIMARY 
= "primary" -// MATCH_PATTERN_ASSOCIATED = "associated" -// ) - -// var SUPPORTED_MATCH_PATTERNS = []string{MATCH_PATTERN_SPDX_ID, MATCH_PATTERN_ALIAS, MATCH_PATTERN_URL, MATCH_PATTERN_PRIMARY, MATCH_PATTERN_ASSOCIATED} - type Options struct { ForceResult bool OmitBlocks bool Patterns []string + PatternMap map[string]bool Enhancements Enhancements } @@ -78,79 +69,97 @@ type Block struct { Matches []string } -func Identify(options Options, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) { +func Identify(identifierResults *IdentifierResults, options Options, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (err error) { // find the licenses in the normalized text and return a list of SPDX IDs // in case of an error, return as much as we have along with an error - licenseResults, err := findAllLicensesInNormalizedData(licenseLibrary, normalizedData) - if err != nil { - return IdentifierResults{}, err + if err = findAllLicensesInNormalizedData(identifierResults, licenseLibrary, normalizedData); err != nil { + return } - if err := FromOptions(&licenseResults, options.Enhancements, licenseLibrary); err != nil { - return IdentifierResults{}, err + if err = FromOptions(identifierResults, options.Enhancements, licenseLibrary); err != nil { + return } - if err := applyMutatorLicenses(licenseLibrary.LicenseMap, &licenseResults); err != nil { - return IdentifierResults{}, err + if err = applyMutatorLicenses(licenseLibrary.LicenseMap, identifierResults); err != nil { + return } + // TODO: document why we are initializing here if options.OmitBlocks { - licenseResults.Blocks = []Block{} + identifierResults.Blocks = []Block{} } - return licenseResults, err + return } -func IdentifyLicensesInString(input string, options Options, licenseLibrary *licenses.LicenseLibrary) (IdentifierResults, error) { +func IdentifyLicensesInString(identifierResults *IdentifierResults, input string, 
options Options, licenseLibrary *licenses.LicenseLibrary) (err error) { // instantiate normalizedData with the input license text normalizedData := normalizer.NormalizationData{ OriginalText: input, } // normalize the input license text - if err := normalizedData.NormalizeText(); err != nil { - return IdentifierResults{}, err + if err = normalizedData.NormalizeText(); err != nil { + return } - return Identify(options, licenseLibrary, normalizedData) + return Identify(identifierResults, options, licenseLibrary, normalizedData) } -func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *licenses.LicenseLibrary) (IdentifierResults, error) { - fi, err := os.Stat(filePath) - if err != nil { - return IdentifierResults{}, err +func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *licenses.LicenseLibrary) (identifierResults IdentifierResults, err error) { + // Carry filepath used for matches in result set + identifierResults = IdentifierResults{} + identifierResults.File = filePath + + // Verify filepath exists + fi, errStat := os.Stat(filePath) + if errStat != nil { + return identifierResults, errStat } + // Only scan files of reasonable sizes + // TODO: make the max. 
size configurable if fi.Size() > 1000000 { - Logger.Errorf("file too large (%v > 1000000)", fi.Size()) // log error, but return nil - return IdentifierResults{}, nil + err = Logger.Errorf("file too large (%v > 1000000)", fi.Size()) // log error, but return nil } - if slices.Contains(options.Patterns, configurer.MATCH_PATTERN_SPDX_ID) { - Logger.Infof("Pattern: %s\n", configurer.MATCH_PATTERN_SPDX_ID) - licenseMatches, err := findSPDXIdentifierInFile(filePath, 10) - if err != nil { - return IdentifierResults{}, err + // Pattern match: "spdx-id" + if options.PatternMap[configurer.MATCH_PATTERN_SPDX_ID] { + Logger.Infof("Matching pattern: `%s`\n", configurer.MATCH_PATTERN_SPDX_ID) + // Scan for match in first 10 lines of file + // TODO: make first X lines configurable + licenseMatches, errSpdx := findSPDXIdentifierInFile(filePath, 10) + if errSpdx != nil { + err = errSpdx + return } if len(licenseMatches) > 0 { - fmt.Printf("matches[0]: %v\n", licenseMatches[0]) - var results IdentifierResults + //fmt.Printf("matches[0]: %v\n", licenseMatches[0]) spdxId := licenseMatches[0].LicenseId sliceMatches := []Match{licenseMatches[0].Match} - results.Matches = make(map[string][]Match) - results.Matches[spdxId] = sliceMatches - return results, nil + identifierResults.Matches = make(map[string][]Match) + identifierResults.Matches[spdxId] = sliceMatches + //return } } - b, err := os.ReadFile(filePath) - if err != nil { - return IdentifierResults{}, err + // Pattern match: includes "alias", "url", "primary", "associated" + // We will need to read the entire file into memory + if options.PatternMap[configurer.MATCH_PATTERN_PRIMARY] || + options.PatternMap[configurer.MATCH_PATTERN_URL] || + options.PatternMap[configurer.MATCH_PATTERN_ALIAS] || + options.PatternMap[configurer.MATCH_PATTERN_ASSOCIATED] { + var bytes []byte + bytes, err = os.ReadFile(filePath) + if err != nil { + return IdentifierResults{}, err + } + input := string(bytes) + err = 
IdentifyLicensesInString(&identifierResults, input, options, licenseLibrary) + // TODO: do NOT overwrite existing results + identifierResults.File = filePath } - input := string(b) - result, err := IdentifyLicensesInString(input, options, licenseLibrary) - result.File = filePath - return result, err + return } const SPDX_ID_KEY = "SPDX-License-Identifier:" @@ -190,14 +199,6 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l var match licenseMatch match.LicenseId = spdxIdPlus licenseMatches = append(licenseMatches, match) - // type licenseMatch struct { - // LicenseId string - // Match Match - // } - // type Match struct { - // Begins int - // Ends int - // } } return } @@ -262,23 +263,30 @@ func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary return ret, err } -func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (IdentifierResults, error) { +func findAllLicensesInNormalizedData(identifierResults *IdentifierResults, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (err error) { // initialize the result with original license text, normalized license text, and hash (md5, sha256, and sha512) - ret := IdentifierResults{ - OriginalText: normalizedData.OriginalText, - NormalizedText: normalizedData.NormalizedText, - Hash: normalizedData.Hash, - } + // ret := IdentifierResults{ + // OriginalText: normalizedData.OriginalText, + // NormalizedText: normalizedData.NormalizedText, + // Hash: normalizedData.Hash, + // } + identifierResults.OriginalText = normalizedData.OriginalText + identifierResults.NormalizedText = normalizedData.NormalizedText + identifierResults.Hash = normalizedData.Hash // LicenseID-to-matches map to return - ret.Matches = make(map[string][]Match) + //ret.Matches = make(map[string][]Match) + if identifierResults.Matches == nil { + identifierResults.Matches = make(map[string][]Match) + } + 
// List with LicenseID and indexes for generating text blocks var licensesMatched []licenseMatch for id, lic := range licenseLibrary.LicenseMap { - matches, err := findLicenseInNormalizedData(lic, normalizedData, licenseLibrary) - if err != nil { - return ret, err + matches, errFind := findLicenseInNormalizedData(lic, normalizedData, licenseLibrary) + if errFind != nil { + return errFind } // Sort the matches slice by start and end index. @@ -295,18 +303,18 @@ func findAllLicensesInNormalizedData(licenseLibrary *licenses.LicenseLibrary, no continue // remove duplicates } licensesMatched = append(licensesMatched, licenseMatch{LicenseId: id, Match: matches[i]}) - ret.Matches[id] = append(ret.Matches[id], matches[i]) + identifierResults.Matches[id] = append(identifierResults.Matches[id], matches[i]) } } // Generate Blocks. - blocks, err := generateTextBlocks(normalizedData.OriginalText, licensesMatched) - if err != nil { - return ret, err + blocks, errGenerate := generateTextBlocks(normalizedData.OriginalText, licensesMatched) + if errGenerate != nil { + return errGenerate } - ret.Blocks = blocks + identifierResults.Blocks = blocks - return ret, nil + return } func findLicenseInNormalizedData(lic licenses.License, normalizedData normalizer.NormalizationData, ll *licenses.LicenseLibrary) (licenseMatches []Match, err error) { From f267ca15f339d56a45d39045de8ecd9b74104d78 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 18:21:18 -0500 Subject: [PATCH 06/21] Update identifier tests to account for param. 
changes Signed-off-by: Matt Rutkowski --- identifier/identifier_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/identifier/identifier_test.go b/identifier/identifier_test.go index 6cdf004..7824129 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -511,7 +511,8 @@ func Test_identifyLicensesInString(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - got, err := IdentifyLicensesInString(tt.args.input, options, licenseLibrary) + identifierResults = IdentifierResults{} + err := IdentifyLicensesInString(&identifierResults, tt.args.input, options, licenseLibrary) if (err != nil) != tt.wantErr { t.Errorf("identifyLicensesInString() error = %v, wantErr %v", err, tt.wantErr) } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { From ea03344f3edd4d35e923e1f96bf1a17a1f915f2e Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 18:22:36 -0500 Subject: [PATCH 07/21] Allow SPDX Identifier matches to be additive to all subsequent find functions Signed-off-by: Matt Rutkowski --- identifier/identifier_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/identifier/identifier_test.go b/identifier/identifier_test.go index 7824129..21d65ef 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -605,7 +605,8 @@ func Test_identifyLicensesInStringPreChecks(t *testing.T) { if err := ll.AddAll(); err != nil { t.Fatalf("AddAll() error = %v", err) } - got, err := IdentifyLicensesInString(tt.input, options, ll) + identifierResults = IdentifierResults{} + err := IdentifyLicensesInString(&identifierResults, tt.input, options, ll) if err != nil { t.Errorf("identifyLicensesInString() error = %v", err) } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { From 618061acbea0ea8fbb5d3bc640b815639b255e6c Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 
19:09:23 -0500 Subject: [PATCH 08/21] Allow SPDX Identifier matches to be additive to all subsequent find functions Signed-off-by: Matt Rutkowski --- identifier/identifier_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/identifier/identifier_test.go b/identifier/identifier_test.go index 21d65ef..d50dc5e 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -511,7 +511,7 @@ func Test_identifyLicensesInString(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - identifierResults = IdentifierResults{} + identifierResults := IdentifierResults{} err := IdentifyLicensesInString(&identifierResults, tt.args.input, options, licenseLibrary) if (err != nil) != tt.wantErr { t.Errorf("identifyLicensesInString() error = %v, wantErr %v", err, tt.wantErr) @@ -605,7 +605,7 @@ func Test_identifyLicensesInStringPreChecks(t *testing.T) { if err := ll.AddAll(); err != nil { t.Fatalf("AddAll() error = %v", err) } - identifierResults = IdentifierResults{} + identifierResults := IdentifierResults{} err := IdentifyLicensesInString(&identifierResults, tt.input, options, ll) if err != nil { t.Errorf("identifyLicensesInString() error = %v", err) From 1256b05c8a9449ba05f16b08ce722c59eb35835f Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 19:14:24 -0500 Subject: [PATCH 09/21] Allow SPDX Identifier matches to be additive to all subsequent find functions Signed-off-by: Matt Rutkowski --- identifier/identifier_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/identifier/identifier_test.go b/identifier/identifier_test.go index d50dc5e..04db944 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -515,13 +515,13 @@ func Test_identifyLicensesInString(t *testing.T) { err := IdentifyLicensesInString(&identifierResults, tt.args.input, options, licenseLibrary) if (err != nil) != tt.wantErr { t.Errorf("identifyLicensesInString() error = 
%v, wantErr %v", err, tt.wantErr) - } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { + } else if d := cmp.Diff(tt.want.Matches, identifierResults.Matches, cmp.AllowUnexported(Match{})); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.CopyRightStatements, got.CopyRightStatements); d != "" { + } else if d := cmp.Diff(tt.want.CopyRightStatements, identifierResults.CopyRightStatements); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Blocks, got.Blocks); d != "" { + } else if d := cmp.Diff(tt.want.Blocks, identifierResults.Blocks); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Hash, got.Hash); d != "" { + } else if d := cmp.Diff(tt.want.Hash, identifierResults.Hash); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) } }) @@ -609,9 +609,9 @@ func Test_identifyLicensesInStringPreChecks(t *testing.T) { err := IdentifyLicensesInString(&identifierResults, tt.input, options, ll) if err != nil { t.Errorf("identifyLicensesInString() error = %v", err) - } else if d := cmp.Diff(tt.want.Matches, got.Matches, cmp.AllowUnexported(Match{})); d != "" { + } else if d := cmp.Diff(tt.want.Matches, identifierResults.Matches, cmp.AllowUnexported(Match{})); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) - } else if d := cmp.Diff(tt.want.Blocks, got.Blocks); d != "" { + } else if d := cmp.Diff(tt.want.Blocks, identifierResults.Blocks); d != "" { t.Errorf("Didn't get expected result: (-want, +got): %v", d) } }) From c65a4139ed47eb97060cb78b5e8fe7ede3a6b9d3 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Tue, 11 Jun 2024 19:27:01 -0500 Subject: [PATCH 10/21] reuse error variable Signed-off-by: Matt Rutkowski --- identifier/identifier_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/identifier/identifier_test.go b/identifier/identifier_test.go index 04db944..38f61fb 100644 --- a/identifier/identifier_test.go +++ b/identifier/identifier_test.go @@ -606,7 +606,7 @@ func Test_identifyLicensesInStringPreChecks(t *testing.T) { t.Fatalf("AddAll() error = %v", err) } identifierResults := IdentifierResults{} - err := IdentifyLicensesInString(&identifierResults, tt.input, options, ll) + err = IdentifyLicensesInString(&identifierResults, tt.input, options, ll) if err != nil { t.Errorf("identifyLicensesInString() error = %v", err) } else if d := cmp.Diff(tt.want.Matches, identifierResults.Matches, cmp.AllowUnexported(Match{})); d != "" { From 96e48ba3d22c3a315117227a3506a302a065fbcd Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 12:38:17 -0500 Subject: [PATCH 11/21] Update dependencies and fix unit test Signed-off-by: Matt Rutkowski --- cmd/root.go | 2 +- go.mod | 16 ++++++------- go.sum | 36 +++++++++++++++--------------- identifier/identifier.go | 31 ++++++++++++++++++++----- identifier/identifier_spdx_test.go | 2 +- 5 files changed, 54 insertions(+), 33 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index ae33711..0e0e614 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -179,7 +179,7 @@ func getCommandLineOptions(cfg *viper.Viper) (options identifier.Options) { // Parse out patterns into easy-to-test map tmpPatterns := cfg.GetString(configurer.PatternsFlag) if tmpPatterns != "" { - options.Patterns = strings.Split(cfg.GetString(configurer.PatternsFlag), ",") + options.Patterns = strings.Split(tmpPatterns, ",") } options.PatternMap = make(map[string]bool) for _, pattern := range options.Patterns { diff --git a/go.mod b/go.mod index 26522fd..9a3df19 100644 --- a/go.mod +++ b/go.mod @@ -4,25 +4,25 @@ go 1.18 require ( github.com/CycloneDX/cyclonedx-go v0.7.1 - github.com/CycloneDX/sbom-utility v0.9.3 - github.com/google/go-cmp v0.5.8 - github.com/spf13/cobra v1.6.1 + github.com/CycloneDX/sbom-utility v0.16.0 + 
github.com/google/go-cmp v0.5.9 + github.com/spf13/cobra v1.7.0 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.12.0 - golang.org/x/exp v0.0.0-20220428152302-39d4317da171 + golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f ) require ( github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect - github.com/fatih/color v1.14.1 // indirect + github.com/fatih/color v1.15.0 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/magiconair/properties v1.8.6 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.17 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.0.1 // indirect @@ -31,8 +31,8 @@ require ( github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/subosito/gotenv v1.3.0 // indirect - golang.org/x/sys v0.4.0 // indirect - golang.org/x/text v0.3.8 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/text v0.9.0 // indirect gopkg.in/ini.v1 v1.66.4 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 759c1ac..b36e3d9 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/CycloneDX/cyclonedx-go v0.7.1 h1:5w1SxjGm9MTMNTuRbEPyw21ObdbaagTWF/KfF0qHTRE= github.com/CycloneDX/cyclonedx-go v0.7.1/go.mod h1:N/nrdWQI2SIjaACyyDs/u7+ddCkyl/zkNs8xFsHF2Ps= -github.com/CycloneDX/sbom-utility v0.9.3 
h1:kbseWT30dvnnyR1pMg1uqXBmIVXMcf00EMbXpH26pvM= -github.com/CycloneDX/sbom-utility v0.9.3/go.mod h1:n9hQR2A0Qa7EnC25BJEhY5sDXqUPwMWyAGcypB/H3ik= +github.com/CycloneDX/sbom-utility v0.16.0 h1:EpHNoLmw3vfVQWFFflFHmwo7mCWO833qRm+AlbG8wXY= +github.com/CycloneDX/sbom-utility v0.16.0/go.mod h1:+EfZPoy8g6iGhVpo5cH+y2la5pQ3qKImGjHMy2xA+tM= github.com/bradleyjkemp/cupaloy/v2 v2.8.0 h1:any4BmKE+jGIaMpnU8YgH/I2LPiLBufr6oMMlVBbn9M= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= @@ -62,8 +62,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.14.1 h1:qfhVLaG5s+nCROl1zJsZRxFeYrHLqWroPOQ8BWiNb4w= -github.com/fatih/color v1.14.1/go.mod h1:2oHN61fhTpgcxD3TSWCgKDiH1+x4OiDVVGH8WlgGZGg= +github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE= github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= @@ -106,8 +106,8 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -134,7 +134,6 @@ github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f h1:7LYC+Yfk github.com/hokaccha/go-prettyjson v0.0.0-20211117102719-0474bc63780f/go.mod h1:pFlLw2CfqZiIBOx6BuCeRLCrfxBJipTY0nIOF/VbGcI= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= -github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= @@ -151,8 +150,8 @@ github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPK github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.17 
h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= -github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= @@ -172,8 +171,8 @@ github.com/spf13/afero v1.8.2 h1:xehSyVa0YnHWsJ49JFljMpg1HX19V6NDZ1fkm1Xznbo= github.com/spf13/afero v1.8.2/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w= github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU= -github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= -github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= +github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= +github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -216,8 +215,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220428152302-39d4317da171 
h1:TfdoLivD44QwvssI9Sv1xwa5DcL5XQr4au4sZ2F2NV4= -golang.org/x/exp v0.0.0-20220428152302-39d4317da171/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -329,8 +328,9 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= -golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -338,8 +338,8 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text 
v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -483,8 +483,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/ini.v1 v1.66.4 h1:SsAcf+mM7mRZo2nJNGt8mZCjG8ZRaNGMURJw7BsIST4= gopkg.in/ini.v1 v1.66.4/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= diff --git a/identifier/identifier.go b/identifier/identifier.go index 698d83d..0e2bfd5 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -35,6 +35,26 @@ type Options struct { Enhancements Enhancements } +func (options *Options) DumpStruct() { + 
Logger.EnableIndent(true) + Logger.DumpStruct("", options) +} + +// Parse out patterns into easy-to-test map +func (options *Options) ParsePatternsFromSlice(patterns []string) { + options.PatternMap = make(map[string]bool) + for _, pattern := range patterns { + options.PatternMap[pattern] = true + } +} + +func (options *Options) ParsePatternsFromString(patterns string) { + if patterns != "" { + options.Patterns = strings.Split(patterns, ",") + } + options.ParsePatternsFromSlice(options.Patterns) +} + type licenseMatch struct { LicenseId string Match Match @@ -110,6 +130,7 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li // Carry filepath used for matches in result set identifierResults = IdentifierResults{} identifierResults.File = filePath + identifierResults.Matches = make(map[string][]Match) // Verify filepath exists fi, errStat := os.Stat(filePath) @@ -133,12 +154,9 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li return } if len(licenseMatches) > 0 { - //fmt.Printf("matches[0]: %v\n", licenseMatches[0]) spdxId := licenseMatches[0].LicenseId sliceMatches := []Match{licenseMatches[0].Match} - identifierResults.Matches = make(map[string][]Match) identifierResults.Matches[spdxId] = sliceMatches - //return } } @@ -155,8 +173,8 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li } input := string(bytes) err = IdentifyLicensesInString(&identifierResults, input, options, licenseLibrary) - // TODO: do NOT overwrite existing results - identifierResults.File = filePath + // // TODO: do NOT overwrite existing results + // identifierResults.File = filePath } return @@ -205,6 +223,9 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) { var lfs []string + fmt.Printf("Bar: [A]\n") + Logger.Tracef(">> 
Options: %+v\n", options) + Logger.DumpStruct("Options", options) if err := filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error { if err != nil { diff --git a/identifier/identifier_spdx_test.go b/identifier/identifier_spdx_test.go index 424ebeb..628e31d 100644 --- a/identifier/identifier_spdx_test.go +++ b/identifier/identifier_spdx_test.go @@ -41,7 +41,6 @@ func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) { if err := licenseLibrary.AddAllSPDX(); err != nil { t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err) } - results, err := IdentifyLicensesInDirectory(testDataDir, options, licenseLibrary) if err != nil { t.Errorf("IdentifyLicensesInDirectory(%v) err = %v", testDataDir, err) @@ -53,6 +52,7 @@ func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) { result := result t.Run(result.File, func(t *testing.T) { t.Parallel() + fmt.Printf("result.File: `%v`\n", result.File) if !strings.Contains(result.File, "/invalid/") { wantLicenseID := strings.TrimSuffix(path.Base(result.File), ".txt") wantLicenseID = strings.TrimPrefix(wantLicenseID, "deprecated_") From b61df8afc33179139151e9fec99f88048983fa3f Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 12:49:55 -0500 Subject: [PATCH 12/21] Update dependencies and fix unit test Signed-off-by: Matt Rutkowski --- identifier/identifier_spdx_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/identifier/identifier_spdx_test.go b/identifier/identifier_spdx_test.go index 628e31d..433ecd2 100644 --- a/identifier/identifier_spdx_test.go +++ b/identifier/identifier_spdx_test.go @@ -12,6 +12,7 @@ import ( "strings" "testing" + "github.com/CycloneDX/license-scanner/configurer" "github.com/CycloneDX/license-scanner/licenses" ) @@ -41,6 +42,9 @@ func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) { if err := licenseLibrary.AddAllSPDX(); err != nil { t.Fatalf("licenseLibrary.AddAllSPDX() error = %v", err) } + // Initialize the match pattern options 
to include all functions + options.ParsePatternsFromSlice(configurer.SUPPORTED_MATCH_PATTERNS) + results, err := IdentifyLicensesInDirectory(testDataDir, options, licenseLibrary) if err != nil { t.Errorf("IdentifyLicensesInDirectory(%v) err = %v", testDataDir, err) From 248fa096158b690af1f1d77395c94b7d3a384791 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 12:59:10 -0500 Subject: [PATCH 13/21] Remove old/dead code Signed-off-by: Matt Rutkowski --- cmd/root.go | 45 ++-------------------------------------- identifier/identifier.go | 2 -- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 0e0e614..72b80c5 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -198,31 +198,7 @@ func findLicensesInDirectory(cfg *viper.Viper) error { if err := licenseLibrary.AddAll(); err != nil { return err } - - // options := identifier.Options{ - // ForceResult: true, - // // Default to all pattern matching functions - // Patterns: configurer.SUPPORTED_MATCH_PATTERNS, - // Enhancements: identifier.Enhancements{ - // AddNotes: "", - // AddTextBlocks: true, - // FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - // FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - // FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - // }, - // } - - // // Parse out patterns into easy-to-test map - // tmpPatterns := cfg.GetString(configurer.PatternsFlag) - // if tmpPatterns == "" { - // options.Patterns = configurer.SUPPORTED_MATCH_PATTERNS - // } else { - // options.Patterns = strings.Split(cfg.GetString(configurer.PatternsFlag), ",") - // } - // options.PatternMap = make(map[string]bool) - // for _, pattern := range options.Patterns { - // options.PatternMap[pattern] = true - // } + // retrieve command line options from flags options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInDirectory(d, options, licenseLibrary) @@ -282,24 +258,7 @@ func findLicensesInFile(cfg *viper.Viper, f string) error { 
return err } - // options := identifier.Options{ - // ForceResult: true, - // // Default to all pattern matching functions - // Patterns: configurer.SUPPORTED_MATCH_PATTERNS, - // Enhancements: identifier.Enhancements{ - // AddNotes: "", - // AddTextBlocks: true, - // FlagAcceptable: cfg.GetBool(configurer.AcceptableFlag), - // FlagCopyrights: cfg.GetBool(configurer.CopyrightsFlag), - // FlagKeywords: cfg.GetBool(configurer.KeywordsFlag), - // }, - // } - - // // Parse out patterns into easy-to-test map - // patterns := strings.Split(cfg.GetString(configurer.PatternsFlag), ",") - // for _, pattern := range patterns { - // options.PatternMap[pattern] = true - // } + // retrieve command line options from flags options := getCommandLineOptions(cfg) results, err := identifier.IdentifyLicensesInFile(f, options, licenseLibrary) diff --git a/identifier/identifier.go b/identifier/identifier.go index 0e2bfd5..6d3a5be 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -173,8 +173,6 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li } input := string(bytes) err = IdentifyLicensesInString(&identifierResults, input, options, licenseLibrary) - // // TODO: do NOT overwrite existing results - // identifierResults.File = filePath } return From c4dcd888c4ee4f5cef64817593a6f4b2592f193b Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 15:04:20 -0500 Subject: [PATCH 14/21] Remove debug/trace printfs Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index 6d3a5be..ebeba6a 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -203,15 +203,12 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l fileScanner.Scan() if strings.Contains(fileScanner.Text(), SPDX_ID_KEY) { foundLine = fileScanner.Text() - fmt.Println("SPDX Found: " + foundLine) break } } if 
foundLine != "" { idx := strings.Index(foundLine, SPDX_ID_KEY) - fmt.Printf("idx: %v\n", idx) spdxIdPlus := foundLine[idx:] - fmt.Printf("idx: %s\n", spdxIdPlus) var match licenseMatch match.LicenseId = spdxIdPlus licenseMatches = append(licenseMatches, match) @@ -221,9 +218,6 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary *licenses.LicenseLibrary) (ret []IdentifierResults, err error) { var lfs []string - fmt.Printf("Bar: [A]\n") - Logger.Tracef(">> Options: %+v\n", options) - Logger.DumpStruct("Options", options) if err := filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error { if err != nil { From 7bb08d1a93a96e4752f84e60c5fa21a6e66c0d62 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 17:05:05 -0500 Subject: [PATCH 15/21] extract only the actual SPDX ID from scanline it was found at Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index ebeba6a..6bd30b1 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -181,7 +181,8 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li const SPDX_ID_KEY = "SPDX-License-Identifier:" var LEN_SPDX_ID_KEY = len(SPDX_ID_KEY) -var SPDX_ID_KEY_BYTES = []byte(SPDX_ID_KEY) + +//var SPDX_ID_KEY_BYTES = []byte(SPDX_ID_KEY) func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []licenseMatch, err error) { var file *os.File @@ -208,9 +209,14 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l } if foundLine != "" { idx := strings.Index(foundLine, SPDX_ID_KEY) + // find start index of where the actual SPDX ID is by + // adding in the length of the SPDX License Identifier key + // Then trim any whitespace to extract the actual SPDX ID value + 
idx += LEN_SPDX_ID_KEY spdxIdPlus := foundLine[idx:] + //fmt.Printf("line (%v): `%s`; id: `%s`\n", idx, foundLine, spdxIdPlus) var match licenseMatch - match.LicenseId = spdxIdPlus + match.LicenseId = strings.TrimSpace(spdxIdPlus) licenseMatches = append(licenseMatches, match) } return From 73e0b4dcf24234ecdfc0eb782e15f5755ed1f353 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 18:10:26 -0500 Subject: [PATCH 16/21] make sure we return upon detecting large files Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index 6bd30b1..5b51735 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -141,6 +141,7 @@ func IdentifyLicensesInFile(filePath string, options Options, licenseLibrary *li // TODO: make the max. size configurable if fi.Size() > 1000000 { err = Logger.Errorf("file too large (%v > 1000000)", fi.Size()) // log error, but return nil + return } // Pattern match: "spdx-id" @@ -208,14 +209,13 @@ func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []l } } if foundLine != "" { + var match licenseMatch idx := strings.Index(foundLine, SPDX_ID_KEY) // find start index of where the actual SPDX ID is by // adding in the length of the SPDX License Identifier key // Then trim any whitespace to extract the actual SPDX ID value idx += LEN_SPDX_ID_KEY spdxIdPlus := foundLine[idx:] - //fmt.Printf("line (%v): `%s`; id: `%s`\n", idx, foundLine, spdxIdPlus) - var match licenseMatch match.LicenseId = strings.TrimSpace(spdxIdPlus) licenseMatches = append(licenseMatches, match) } @@ -284,17 +284,11 @@ func IdentifyLicensesInDirectory(dirPath string, options Options, licenseLibrary func findAllLicensesInNormalizedData(identifierResults *IdentifierResults, licenseLibrary *licenses.LicenseLibrary, normalizedData normalizer.NormalizationData) (err error) { // initialize the result with 
original license text, normalized license text, and hash (md5, sha256, and sha512) - // ret := IdentifierResults{ - // OriginalText: normalizedData.OriginalText, - // NormalizedText: normalizedData.NormalizedText, - // Hash: normalizedData.Hash, - // } identifierResults.OriginalText = normalizedData.OriginalText identifierResults.NormalizedText = normalizedData.NormalizedText identifierResults.Hash = normalizedData.Hash // LicenseID-to-matches map to return - //ret.Matches = make(map[string][]Match) if identifierResults.Matches == nil { identifierResults.Matches = make(map[string][]Match) } From b651f6110b0dd274b4ed22775473c626191070b4 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Thu, 13 Jun 2024 18:22:56 -0500 Subject: [PATCH 17/21] Remove unused method for dumping struct Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index 5b51735..1f414b9 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -35,11 +35,6 @@ type Options struct { Enhancements Enhancements } -func (options *Options) DumpStruct() { - Logger.EnableIndent(true) - Logger.DumpStruct("", options) -} - // Parse out patterns into easy-to-test map func (options *Options) ParsePatternsFromSlice(patterns []string) { options.PatternMap = make(map[string]bool) From dfaa607867f053c2eb1d32e3d37ffbdb5e068c69 Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 14 Jun 2024 03:51:44 -0500 Subject: [PATCH 18/21] remove printf in updated unit test function Signed-off-by: Matt Rutkowski --- identifier/identifier_spdx_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/identifier/identifier_spdx_test.go b/identifier/identifier_spdx_test.go index 433ecd2..1455edb 100644 --- a/identifier/identifier_spdx_test.go +++ b/identifier/identifier_spdx_test.go @@ -56,7 +56,6 @@ func Test_identifyLicensesInSPDXTestDataDirectory(t *testing.T) { result := result t.Run(result.File, 
func(t *testing.T) { t.Parallel() - fmt.Printf("result.File: `%v`\n", result.File) if !strings.Contains(result.File, "/invalid/") { wantLicenseID := strings.TrimSuffix(path.Base(result.File), ".txt") wantLicenseID = strings.TrimPrefix(wantLicenseID, "deprecated_") From e9d206a3a8033a71c0e7cc32a4aef439719d97ad Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 14 Jun 2024 03:55:31 -0500 Subject: [PATCH 19/21] remove need for errGenerate Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index 1f414b9..0e68dd3 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -316,11 +316,10 @@ func findAllLicensesInNormalizedData(identifierResults *IdentifierResults, licen } // Generate Blocks. - blocks, errGenerate := generateTextBlocks(normalizedData.OriginalText, licensesMatched) - if errGenerate != nil { - return errGenerate + identifierResults.Blocks, err = generateTextBlocks(normalizedData.OriginalText, licensesMatched) + if err != nil { + return err } - identifierResults.Blocks = blocks return } From 34e2efead2f72d148b4355720bd054ecabdbaefd Mon Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 14 Jun 2024 03:57:02 -0500 Subject: [PATCH 20/21] Remove unused byte version of spdx key Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index 0e68dd3..ca4f271 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -178,8 +178,6 @@ const SPDX_ID_KEY = "SPDX-License-Identifier:" var LEN_SPDX_ID_KEY = len(SPDX_ID_KEY) -//var SPDX_ID_KEY_BYTES = []byte(SPDX_ID_KEY) - func findSPDXIdentifierInFile(filePath string, maxLines int) (licenseMatches []licenseMatch, err error) { var file *os.File // Note: parent function has already verified the file exists From 34a81188e38dbc25c2f39372a20fe36be5407740 Mon
Sep 17 00:00:00 2001 From: Matt Rutkowski Date: Fri, 14 Jun 2024 04:17:21 -0500 Subject: [PATCH 21/21] Add additional comment around the OmitBlocks element Signed-off-by: Matt Rutkowski --- identifier/identifier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/identifier/identifier.go b/identifier/identifier.go index ca4f271..0d7d301 100644 --- a/identifier/identifier.go +++ b/identifier/identifier.go @@ -99,7 +99,7 @@ func Identify(identifierResults *IdentifierResults, options Options, licenseLibr return } - // TODO: document why we are initializing here + // TODO: document why we are initializing here and why this element is never used if options.OmitBlocks { identifierResults.Blocks = []Block{} }