From 06d7fe3ce5a3d62d793707f9a4fdea829a41954a Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 16 Dec 2024 16:59:39 -0500 Subject: [PATCH] fix transformers, split packaging, better logs Signed-off-by: Alex Goodman --- go.mod | 10 +- go.sum | 8 +- internal/tarutil/reader_entry.go | 2 + pkg/process/build.go | 80 ++++++++- pkg/process/package.go | 153 +++++++--------- pkg/process/package_legacy.go | 116 ------------ pkg/process/v6/archive.go | 166 ++++++++++++++++++ .../v6/transformers/github/transform.go | 1 + .../v6/transformers/github/transform_test.go | 21 ++- pkg/process/v6/transformers/nvd/transform.go | 1 + .../v6/transformers/nvd/transform_test.go | 10 ++ pkg/process/v6/transformers/os/transform.go | 1 + .../v6/transformers/os/transform_test.go | 99 ++++++----- pkg/process/v6/writer.go | 2 +- 14 files changed, 400 insertions(+), 270 deletions(-) delete mode 100644 pkg/process/package_legacy.go create mode 100644 pkg/process/v6/archive.go diff --git a/go.mod b/go.mod index ed4e9889..f7ec7b23 100644 --- a/go.mod +++ b/go.mod @@ -2,18 +2,17 @@ module github.com/anchore/grype-db go 1.23.2 -toolchain go1.23.4 - require ( github.com/Masterminds/semver/v3 v3.3.1 github.com/OneOfOne/xxhash v1.2.8 github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/adrg/xdg v0.5.3 github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a - github.com/anchore/grype v0.86.1 - github.com/anchore/syft v1.18.1 + github.com/anchore/grype v0.86.2-0.20241216230527-69330e5f3d62 + github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de github.com/dave/jennifer v1.7.1 + github.com/dustin/go-humanize v1.0.1 github.com/glebarez/sqlite v1.11.0 github.com/go-test/deep v1.1.1 github.com/google/go-cmp v0.6.0 @@ -108,7 +107,6 @@ require ( github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dsnet/compress 
v0.0.2-0.20210315054119-f66993602bf5 // indirect - github.com/dustin/go-humanize v1.0.1 // indirect github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/elliotchance/phpserialize v1.4.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect @@ -246,7 +244,5 @@ require ( modernc.org/sqlite v1.34.2 // indirect ) -replace github.com/mholt/archiver/v3 v3.5.1 => github.com/anchore/archiver/v3 v3.5.2 - // this is a breaking change, so we need to pin the version until glebarez/go-sqlite is updated to use internal/libc replace modernc.org/sqlite v1.33.0 => modernc.org/sqlite v1.32.0 diff --git a/go.sum b/go.sum index 34b00743..127104ef 100644 --- a/go.sum +++ b/go.sum @@ -252,14 +252,14 @@ github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0v github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ= github.com/anchore/go-version v1.2.2-0.20210903204242-51efa5b487c4 h1:rmZG77uXgE+o2gozGEBoUMpX27lsku+xrMwlmBZJtbg= github.com/anchore/go-version v1.2.2-0.20210903204242-51efa5b487c4/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= -github.com/anchore/grype v0.86.1 h1:HWpzCOCwjKkwkIEEC5lcKI4yl6GhTF3+Z12tXWYtMoI= -github.com/anchore/grype v0.86.1/go.mod h1:k3VnXfi+e/OGx1mTUL733gy3fyB4W/AdHP8fSyQML9w= +github.com/anchore/grype v0.86.2-0.20241216230527-69330e5f3d62 h1:eNZG5LS8tadVkk10YtuVxTKn1jU5nXnD1yH6eoaQaM8= +github.com/anchore/grype v0.86.2-0.20241216230527-69330e5f3d62/go.mod h1:k3VnXfi+e/OGx1mTUL733gy3fyB4W/AdHP8fSyQML9w= github.com/anchore/packageurl-go v0.1.1-0.20241018175412-5c22e6360c4f h1:dAQPIrQ3a5PBqZeZ+B9NGZsGmodk4NO9OjDIsQmQyQM= github.com/anchore/packageurl-go v0.1.1-0.20241018175412-5c22e6360c4f/go.mod h1:KoYIv7tdP5+CC9VGkeZV4/vGCKsY55VvoG+5dadg4YI= github.com/anchore/stereoscope v0.0.11 h1:d+dePyWyQzoQehnWOnx/aISW5HW1zLAQKzvaFIpydsU= github.com/anchore/stereoscope v0.0.11/go.mod h1:dxQyMHSdvgOCscQd/lInPHeP5xCJsZYxpzvzy8Y804Y= -github.com/anchore/syft v1.18.1 
h1:JZ7CLbeWrWolCZa4f6SJBLJ9qGBLFCzHrFd8c4bsm94= -github.com/anchore/syft v1.18.1/go.mod h1:ufXPZcjmoTjERaC0HTEW2+chF+fQdryhaQ9arcUO2WQ= +github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd h1:11d0Pzp4Ysw1XxloRS6cHNDBWwqB3MSMzffgMYwFDUw= +github.com/anchore/syft v1.18.2-0.20241216153735-397eb9c10acd/go.mod h1:A8LH+VE33zk5efyBdo45/X9BdXEFrMvetwjMvPV+OFw= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= diff --git a/internal/tarutil/reader_entry.go b/internal/tarutil/reader_entry.go index eda3326f..4bf74b79 100644 --- a/internal/tarutil/reader_entry.go +++ b/internal/tarutil/reader_entry.go @@ -33,6 +33,8 @@ func (t ReaderEntry) writeEntry(tw lowLevelWriter) error { } func writeEntry(tw lowLevelWriter, filename string, fileInfo os.FileInfo, opener func() (io.Reader, error)) error { + log.WithFields("path", filename).Trace("adding file to archive") + header, err := tar.FileInfoHeader(fileInfo, "") if err != nil { return err diff --git a/pkg/process/build.go b/pkg/process/build.go index 9feb629f..00fd73f9 100644 --- a/pkg/process/build.go +++ b/pkg/process/build.go @@ -5,6 +5,8 @@ import ( "fmt" "time" + "github.com/dustin/go-humanize" + "github.com/anchore/grype-db/internal/log" "github.com/anchore/grype-db/pkg/data" v3 "github.com/anchore/grype-db/pkg/process/v3" @@ -104,17 +106,39 @@ func getWriter(schemaVersion int, dataAge time.Time, directory string, states pr func build(results []providerResults, writer data.Writer, processors ...data.Processor) error { lastUpdate := time.Now() + var totalRecords int + for _, result := range results { + totalRecords += int(result.count) + } + log.WithFields("total", humanize.Comma(int64(totalRecords))).Info("processing all records") + + var recordsProcessed int + + // for 
exponential moving average, choose an alpha between 0 and 1, where 1 biases towards the most recent sample + // and 0 biases towards the average of all samples. + rateWindow := newEMA(0.4) + for _, result := range results { - log.WithFields("provider", result.provider.Provider, "count", result.count).Info("processing provider records") - idx := 0 + log.WithFields("provider", result.provider.Provider, "total", humanize.Comma(result.count)).Info("processing provider records") + providerRecordsProcessed := 0 + recordsProcessedInStatusCycle := 0 for opener := range result.openers { - idx++ - log.WithFields("entry", opener.String()).Tracef("processing") + providerRecordsProcessed++ + recordsProcessed++ + recordsProcessedInStatusCycle++ var processor data.Processor if time.Since(lastUpdate) > 3*time.Second { - log.WithFields("provider", result.provider.Provider, "count", result.count, "processed", idx).Debug("processing provider records") + r := recordsPerSecond(recordsProcessedInStatusCycle, lastUpdate) + rateWindow.Add(r) + + log.WithFields( + "provider", fmt.Sprintf("%q %1.0f/s (%1.2f%%)", result.provider.Provider, r, percent(providerRecordsProcessed, int(result.count))), + "overall", fmt.Sprintf("%1.2f%%", percent(recordsProcessed, totalRecords)), + "eta", eta(recordsProcessed, totalRecords, rateWindow.Average()).String(), + ).Debug("status") lastUpdate = time.Now() + recordsProcessedInStatusCycle = 0 } f, err := opener.Open() @@ -129,7 +153,6 @@ func build(results []providerResults, writer data.Writer, processors ...data.Pro for _, candidate := range processors { if candidate.IsSupported(envelope.Schema) { processor = candidate - log.WithFields("schema", envelope.Schema).Trace("matched with processor") break } } @@ -153,3 +176,48 @@ func build(results []providerResults, writer data.Writer, processors ...data.Pro return nil } + +type expMovingAverage struct { + alpha float64 + value float64 + count int +} + +func newEMA(alpha float64) *expMovingAverage { + return 
&expMovingAverage{alpha: alpha} +} + +func (e *expMovingAverage) Add(sample float64) { + if e.count == 0 { + e.value = sample // initialize with the first sample + } else { + e.value = e.alpha*sample + (1-e.alpha)*e.value + } + e.count++ +} + +func (e *expMovingAverage) Average() float64 { + return e.value +} + +func recordsPerSecond(idx int, lastUpdate time.Time) float64 { + sec := time.Since(lastUpdate).Seconds() + if sec == 0 { + return 0 + } + return float64(idx) / sec +} + +func percent(idx, total int) float64 { + if total == 0 { + return 0 + } + return float64(idx) / float64(total) * 100 +} + +func eta(idx, total int, rate float64) time.Duration { + if rate == 0 { + return 0 + } + return time.Duration(float64(total-idx)/rate) * time.Second +} diff --git a/pkg/process/package.go b/pkg/process/package.go index 2a8d369c..00957ce6 100644 --- a/pkg/process/package.go +++ b/pkg/process/package.go @@ -1,8 +1,8 @@ package process import ( - "errors" "fmt" + "net/url" "os" "path" "path/filepath" @@ -10,116 +10,117 @@ import ( "time" "github.com/scylladb/go-set/strset" + "github.com/spf13/afero" "github.com/anchore/grype-db/internal/log" "github.com/anchore/grype-db/internal/tarutil" - "github.com/anchore/grype-db/pkg/provider" + v6process "github.com/anchore/grype-db/pkg/process/v6" grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/legacy/distribution" - v6 "github.com/anchore/grype/grype/db/v6" - v6Distribution "github.com/anchore/grype/grype/db/v6/distribution" + grypeDBLegacy "github.com/anchore/grype/grype/db/v5" + grypeDBLegacyStore "github.com/anchore/grype/grype/db/v5/store" ) +// listingFiles is a set of files that should not be included in the archive +var listingFiles = strset.New("listing.json", "latest.json", "history.json") + func Package(dbDir, publishBaseURL, overrideArchiveExtension string) error { // check if metadata file exists, if so, then this if _, err := os.Stat(filepath.Join(dbDir, grypeDBLegacyDistribution.MetadataFileName)); 
os.IsNotExist(err) { - return packageDB(dbDir, overrideArchiveExtension) + // TODO: detect from disk which version of the DB is present + return v6process.CreateArchive(dbDir, overrideArchiveExtension) } return packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension) } -func packageDB(dbDir, overrideArchiveExtension string) error { - extension, err := resolveExtension(overrideArchiveExtension) +func packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension string) error { //nolint:funlen + log.WithFields("from", dbDir, "url", publishBaseURL, "extension-override", overrideArchiveExtension).Info("packaging database") + + fs := afero.NewOsFs() + metadata, err := grypeDBLegacyDistribution.NewMetadataFromDir(fs, dbDir) if err != nil { return err } - log.WithFields("from", dbDir, "extension", extension).Info("packaging database") - s, err := v6.NewReader(v6.Config{DBDirPath: dbDir}) + if metadata == nil { + return fmt.Errorf("no metadata found in %q", dbDir) + } + + s, err := grypeDBLegacyStore.New(filepath.Join(dbDir, grypeDBLegacy.VulnerabilityStoreFileName), false) if err != nil { return fmt.Errorf("unable to open vulnerability store: %w", err) } - metadata, err := s.GetDBMetadata() - if err != nil || metadata == nil { - return fmt.Errorf("unable to get vulnerability store metadata: %w", err) + id, err := s.GetID() + if err != nil { + return fmt.Errorf("unable to get vulnerability store ID: %w", err) } - if metadata.Model != v6.ModelVersion { - return fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model) + if id.SchemaVersion != metadata.Version { + return fmt.Errorf("metadata version %d does not match vulnerability store version %d", metadata.Version, id.SchemaVersion) } - providerModels, err := s.AllProviders() + u, err := url.Parse(publishBaseURL) if err != nil { - return fmt.Errorf("unable to get all providers: %w", err) + return err } - if len(providerModels) == 0 { - return fmt.Errorf("no 
providers found in the vulnerability store") + // we need a well-ordered string to append to the archive name to ensure uniqueness (to avoid overwriting + // existing archives in the CDN) as well as to ensure that multiple archives created in the same day are + // put in the correct order in the listing file. The DB timestamp represents the age of the data in the DB + // not when the DB was created. The trailer represents the time the DB was packaged. + trailer := fmt.Sprintf("%d", secondsSinceEpoch()) + + var extension = "tar.gz" + if overrideArchiveExtension != "" { + extension = strings.TrimLeft(overrideArchiveExtension, ".") } - eldest, err := toProviders(providerModels).EarliestTimestamp() - if err != nil { - return err + var found bool + for _, valid := range []string{"tar.zst", "tar.gz"} { + if valid == extension { + found = true + break + } + } + + if !found { + return fmt.Errorf("invalid archive extension %q", extension) } - // output archive vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.tar.gz, where: - // - VERSION: schema version in the form of v#.#.# - // - OLDESTDATADATE: RFC3338 formatted value of the oldest date capture date found for all contained providers - // - BUILTEPOCH: linux epoch formatted value of the database metadata built field + // we attach the packaging-time trailer at the end of the file name to prevent overwriting DBs in S3 that are already + // cached in the CDN. Ideally this would be based off of the archive checksum but a timestamp is simpler.
tarName := fmt.Sprintf( - "vulnerability-db_v%s_%s_%d.%s", - fmt.Sprintf("%d.%d.%d", metadata.Model, metadata.Revision, metadata.Addition), - eldest.UTC().Format(time.RFC3339), - metadata.BuildTimestamp.Unix(), + "vulnerability-db_v%d_%s_%s.%s", + metadata.Version, + metadata.Built.Format(time.RFC3339), + trailer, extension, ) + tarPath := path.Join(dbDir, tarName) - tarPath := filepath.Join(dbDir, tarName) - - if err := populateTar(tarPath); err != nil { + if err := populateLegacyTar(tarPath); err != nil { return err } log.WithFields("path", tarPath).Info("created database archive") - return writeLatestDocument(tarPath, *metadata) -} - -func toProviders(states []v6.Provider) provider.States { - var result provider.States - for _, state := range states { - result = append(result, provider.State{ - Provider: state.ID, - Timestamp: *state.DateCaptured, - }) + entry, err := grypeDBLegacyDistribution.NewListingEntryFromArchive(fs, *metadata, tarPath, u) + if err != nil { + return fmt.Errorf("unable to create listing entry from archive: %w", err) } - return result -} - -func resolveExtension(overrideArchiveExtension string) (string, error) { - var extension = "tar.zst" - if overrideArchiveExtension != "" { - extension = strings.TrimLeft(overrideArchiveExtension, ".") + listing := grypeDBLegacyDistribution.NewListing(entry) + listingPath := path.Join(dbDir, grypeDBLegacyDistribution.ListingFileName) + if err = listing.Write(listingPath); err != nil { + return err } - var found bool - for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} { - if valid == extension { - found = true - break - } - } + log.WithFields("path", listingPath).Debug("created initial listing file") - if !found { - return "", fmt.Errorf("unsupported archive extension %q", extension) - } - return extension, nil + return nil } -var listingFiles = strset.New("listing.json", "latest.json", "history.json") - -func populateTar(tarPath string) error { +func populateLegacyTar(tarPath string) error { 
originalDir, err := os.Getwd() if err != nil { return fmt.Errorf("unable to get CWD: %w", err) @@ -158,28 +159,6 @@ func populateTar(tarPath string) error { return nil } -func writeLatestDocument(tarPath string, metadata v6.DBMetadata) error { - archive, err := v6Distribution.NewArchive(tarPath, *metadata.BuildTimestamp, metadata.Model, metadata.Revision, metadata.Addition) - if err != nil || archive == nil { - return fmt.Errorf("unable to create archive: %w", err) - } - - doc := v6Distribution.NewLatestDocument(*archive) - if doc == nil { - return errors.New("unable to create latest document") - } - - dbDir := filepath.Dir(tarPath) - - latestPath := path.Join(dbDir, v6Distribution.LatestFileName) - - fh, err := os.OpenFile(latestPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) - if err != nil { - return fmt.Errorf("unable to create latest file: %w", err) - } - - if err = doc.Write(fh); err != nil { - return fmt.Errorf("unable to write latest document: %w", err) - } - return nil +func secondsSinceEpoch() int64 { + return time.Now().UTC().Unix() } diff --git a/pkg/process/package_legacy.go b/pkg/process/package_legacy.go deleted file mode 100644 index 6672fe19..00000000 --- a/pkg/process/package_legacy.go +++ /dev/null @@ -1,116 +0,0 @@ -package process - -import ( - "fmt" - "net/url" - "path" - "path/filepath" - "strings" - "time" - - "github.com/spf13/afero" - - "github.com/anchore/grype-db/internal/log" - "github.com/anchore/grype/grype/db/legacy/distribution" - grypeDBLegacy "github.com/anchore/grype/grype/db/v5" - grypeDBLegacyStore "github.com/anchore/grype/grype/db/v5/store" -) - -func packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension string) error { //nolint:funlen - log.WithFields("from", dbDir, "url", publishBaseURL, "extension-override", overrideArchiveExtension).Info("packaging database") - - fs := afero.NewOsFs() - metadata, err := distribution.NewMetadataFromDir(fs, dbDir) - if err != nil { - return err - } - - if metadata == nil { - 
return fmt.Errorf("no metadata found in %q", dbDir) - } - - s, err := grypeDBLegacyStore.New(filepath.Join(dbDir, grypeDBLegacy.VulnerabilityStoreFileName), false) - if err != nil { - return fmt.Errorf("unable to open vulnerability store: %w", err) - } - - id, err := s.GetID() - if err != nil { - return fmt.Errorf("unable to get vulnerability store ID: %w", err) - } - - if id.SchemaVersion != metadata.Version { - return fmt.Errorf("metadata version %d does not match vulnerability store version %d", metadata.Version, id.SchemaVersion) - } - - u, err := url.Parse(publishBaseURL) - if err != nil { - return err - } - - // we need a well-ordered string to append to the archive name to ensure uniqueness (to avoid overwriting - // existing archives in the CDN) as well as to ensure that multiple archives created in the same day are - // put in the correct order in the listing file. The DB timestamp represents the age of the data in the DB - // not when the DB was created. The trailer represents the time the DB was packaged. - trailer := fmt.Sprintf("%d", secondsSinceEpoch()) - - // TODO (alex): supporting tar.zst - // var extension = "tar.zst" - var extension = "tar.gz" - - if overrideArchiveExtension != "" { - extension = strings.TrimLeft(overrideArchiveExtension, ".") - } - // TODO (alex): supporting tar.zst - // else if metadata.Version < 5 { - // extension = "tar.gz" - // } - - var found bool - for _, valid := range []string{"tar.zst", "tar.gz"} { - if valid == extension { - found = true - break - } - } - - if !found { - return fmt.Errorf("invalid archive extension %q", extension) - } - - // we attach a random value at the end of the file name to prevent from overwriting DBs in S3 that are already - // cached in the CDN. Ideally this would be based off of the archive checksum but a random string is simpler. 
- tarName := fmt.Sprintf( - "vulnerability-db_v%d_%s_%s.%s", - metadata.Version, - metadata.Built.Format(time.RFC3339), - trailer, - extension, - ) - tarPath := path.Join(dbDir, tarName) - - if err := populateTar(tarPath); err != nil { - return err - } - - log.WithFields("path", tarPath).Info("created database archive") - - entry, err := distribution.NewListingEntryFromArchive(fs, *metadata, tarPath, u) - if err != nil { - return fmt.Errorf("unable to create listing entry from archive: %w", err) - } - - listing := distribution.NewListing(entry) - listingPath := path.Join(dbDir, distribution.ListingFileName) - if err = listing.Write(listingPath); err != nil { - return err - } - - log.WithFields("path", listingPath).Debug("created initial listing file") - - return nil -} - -func secondsSinceEpoch() int64 { - return time.Now().UTC().Unix() -} diff --git a/pkg/process/v6/archive.go b/pkg/process/v6/archive.go new file mode 100644 index 00000000..58965636 --- /dev/null +++ b/pkg/process/v6/archive.go @@ -0,0 +1,166 @@ +package v6 + +import ( + "errors" + "fmt" + "os" + "path" + "path/filepath" + "strings" + "time" + + "github.com/anchore/grype-db/internal/log" + "github.com/anchore/grype-db/internal/tarutil" + "github.com/anchore/grype-db/pkg/provider" + v6 "github.com/anchore/grype/grype/db/v6" + v6Distribution "github.com/anchore/grype/grype/db/v6/distribution" +) + +func CreateArchive(dbDir, overrideArchiveExtension string) error { + extension, err := resolveExtension(overrideArchiveExtension) + if err != nil { + return err + } + log.WithFields("from", dbDir, "extension", extension).Info("packaging database") + + cfg := v6.Config{DBDirPath: dbDir} + s, err := v6.NewReader(cfg) + if err != nil { + return fmt.Errorf("unable to open vulnerability store: %w", err) + } + + metadata, err := s.GetDBMetadata() + if err != nil || metadata == nil { + return fmt.Errorf("unable to get vulnerability store metadata: %w", err) + } + + if metadata.Model != v6.ModelVersion { + return 
fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model) + } + + providerModels, err := s.AllProviders() + if err != nil { + return fmt.Errorf("unable to get all providers: %w", err) + } + + if len(providerModels) == 0 { + return fmt.Errorf("no providers found in the vulnerability store") + } + + eldest, err := toProviders(providerModels).EarliestTimestamp() + if err != nil { + return err + } + + // output archive vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.EXTENSION, where: + // - VERSION: schema version in the form of v#.#.# + // - OLDESTDATADATE: RFC3339 formatted value of the oldest capture date found for all contained providers + // - BUILTEPOCH: unix epoch formatted value of the database metadata built field + tarName := fmt.Sprintf( + "vulnerability-db_v%s_%s_%d.%s", + fmt.Sprintf("%d.%d.%d", metadata.Model, metadata.Revision, metadata.Addition), + eldest.UTC().Format(time.RFC3339), + metadata.BuildTimestamp.Unix(), + extension, + ) + + tarPath := filepath.Join(dbDir, tarName) + + if err := populateTar(dbDir, tarName, v6.VulnerabilityDBFileName); err != nil { + return err + } + + log.WithFields("path", tarPath).Info("created database archive") + + return writeLatestDocument(tarPath, *metadata) +} + +func toProviders(states []v6.Provider) provider.States { + var result provider.States + for _, state := range states { + result = append(result, provider.State{ + Provider: state.ID, + Timestamp: *state.DateCaptured, + }) + } + return result +} + +func resolveExtension(overrideArchiveExtension string) (string, error) { + var extension = "tar.zst" + + if overrideArchiveExtension != "" { + extension = strings.TrimLeft(overrideArchiveExtension, ".") + } + + var found bool + for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} { + if valid == extension { + found = true + break + } + } + + if !found { + return "", fmt.Errorf("unsupported archive extension %q", extension) + } + return extension, nil
+} + +func populateTar(dbDir, tarName string, files ...string) error { + originalDir, err := os.Getwd() + if err != nil { + return fmt.Errorf("unable to get CWD: %w", err) + } + + if dbDir != "" { + if err = os.Chdir(dbDir); err != nil { + return fmt.Errorf("unable to cd to build dir: %w", err) + } + + defer func() { + if err = os.Chdir(originalDir); err != nil { + log.Errorf("unable to cd to original dir: %v", err) + } + }() + } + + for _, f := range files { + _, err := os.Stat(f) + if err != nil { + return fmt.Errorf("unable to stat file %q: %w", f, err) + } + } + + if err = tarutil.PopulateWithPaths(tarName, files...); err != nil { + return fmt.Errorf("unable to create db archive: %w", err) + } + + return nil +} + +func writeLatestDocument(tarPath string, metadata v6.DBMetadata) error { + archive, err := v6Distribution.NewArchive(tarPath, *metadata.BuildTimestamp, metadata.Model, metadata.Revision, metadata.Addition) + if err != nil || archive == nil { + return fmt.Errorf("unable to create archive: %w", err) + } + + doc := v6Distribution.NewLatestDocument(*archive) + if doc == nil { + return errors.New("unable to create latest document") + } + + dbDir := filepath.Dir(tarPath) + + latestPath := path.Join(dbDir, v6Distribution.LatestFileName) + + fh, err := os.OpenFile(latestPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("unable to create latest file: %w", err) + } + + if err = doc.Write(fh); err != nil { + return fmt.Errorf("unable to write latest document: %w", err) + } + return nil +} diff --git a/pkg/process/v6/transformers/github/transform.go b/pkg/process/v6/transformers/github/transform.go index d263aeef..ece7e38a 100644 --- a/pkg/process/v6/transformers/github/transform.go +++ b/pkg/process/v6/transformers/github/transform.go @@ -28,6 +28,7 @@ func Transform(vulnerability unmarshal.GitHubAdvisory, state provider.State) ([] func getVulnerability(vuln unmarshal.GitHubAdvisory, state provider.State) 
grypeDB.VulnerabilityHandle { return grypeDB.VulnerabilityHandle{ Name: vuln.Advisory.GhsaID, + ProviderID: state.Provider, Provider: internal.ProviderModel(state), ModifiedDate: internal.ParseTime(vuln.Advisory.Updated), PublishedDate: internal.ParseTime(vuln.Advisory.Published), diff --git a/pkg/process/v6/transformers/github/transform_test.go b/pkg/process/v6/transformers/github/transform_test.go index 462a3e57..0882932f 100644 --- a/pkg/process/v6/transformers/github/transform_test.go +++ b/pkg/process/v6/transformers/github/transform_test.go @@ -78,7 +78,8 @@ func TestGetVulnerability(t *testing.T) { name: "test-fixtures/GHSA-2wgc-48g2-cj5w.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-2wgc-48g2-cj5w", + Name: "GHSA-2wgc-48g2-cj5w", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -119,7 +120,8 @@ func TestGetVulnerability(t *testing.T) { name: "test-fixtures/GHSA-3x74-v64j-qc3f.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-3x74-v64j-qc3f", + Name: "GHSA-3x74-v64j-qc3f", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -160,7 +162,8 @@ func TestGetVulnerability(t *testing.T) { name: "test-fixtures/github-github-npm-0.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-vc9j-fhvv-8vrf", + Name: "GHSA-vc9j-fhvv-8vrf", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -201,7 +204,8 @@ func TestGetVulnerability(t *testing.T) { name: "test-fixtures/github-github-python-0.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-6cwv-x26c-w2q4", + Name: "GHSA-6cwv-x26c-w2q4", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -228,7 +232,8 @@ func TestGetVulnerability(t *testing.T) { }, }, { - Name: "GHSA-p5wr-vp8g-q5p4", + Name: "GHSA-p5wr-vp8g-q5p4", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -258,7 +263,8 @@ func TestGetVulnerability(t 
*testing.T) { name: "test-fixtures/github-withdrawn.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-6cwv-x26c-w2q4", + Name: "GHSA-6cwv-x26c-w2q4", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", @@ -291,7 +297,8 @@ func TestGetVulnerability(t *testing.T) { name: "test-fixtures/multiple-fixed-in-names.json", expected: []grypeDB.VulnerabilityHandle{ { - Name: "GHSA-p5wr-vp8g-q5p4", + Name: "GHSA-p5wr-vp8g-q5p4", + ProviderID: "github", Provider: &grypeDB.Provider{ ID: "github", Version: "1", diff --git a/pkg/process/v6/transformers/nvd/transform.go b/pkg/process/v6/transformers/nvd/transform.go index 5ea3f839..4dfa3125 100644 --- a/pkg/process/v6/transformers/nvd/transform.go +++ b/pkg/process/v6/transformers/nvd/transform.go @@ -41,6 +41,7 @@ func transform(cfg Config, vulnerability unmarshal.NVDVulnerability, state provi in := []any{ grypeDB.VulnerabilityHandle{ Name: vulnerability.ID, + ProviderID: state.Provider, Provider: internal.ProviderModel(state), ModifiedDate: internal.ParseTime(vulnerability.LastModified), PublishedDate: internal.ParseTime(vulnerability.Published), diff --git a/pkg/process/v6/transformers/nvd/transform_test.go b/pkg/process/v6/transformers/nvd/transform_test.go index b308b29e..53a438fd 100644 --- a/pkg/process/v6/transformers/nvd/transform_test.go +++ b/pkg/process/v6/transformers/nvd/transform_test.go @@ -58,6 +58,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2018-5487", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2018, 7, 5, 13, 52, 30, 627000000, time.UTC)), PublishedDate: timeRef(time.Date(2018, 5, 24, 14, 29, 0, 390000000, time.UTC)), @@ -133,6 +134,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2018-1000222", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2020, 3, 31, 2, 15, 12, 667000000, 
time.UTC)), PublishedDate: timeRef(time.Date(2018, 8, 20, 20, 29, 1, 347000000, time.UTC)), @@ -220,6 +222,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2018-10189", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2018, 5, 23, 14, 41, 49, 73000000, time.UTC)), PublishedDate: timeRef(time.Date(2018, 4, 17, 20, 29, 0, 410000000, time.UTC)), @@ -302,6 +305,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2015-8978", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2016, 11, 28, 19, 50, 59, 600000000, time.UTC)), PublishedDate: timeRef(time.Date(2016, 11, 22, 17, 59, 0, 180000000, time.UTC)), @@ -360,6 +364,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2022-26488", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2022, 9, 3, 3, 34, 19, 933000000, time.UTC)), PublishedDate: timeRef(time.Date(2022, 3, 10, 17, 47, 45, 383000000, time.UTC)), @@ -466,6 +471,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2022-0543", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2023, 9, 29, 15, 55, 24, 533000000, time.UTC)), PublishedDate: timeRef(time.Date(2022, 2, 18, 20, 15, 17, 583000000, time.UTC)), @@ -560,6 +566,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2020-10729", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2021, 12, 10, 19, 57, 6, 357000000, time.UTC)), PublishedDate: timeRef(time.Date(2021, 5, 27, 19, 15, 7, 880000000, time.UTC)), @@ -650,6 +657,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2023-38733", + ProviderID: "nvd", Provider: 
expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2023, 8, 26, 2, 25, 42, 957000000, time.UTC)), PublishedDate: timeRef(time.Date(2023, 8, 22, 22, 15, 8, 460000000, time.UTC)), @@ -737,6 +745,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2023-45283", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2023, 12, 14, 10, 15, 7, 947000000, time.UTC)), PublishedDate: timeRef(time.Date(2023, 11, 9, 17, 15, 8, 757000000, time.UTC)), @@ -847,6 +856,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "CVE-2023-45283", + ProviderID: "nvd", Provider: expectedProvider("nvd"), ModifiedDate: timeRef(time.Date(2023, 12, 14, 10, 15, 7, 947000000, time.UTC)), PublishedDate: timeRef(time.Date(2023, 11, 9, 17, 15, 8, 757000000, time.UTC)), diff --git a/pkg/process/v6/transformers/os/transform.go b/pkg/process/v6/transformers/os/transform.go index a7b0a895..f86792f4 100644 --- a/pkg/process/v6/transformers/os/transform.go +++ b/pkg/process/v6/transformers/os/transform.go @@ -25,6 +25,7 @@ func Transform(vulnerability unmarshal.OSVulnerability, state provider.State) ([ in := []any{ grypeDB.VulnerabilityHandle{ Name: vulnerability.Vulnerability.Name, + ProviderID: state.Provider, Provider: internal.ProviderModel(state), Status: string(grypeDB.VulnerabilityActive), ModifiedDate: internal.ParseTime(vulnerability.Vulnerability.Metadata.Updated), diff --git a/pkg/process/v6/transformers/os/transform_test.go b/pkg/process/v6/transformers/os/transform_test.go index 6e2b44f7..f496a64e 100644 --- a/pkg/process/v6/transformers/os/transform_test.go +++ b/pkg/process/v6/transformers/os/transform_test.go @@ -83,9 +83,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2018-19967", - Status: "active", - Provider: expectedProvider("alpine"), + Name: "CVE-2018-19967", 
+ Status: "active", + ProviderID: "alpine", + Provider: expectedProvider("alpine"), BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2018-19967", References: []grypeDB.Reference{ @@ -125,9 +126,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "ALAS-2018-1106", - Provider: expectedProvider("amazon"), - Status: "active", + Name: "ALAS-2018-1106", + ProviderID: "amazon", + Provider: expectedProvider("amazon"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "ALAS-2018-1106", References: []grypeDB.Reference{ @@ -236,9 +238,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "ALAS-2021-1704", - Provider: expectedProvider("amazon"), - Status: "active", + Name: "ALAS-2021-1704", + ProviderID: "amazon", + Provider: expectedProvider("amazon"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "ALAS-2021-1704", @@ -288,9 +291,10 @@ func TestTransform(t *testing.T) { }, { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "ALASKERNEL-5.4-2022-007", - Provider: expectedProvider("amazon"), - Status: "active", + Name: "ALASKERNEL-5.4-2022-007", + ProviderID: "amazon", + Provider: expectedProvider("amazon"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "ALASKERNEL-5.4-2022-007", References: []grypeDB.Reference{ @@ -339,9 +343,10 @@ func TestTransform(t *testing.T) { }, { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "ALASKERNEL-5.10-2022-005", - Provider: expectedProvider("amazon"), - Status: "active", + Name: "ALASKERNEL-5.10-2022-005", + ProviderID: "amazon", + Provider: expectedProvider("amazon"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "ALASKERNEL-5.10-2022-005", References: []grypeDB.Reference{ @@ -396,9 +401,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: 
grypeDB.VulnerabilityHandle{ - Name: "CVE-2023-29403", - Provider: expectedProvider("mariner"), - Status: "active", + Name: "CVE-2023-29403", + ProviderID: "mariner", + Provider: expectedProvider("mariner"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2023-29403", Description: "CVE-2023-29403 affecting package golang for versions less than 1.20.7-1. A patched version of the package is available.", @@ -439,9 +445,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2008-7220", - Provider: expectedProvider("debian"), - Status: "active", + Name: "CVE-2008-7220", + ProviderID: "debian", + Provider: expectedProvider("debian"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2008-7220", References: []grypeDB.Reference{ @@ -517,9 +524,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2011-4623", - Provider: expectedProvider("debian"), - Status: "active", + Name: "CVE-2011-4623", + ProviderID: "debian", + Provider: expectedProvider("debian"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2011-4623", References: []grypeDB.Reference{ @@ -553,9 +561,10 @@ func TestTransform(t *testing.T) { }, { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2008-5618", - Provider: expectedProvider("debian"), - Status: "active", + Name: "CVE-2008-5618", + ProviderID: "debian", + Provider: expectedProvider("debian"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2008-5618", References: []grypeDB.Reference{ @@ -595,9 +604,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2021-37621", - Provider: expectedProvider("mariner"), - Status: "active", + Name: "CVE-2021-37621", + ProviderID: "mariner", + Provider: 
expectedProvider("mariner"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2021-37621", Description: "CVE-2021-37621 affecting package exiv2 for versions less than 0.27.5-1. An upgraded version of the package is available that resolves this issue.", @@ -640,9 +650,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2023-29404", - Provider: expectedProvider("mariner"), - Status: "active", + Name: "CVE-2023-29404", + ProviderID: "mariner", + Provider: expectedProvider("mariner"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2023-29404", Description: "CVE-2023-29404 affecting package golang for versions less than 1.20.7-1. A patched version of the package is available.", @@ -684,6 +695,7 @@ func TestTransform(t *testing.T) { { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ Name: "ELSA-2020-2550", + ProviderID: "oracle", Provider: expectedProvider("oracle"), Status: "active", PublishedDate: timeRef(time.Date(2020, 6, 15, 0, 0, 0, 0, time.UTC)), @@ -757,9 +769,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2020-14350", - Provider: expectedProvider("oracle"), - Status: "active", + Name: "CVE-2020-14350", + ProviderID: "oracle", + Provider: expectedProvider("oracle"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2020-14350", Description: "A flaw was found in PostgreSQL, where some PostgreSQL extensions did not use the search_path safely in their installation script. This flaw allows an attacker with sufficient privileges to trick an administrator into executing a specially crafted script during the extension's installation or update. 
The highest threat from this vulnerability is to confidentiality, integrity, as well as system availability.", @@ -852,9 +865,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2020-6819", - Provider: expectedProvider("redhat"), - Status: "active", + Name: "CVE-2020-6819", + ProviderID: "redhat", + Provider: expectedProvider("redhat"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2020-6819", Description: "A flaw was found in Mozilla Firefox. A race condition can occur while running the nsDocShell destructor causing a use-after-free memory issue. The highest threat from this vulnerability is to data confidentiality and integrity as well as system availability.", @@ -942,9 +956,10 @@ func TestTransform(t *testing.T) { want: []transformers.RelatedEntries{ { VulnerabilityHandle: grypeDB.VulnerabilityHandle{ - Name: "CVE-2020-14350", - Provider: expectedProvider("redhat"), - Status: "active", + Name: "CVE-2020-14350", + ProviderID: "redhat", + Provider: expectedProvider("redhat"), + Status: "active", BlobValue: &grypeDB.VulnerabilityBlob{ ID: "CVE-2020-14350", Description: "A flaw was found in PostgreSQL, where some PostgreSQL extensions did not use the search_path safely in their installation script. This flaw allows an attacker with sufficient privileges to trick an administrator into executing a specially crafted script during the extension's installation or update. 
The highest threat from this vulnerability is to confidentiality, integrity, as well as system availability.", diff --git a/pkg/process/v6/writer.go b/pkg/process/v6/writer.go index 40358889..dff56c05 100644 --- a/pkg/process/v6/writer.go +++ b/pkg/process/v6/writer.go @@ -49,7 +49,6 @@ func NewWriter(directory string, states provider.States) (data.Writer, error) { } func (w writer) Write(entries ...data.Entry) error { - log.WithFields("records", len(entries)).Trace("writing records to DB") for _, entry := range entries { if entry.DBSchemaVersion != grypeDB.ModelVersion { return fmt.Errorf("wrong schema version: want %+v got %+v", grypeDB.ModelVersion, entry.DBSchemaVersion) @@ -57,6 +56,7 @@ func (w writer) Write(entries ...data.Entry) error { switch row := entry.Data.(type) { case transformers.RelatedEntries: + log.WithFields("vuln", row.VulnerabilityHandle.Name, "affected-packages", len(row.Related)).Trace("writing") if err := w.writeEntry(row); err != nil { return fmt.Errorf("unable to write entry to store: %w", err) }