From 852389ced62f247641a76caa9f27c1a80bf0c214 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 26 Nov 2024 14:57:43 -0500 Subject: [PATCH] moderate review comments Signed-off-by: Alex Goodman --- DEVELOPING.md | 3 +- README.md | 5 +- cmd/grype-db/cli/commands/build.go | 38 +----- internal/tarutil/writer.go | 9 ++ pkg/process/package.go | 114 +++++++++--------- pkg/provider/state.go | 28 +++++ .../provider/state_test.go | 20 ++- 7 files changed, 110 insertions(+), 107 deletions(-) rename cmd/grype-db/cli/commands/build_test.go => pkg/provider/state_test.go (89%) diff --git a/DEVELOPING.md b/DEVELOPING.md index a6b4a414..7b30abd7 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -7,7 +7,8 @@ well as acceptance testing. You will require the following: - Python 3.8+ installed on your system. Consider using [pyenv](https://github.com/pyenv/pyenv) if you do not have a preference for managing python interpreter installations. - +- `zstd` binary utility if you are packaging v6+ DB schemas +- _(optional)_ `xz` binary utility if you have specifically overridden the package command options - [Poetry](https://python-poetry.org/) installed for dependency and virtualenv management for python dependencies, to install: diff --git a/README.md b/README.md index 121d32b2..e259d53f 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ curl -sSfL https://raw.githubusercontent.com/anchore/grype-db/main/install.sh | curl -sSfL https://raw.githubusercontent.com/anchore/grype-db/main/install.sh | sh -s -- -b ``` +> [!IMPORTANT] +> You will require the `zstd` utility installed on your system to support the `package` command. ## Usage @@ -39,6 +41,7 @@ grype-db pull [-g] [-p PROVIDER ...] grype-db build [-g] [--dir=DIR] [--schema=SCHEMA] [--skip-validation] [-p PROVIDER ...] 
# Package the already built DB file into an archive ready for upload and serving +# note: you will require the zstd utility to be installed on your system grype-db package [--dir=DIR] [--publish-base-url=URL] ``` @@ -54,7 +57,7 @@ is created that is used in packaging and curation of the database file by this a and a `provider-metadata.json` file is created that includes the last successful run date for each provider. Use `-g` to generate the list of providers to pull based on the output of "vunnel list". -The `package` command archives the `vulnerability.db`, `metadata.json` and `provider-metadata.json` files into a `tar.gz` file. Additionally, a `listing.json` +The `package` command archives the `vulnerability.db` file into a `tar.zst` file. Additionally, a `latest.json` is generated to aid in serving one or more database archives for downstream consumption, where the consuming application should use the listing file to discover available archives available for download. The base URL used to create the download URL for each database archive is controlled by the `package.base-url` configuration option. 
diff --git a/cmd/grype-db/cli/commands/build.go b/cmd/grype-db/cli/commands/build.go index 3e376187..ea19fe8c 100644 --- a/cmd/grype-db/cli/commands/build.go +++ b/cmd/grype-db/cli/commands/build.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "os" - "time" "github.com/scylladb/go-set/strset" "github.com/spf13/cobra" @@ -108,7 +107,7 @@ func runBuild(cfg buildConfig) error { return fmt.Errorf("unable to get provider states: %w", err) } - earliest, err := earliestTimestamp(states) + earliest, err := provider.States(states).EarliestTimestamp() if err != nil { return fmt.Errorf("unable to get earliest timestamp: %w", err) } @@ -152,38 +151,3 @@ func providerStates(skipValidation bool, providers []provider.Provider) ([]provi } return states, nil } - -func earliestTimestamp(states []provider.State) (time.Time, error) { - if len(states) == 0 { - return time.Time{}, fmt.Errorf("cannot find earliest timestamp: no states provided") - } - - // special case when there is exactly 1 state, return its timestamp even - // if it is nvd, because otherwise quality gates that pull only nvd deterministically fail. 
- if len(states) == 1 { - return states[0].Timestamp, nil - } - - var earliest time.Time - for _, s := range states { - // the NVD api is constantly down, so we don't want to consider it for the earliest timestamp - if s.Provider == "nvd" { - log.WithFields("provider", s.Provider).Debug("not considering data age for provider") - continue - } - if earliest.IsZero() { - earliest = s.Timestamp - continue - } - if s.Timestamp.Before(earliest) { - earliest = s.Timestamp - } - } - - if earliest.IsZero() { - return time.Time{}, fmt.Errorf("unable to determine earliest timestamp") - } - - log.WithFields("timestamp", earliest).Debug("earliest data timestamp") - return earliest, nil -} diff --git a/internal/tarutil/writer.go b/internal/tarutil/writer.go index 3fd6fbf7..a32f2a15 100644 --- a/internal/tarutil/writer.go +++ b/internal/tarutil/writer.go @@ -73,6 +73,15 @@ func newShellCompressor(c string, archive io.Writer) (*shellCompressor, error) { return nil, fmt.Errorf("unable to parse command: %w", err) } binary := args[0] + + binPath, err := exec.LookPath(binary) + if err != nil { + return nil, fmt.Errorf("unable to find binary %q: %w", binary, err) + } + if binPath == "" { + return nil, fmt.Errorf("unable to find binary %q in PATH", binary) + } + args = args[1:] cmd := exec.Command(binary, args...) 
log.Debug(strings.Join(cmd.Args, " ")) diff --git a/pkg/process/package.go b/pkg/process/package.go index 89eff025..2a8d369c 100644 --- a/pkg/process/package.go +++ b/pkg/process/package.go @@ -9,8 +9,11 @@ import ( "strings" "time" + "github.com/scylladb/go-set/strset" + "github.com/anchore/grype-db/internal/log" "github.com/anchore/grype-db/internal/tarutil" + "github.com/anchore/grype-db/pkg/provider" grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/legacy/distribution" v6 "github.com/anchore/grype/grype/db/v6" v6Distribution "github.com/anchore/grype/grype/db/v6/distribution" @@ -31,69 +34,34 @@ func packageDB(dbDir, overrideArchiveExtension string) error { } log.WithFields("from", dbDir, "extension", extension).Info("packaging database") - tarPath, err := calculateTarPath(dbDir, extension) - if err != nil { - return err - } - - if err := populateTar(tarPath); err != nil { - return err - } - - log.WithFields("path", tarPath).Info("created database archive") - - return writeLatestDocument(tarPath) -} - -func resolveExtension(overrideArchiveExtension string) (string, error) { - var extension = "tar.zst" - - if overrideArchiveExtension != "" { - extension = strings.TrimLeft(overrideArchiveExtension, ".") - } - - var found bool - for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} { - if valid == extension { - found = true - break - } - } - - if !found { - return "", fmt.Errorf("unsupported archive extension %q", extension) - } - return extension, nil -} - -func calculateTarPath(dbDir string, extension string) (string, error) { s, err := v6.NewReader(v6.Config{DBDirPath: dbDir}) if err != nil { - return "", fmt.Errorf("unable to open vulnerability store: %w", err) + return fmt.Errorf("unable to open vulnerability store: %w", err) } metadata, err := s.GetDBMetadata() - if err != nil { - return "", fmt.Errorf("unable to get vulnerability store metadata: %w", err) + if err != nil || metadata == nil { + return fmt.Errorf("unable to get 
vulnerability store metadata: %w", err) } if metadata.Model != v6.ModelVersion { - return "", fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model) + return fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model) } - providers, err := s.AllProviders() + providerModels, err := s.AllProviders() if err != nil { - return "", fmt.Errorf("unable to get all providers: %w", err) + return fmt.Errorf("unable to get all providers: %w", err) } - if len(providers) == 0 { - return "", fmt.Errorf("no providers found in the vulnerability store") + if len(providerModels) == 0 { + return fmt.Errorf("no providers found in the vulnerability store") } - eldest := eldestProviderTimestamp(providers) - if eldest == nil { - return "", errors.New("could not resolve eldest provider timestamp") + eldest, err := toProviders(providerModels).EarliestTimestamp() + if err != nil { + return err } + // output archive vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.tar.gz, where: // - VERSION: schema version in the form of v#.#.# // - OLDESTDATADATE: RFC3338 formatted value of the oldest date capture date found for all contained providers @@ -106,19 +74,51 @@ func calculateTarPath(dbDir string, extension string) (string, error) { extension, ) - return filepath.Join(dbDir, tarName), err + tarPath := filepath.Join(dbDir, tarName) + + if err := populateTar(tarPath); err != nil { + return err + } + + log.WithFields("path", tarPath).Info("created database archive") + + return writeLatestDocument(tarPath, *metadata) +} + +func toProviders(states []v6.Provider) provider.States { + var result provider.States + for _, state := range states { + result = append(result, provider.State{ + Provider: state.ID, + Timestamp: *state.DateCaptured, + }) + } + return result } -func eldestProviderTimestamp(providers []v6.Provider) *time.Time { - var eldest *time.Time - for _, p := range providers { - if eldest == 
nil || p.DateCaptured.Before(*eldest) { - eldest = p.DateCaptured +func resolveExtension(overrideArchiveExtension string) (string, error) { + var extension = "tar.zst" + + if overrideArchiveExtension != "" { + extension = strings.TrimLeft(overrideArchiveExtension, ".") + } + + var found bool + for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} { + if valid == extension { + found = true + break } } - return eldest + + if !found { + return "", fmt.Errorf("unsupported archive extension %q", extension) + } + return extension, nil } +var listingFiles = strset.New("listing.json", "latest.json", "history.json") + func populateTar(tarPath string) error { originalDir, err := os.Getwd() if err != nil { @@ -146,7 +146,7 @@ func populateTar(tarPath string) error { var files []string for _, fi := range fileInfos { - if fi.Name() != "listing.json" && !strings.Contains(fi.Name(), ".tar.") { + if !listingFiles.Has(fi.Name()) && !strings.Contains(fi.Name(), ".tar.") { files = append(files, fi.Name()) } } @@ -158,8 +158,8 @@ func populateTar(tarPath string) error { return nil } -func writeLatestDocument(tarPath string) error { - archive, err := v6Distribution.NewArchive(tarPath) +func writeLatestDocument(tarPath string, metadata v6.DBMetadata) error { + archive, err := v6Distribution.NewArchive(tarPath, *metadata.BuildTimestamp, metadata.Model, metadata.Revision, metadata.Addition) if err != nil || archive == nil { return fmt.Errorf("unable to create archive: %w", err) } diff --git a/pkg/provider/state.go b/pkg/provider/state.go index 81c5755f..17e469ec 100644 --- a/pkg/provider/state.go +++ b/pkg/provider/state.go @@ -134,3 +134,31 @@ func (s States) Names() []string { } return names } + +func (s States) EarliestTimestamp() (time.Time, error) { + if len(s) == 0 { + return time.Time{}, fmt.Errorf("cannot find earliest timestamp: no states provided") + } + var earliest time.Time + for _, curState := range s { + // the NVD api is constantly down, so we don't want to 
consider it for the earliest timestamp + if curState.Provider == "nvd" { + log.WithFields("provider", curState.Provider).Debug("not considering data age for provider") + continue + } + if earliest.IsZero() { + earliest = curState.Timestamp + continue + } + if curState.Timestamp.Before(earliest) { + earliest = curState.Timestamp + } + } + + if earliest.IsZero() { + return time.Time{}, fmt.Errorf("unable to determine earliest timestamp") + } + + log.WithFields("timestamp", earliest).Debug("earliest data timestamp") + return earliest, nil +} diff --git a/cmd/grype-db/cli/commands/build_test.go b/pkg/provider/state_test.go similarity index 89% rename from cmd/grype-db/cli/commands/build_test.go rename to pkg/provider/state_test.go index 97c19af7..bf04fd32 100644 --- a/cmd/grype-db/cli/commands/build_test.go +++ b/pkg/provider/state_test.go @@ -1,4 +1,4 @@ -package commands +package provider import ( "reflect" @@ -6,20 +6,18 @@ import ( "time" "github.com/stretchr/testify/require" - - "github.com/anchore/grype-db/pkg/provider" ) func Test_earliestTimestamp(t *testing.T) { tests := []struct { name string - states []provider.State + states []State want time.Time wantErr require.ErrorAssertionFunc }{ { name: "happy path", - states: []provider.State{ + states: []State{ { Timestamp: time.Date(2021, 1, 2, 0, 0, 0, 0, time.UTC), }, @@ -34,13 +32,13 @@ func Test_earliestTimestamp(t *testing.T) { }, { name: "empty states", - states: []provider.State{}, + states: []State{}, want: time.Time{}, wantErr: requireErrorContains("cannot find earliest timestamp: no states provided"), }, { name: "single state", - states: []provider.State{ + states: []State{ { Timestamp: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), }, @@ -59,7 +57,7 @@ func Test_earliestTimestamp(t *testing.T) { }, { name: "all states have provider nvd", - states: []provider.State{ + states: []State{ { Provider: "nvd", Timestamp: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), @@ -74,7 +72,7 @@ func Test_earliestTimestamp(t 
*testing.T) { }, { name: "mix of nvd and non-nvd providers", - states: []provider.State{ + states: []State{ { Provider: "nvd", Timestamp: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), @@ -92,7 +90,7 @@ func Test_earliestTimestamp(t *testing.T) { }, { name: "timestamps are the same", - states: []provider.State{ + states: []State{ { Timestamp: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), }, @@ -112,7 +110,7 @@ func Test_earliestTimestamp(t *testing.T) { if tt.wantErr == nil { tt.wantErr = require.NoError } - got, err := earliestTimestamp(tt.states) + got, err := States(tt.states).EarliestTimestamp() tt.wantErr(t, err) if err != nil { return