From 7cdc04abed42f382ba421ac912b098fffb769e27 Mon Sep 17 00:00:00 2001 From: RTann Date: Mon, 26 Feb 2024 17:13:12 -0800 Subject: [PATCH] all: account for language package overwrites Signed-off-by: RTann --- gobin/coalescer.go | 45 ----------- gobin/ecosystem.go | 5 +- gobin/gobin.go | 2 +- java/coalescer.go | 42 ---------- java/ecosystem.go | 7 +- java/packagescanner.go | 2 +- language/coalescer.go | 74 ++++++++++++++++++ language/coalescer_test.go | 155 +++++++++++++++++++++++++++++++++++++ nodejs/coalescer.go | 48 ------------ nodejs/coalescer_test.go | 71 ----------------- nodejs/ecosystem.go | 3 +- nodejs/packagescanner.go | 2 +- python/coalescer.go | 47 ----------- python/ecosystem.go | 3 +- python/packagescanner.go | 2 +- ruby/coalescer.go | 46 ----------- ruby/ecosystem.go | 3 +- ruby/packagescanner.go | 2 +- 18 files changed, 246 insertions(+), 313 deletions(-) delete mode 100644 gobin/coalescer.go delete mode 100644 java/coalescer.go create mode 100644 language/coalescer.go create mode 100644 language/coalescer_test.go delete mode 100644 nodejs/coalescer.go delete mode 100644 nodejs/coalescer_test.go delete mode 100644 python/coalescer.go delete mode 100644 ruby/coalescer.go diff --git a/gobin/coalescer.go b/gobin/coalescer.go deleted file mode 100644 index 5686ec38e..000000000 --- a/gobin/coalescer.go +++ /dev/null @@ -1,45 +0,0 @@ -package gobin - -import ( - "context" - "strings" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -type coalescer struct{} - -func (c *coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - for _, l := range ls { - var rid string - for _, r := range l.Repos { - // Magic strings copied out of the osv package. - if r.Name != `go` || r.URI != `https://pkg.go.dev/` { - continue - } - rid = r.ID - ir.Repositories[r.ID] = r - break - } - for _, pkg := range l.Pkgs { - if !strings.HasPrefix(pkg.PackageDB, "go:") { - continue - } - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: []string{rid}, - }, - } - } - } - return ir, nil -} diff --git a/gobin/ecosystem.go b/gobin/ecosystem.go index 294728ad2..1ded67dca 100644 --- a/gobin/ecosystem.go +++ b/gobin/ecosystem.go @@ -4,10 +4,11 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) // NewEcosystem provides the ecosystem for handling go binaries. -func NewEcosystem(ctx context.Context) *indexer.Ecosystem { +func NewEcosystem(_ context.Context) *indexer.Ecosystem { return &indexer.Ecosystem{ Name: "gobin", PackageScanners: func(context.Context) ([]indexer.PackageScanner, error) { @@ -15,6 +16,6 @@ func NewEcosystem(ctx context.Context) *indexer.Ecosystem { }, DistributionScanners: func(context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: func(context.Context) (indexer.Coalescer, error) { return &coalescer{}, nil }, + Coalescer: language.NewCoalescer, } } diff --git a/gobin/gobin.go b/gobin/gobin.go index a2eb7ff82..aabad76d9 100644 --- a/gobin/gobin.go +++ b/gobin/gobin.go @@ -35,7 +35,7 @@ type Detector struct{} const ( detectorName = `gobin` - detectorVersion = `6` + detectorVersion = `7` detectorKind = `package` ) diff --git a/java/coalescer.go b/java/coalescer.go deleted file mode 100644 index 688f86280..000000000 --- a/java/coalescer.go +++ /dev/null @@ -1,42 +0,0 @@ -package java - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -type coalescer struct{} - -func (*coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one maven repo in this layer - // no point in searching for packages. - if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/java/ecosystem.go b/java/ecosystem.go index 1cca07347..894ab920c 100644 --- a/java/ecosystem.go +++ b/java/ecosystem.go @@ -4,10 +4,11 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) // NewEcosystem provides the set of scanners for the java ecosystem. -func NewEcosystem(ctx context.Context) *indexer.Ecosystem { +func NewEcosystem(_ context.Context) *indexer.Ecosystem { return &indexer.Ecosystem{ PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return []indexer.PackageScanner{&Scanner{}}, nil @@ -16,8 +17,6 @@ func NewEcosystem(ctx context.Context) *indexer.Ecosystem { RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: func(_ context.Context) (indexer.Coalescer, error) { - return (*coalescer)(nil), nil - }, + Coalescer: language.NewCoalescer, } } diff --git a/java/packagescanner.go b/java/packagescanner.go index 796e81c05..7a674957b 100644 --- a/java/packagescanner.go +++ b/java/packagescanner.go @@ -71,7 +71,7 @@ type Scanner struct { func (*Scanner) Name() string { return "java" } // Version implements scanner.VersionedScanner. -func (*Scanner) Version() string { return "6" } +func (*Scanner) Version() string { return "7" } // Kind implements scanner.VersionedScanner. func (*Scanner) Kind() string { return "package" } diff --git a/language/coalescer.go b/language/coalescer.go new file mode 100644 index 000000000..ea3d44d86 --- /dev/null +++ b/language/coalescer.go @@ -0,0 +1,74 @@ +// Package language implements structs and functions common between +// programming language indexing implementations. +package language + +import ( + "context" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" +) + +var _ indexer.Coalescer = (*coalescer)(nil) + +type coalescer struct{} + +// NewCoalescer returns a new common programming language coalescer. +func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { + return &coalescer{}, nil +} + +// Coalesce implements [indexer.Coalescer]. +// +// Image builders may opt to update language-packages instead of deleting and recreating them +// (as in, there may or may not be a whiteout file to make it clear the package was updated). +// This function ensures both scenarios are supported. +func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { + ir := &claircore.IndexReport{ + Environments: map[string][]*claircore.Environment{}, + Packages: map[string]*claircore.Package{}, + Repositories: map[string]*claircore.Repository{}, + } + // Similar to ir.Packages, except instead of mapping + // id -> package, it maps packageDB -> package. + // For language packages, it is possible the + // packageDB is overwritten between subsequent layers. + packages := make(map[string]*claircore.Package) + for i := len(ls) - 1; i >= 0; i-- { + l := ls[i] + // If we didn't find at least one repo in this layer + // no point searching for packages. + if len(l.Repos) == 0 { + continue + } + rs := make([]string, len(l.Repos)) + for i, r := range l.Repos { + rs[i] = r.ID + ir.Repositories[r.ID] = r + } + for _, pkg := range l.Pkgs { + if seen, exists := packages[pkg.PackageDB]; exists { + // If the package was renamed or has a different version in a higher (previously seen) layer, + // then this is considered a different package. + // In that case, ignore the original package in the lower (this) layer. + if pkg.Name != seen.Name || pkg.Version != seen.Version { + continue + } + // The name and version are the same, so delete the entry related to the higher (previously seen) + // layer, as this package was likely introduced in the lower (this) layer. + delete(ir.Packages, seen.ID) + delete(ir.Environments, seen.ID) + } + packages[pkg.PackageDB] = pkg + ir.Packages[pkg.ID] = pkg + ir.Environments[pkg.ID] = []*claircore.Environment{ + { + PackageDB: pkg.PackageDB, + IntroducedIn: l.Hash, + RepositoryIDs: rs, + }, + } + } + } + return ir, nil +} diff --git a/language/coalescer_test.go b/language/coalescer_test.go new file mode 100644 index 000000000..505fbb473 --- /dev/null +++ b/language/coalescer_test.go @@ -0,0 +1,155 @@ +package language + +import ( + "context" + "strconv" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" + "github.com/quay/claircore/test" +) + +func TestCoalescer(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + coalescer := &coalescer{} + pkgs := test.GenUniquePackages(6) + repo := []*claircore.Repository{{ + Name: "npm", + URI: "https://www.npmjs.com/", + }} + layerArtifacts := []*indexer.LayerArtifacts{ + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:1], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:2], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:3], + Repos: repo, + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:4], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:5], + Repos: repo, + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs, + }, + } + ir, err := coalescer.Coalesce(ctx, layerArtifacts) + if err != nil { + t.Fatalf("received error from coalesce method: %v", err) + } + // Expect 0-5 to have gotten associated with the repository. + for i := range pkgs { + es, ok := ir.Environments[strconv.Itoa(i)] + if !ok && i == 5 { + // Left out the last package. + continue + } + e := es[0] + if len(e.RepositoryIDs) == 0 { + t.Error("expected some repositories") + } + for _, id := range e.RepositoryIDs { + r := ir.Repositories[id] + if got, want := r.Name, "npm"; got != want { + t.Errorf("got: %q, want: %q", got, want) + } + } + } +} + +func TestCoalescerPackageOverwrite(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + coalescer := &coalescer{} + repo := []*claircore.Repository{{ + Name: "npm", + URI: "https://www.npmjs.com/", + }} + hashes := []claircore.Digest{ + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + } + layerArtifacts := []*indexer.LayerArtifacts{ + { + Hash: hashes[0], + Pkgs: []*claircore.Package{ + { + ID: "0", + Name: "semver", + Version: "7.3.8", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + { + Hash: hashes[1], + }, + { + Hash: hashes[2], + Pkgs: []*claircore.Package{ + { + ID: "1", + Name: "semver", + Version: "7.5.2", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + { + Hash: hashes[3], + Pkgs: []*claircore.Package{ + { + ID: "2", + Name: "semver", + Version: "7.5.2", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + } + ir, err := coalescer.Coalesce(ctx, layerArtifacts) + if err != nil { + t.Fatalf("received error from coalesce method: %v", err) + } + if len(ir.Packages) != 1 { + t.Fatalf("unexpected number of packages: %d != %d", len(ir.Packages), 1) + } + pkg, exists := ir.Packages["1"] + if !exists { + t.Fatal("expected package does not exist") + } + if pkg.Version != "7.5.2" { + t.Fatalf("unexpected version: %s != %s", pkg.Version, "7.5.2") + } + envs, exists := ir.Environments["1"] + if !exists { + t.Fatal("expected environments do not exist") + } + if len(envs) != 1 { + t.Fatalf("unexpected number of envionments: %d != %d", len(envs), 1) + } + if envs[0].IntroducedIn.String() != hashes[2].String() { + t.Fatalf("unexpected introducedIn: %s != %s", envs[0].IntroducedIn.String(), hashes[2].String()) + } +} diff --git a/nodejs/coalescer.go b/nodejs/coalescer.go deleted file mode 100644 index 469cc7446..000000000 --- a/nodejs/coalescer.go +++ /dev/null @@ -1,48 +0,0 @@ -package nodejs - -import ( - "context" - "github.com/quay/claircore" - - "github.com/quay/claircore/indexer" -) - -var _ indexer.Coalescer = (*coalescer)(nil) - -type coalescer struct{} - -func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { - return &coalescer{}, nil -} - -func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one npm repo in this layer - // no point in searching for packages. - if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/nodejs/coalescer_test.go b/nodejs/coalescer_test.go deleted file mode 100644 index dba365f03..000000000 --- a/nodejs/coalescer_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package nodejs - -import ( - "context" - "strconv" - "testing" - - "github.com/quay/zlog" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" - "github.com/quay/claircore/test" -) - -func TestCoalescer(t *testing.T) { - t.Parallel() - ctx := zlog.Test(context.Background(), t) - coalescer := &coalescer{} - pkgs := test.GenUniquePackages(6) - repo := []*claircore.Repository{&Repository} - layerArtifacts := []*indexer.LayerArtifacts{ - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:1], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:2], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:3], - Repos: repo, - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:4], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:5], - Repos: repo, - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs, - }, - } - ir, err := coalescer.Coalesce(ctx, layerArtifacts) - if err != nil { - t.Fatalf("received error from coalesce method: %v", err) - } - // Expect 0-5 to have gotten associated with the repository. - for i := range pkgs { - es, ok := ir.Environments[strconv.Itoa(i)] - if !ok && i == 5 { - // Left out the last package. - continue - } - e := es[0] - if len(e.RepositoryIDs) == 0 { - t.Error("expected some repositories") - } - for _, id := range e.RepositoryIDs { - r := ir.Repositories[id] - if got, want := r.Name, Repository.Name; got != want { - t.Errorf("got: %q, want: %q", got, want) - } - } - } -} diff --git a/nodejs/ecosystem.go b/nodejs/ecosystem.go index de6f84c83..3f958919d 100644 --- a/nodejs/ecosystem.go +++ b/nodejs/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } } diff --git a/nodejs/packagescanner.go b/nodejs/packagescanner.go index 9613301bc..2a4991396 100644 --- a/nodejs/packagescanner.go +++ b/nodejs/packagescanner.go @@ -44,7 +44,7 @@ type Scanner struct{} func (*Scanner) Name() string { return "nodejs" } // Version implements scanner.VersionedScanner. -func (*Scanner) Version() string { return "2" } +func (*Scanner) Version() string { return "3" } // Kind implements scanner.VersionedScanner. func (*Scanner) Kind() string { return "package" } diff --git a/python/coalescer.go b/python/coalescer.go deleted file mode 100644 index d0d713881..000000000 --- a/python/coalescer.go +++ /dev/null @@ -1,47 +0,0 @@ -package python - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { - return &coalescer{}, nil -} - -type coalescer struct { -} - -func (c *coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one pip repo in this layer - // no point in searching for packages. - if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - &claircore.Environment{ - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/python/ecosystem.go b/python/ecosystem.go index 7458c9edd..9cd3b2a00 100644 --- a/python/ecosystem.go +++ b/python/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(ctx context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } } diff --git a/python/packagescanner.go b/python/packagescanner.go index fc494062d..bd4f93645 100644 --- a/python/packagescanner.go +++ b/python/packagescanner.go @@ -46,7 +46,7 @@ type Scanner struct{} func (*Scanner) Name() string { return "python" } // Version implements scanner.VersionedScanner. -func (*Scanner) Version() string { return "4" } +func (*Scanner) Version() string { return "5" } // Kind implements scanner.VersionedScanner. func (*Scanner) Kind() string { return "package" } diff --git a/ruby/coalescer.go b/ruby/coalescer.go deleted file mode 100644 index cb0d33ef1..000000000 --- a/ruby/coalescer.go +++ /dev/null @@ -1,46 +0,0 @@ -package ruby - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { - return &coalescer{}, nil -} - -type coalescer struct{} - -func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one gem repo in this layer - // no point in searching for packages. - if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/ruby/ecosystem.go b/ruby/ecosystem.go index 75f7208cd..985ecdb01 100644 --- a/ruby/ecosystem.go +++ b/ruby/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } } diff --git a/ruby/packagescanner.go b/ruby/packagescanner.go index 3e3d02aa2..98c6fe6e1 100644 --- a/ruby/packagescanner.go +++ b/ruby/packagescanner.go @@ -70,7 +70,7 @@ type Scanner struct{} func (*Scanner) Name() string { return "ruby" } // Version implements scanner.VersionedScanner. -func (*Scanner) Version() string { return "2" } +func (*Scanner) Version() string { return "3" } // Kind implements scanner.VersionedScanner. func (*Scanner) Kind() string { return "package" }