Skip to content

Commit

Permalink
rpm: extract filename information
Browse files Browse the repository at this point in the history
This should allow us to extract a list of known file patterns for use
later in the Indexer pipeline.

Signed-off-by: Hank Donnay <[email protected]>
  • Loading branch information
hdonnay committed Apr 15, 2024
1 parent e8f9aff commit b1e2f67
Showing 1 changed file with 76 additions and 1 deletion.
77 changes: 76 additions & 1 deletion rpm/native_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"context"
"fmt"
"io"
"path"
"regexp"
"runtime/trace"
"strings"

Expand Down Expand Up @@ -121,14 +123,17 @@ type Info struct {
Module string
Arch string
Digest string
Signature []byte // This is a PGP signature packet.
Signature []byte // This is a PGP signature packet.
Filenames []string // Filtered by the [filePatterns] regexp.
DigestAlgo int
Epoch int
}

// Load populates the receiver with information extracted from the provided
// [rpm.Header].
func (i *Info) Load(ctx context.Context, h *rpm.Header) error {
var dirname, basename []string
var dirindex []int32
for idx := range h.Infos {
e := &h.Infos[idx]
if _, ok := wantTags[e.Tag]; !ok {
Expand Down Expand Up @@ -159,14 +164,84 @@ func (i *Info) Load(ctx context.Context, h *rpm.Header) error {
i.Digest = v.([]string)[0]
case rpm.TagSigPGP:
i.Signature = v.([]byte)
case rpm.TagDirnames:
dirname = v.([]string)
case rpm.TagDirindexes:
dirindex = v.([]int32)
case rpm.TagBasenames:
basename = v.([]string)
case rpm.TagFilenames:

Check warning on line 173 in rpm/native_db.go

View check run for this annotation

Codecov / codecov/patch

rpm/native_db.go#L173

Added line #L173 was not covered by tests
// Filenames is the tag used in rpm4 -- this is a best-effort for
// supporting it.
for _, name := range v.([]string) {
if !filePatterns.MatchString(name) {

Check warning on line 177 in rpm/native_db.go

View check run for this annotation

Codecov / codecov/patch

rpm/native_db.go#L176-L177

Added lines #L176 - L177 were not covered by tests
// Record the name as a relative path, as that's what we use
// everywhere else.
i.Filenames = append(i.Filenames, name[1:])

Check warning on line 180 in rpm/native_db.go

View check run for this annotation

Codecov / codecov/patch

rpm/native_db.go#L180

Added line #L180 was not covered by tests
}
}
}
}

// Catch panics from malformed headers. Can't think of a better way to
// handle this.
defer func() {
if r := recover(); r == nil {
return
}
zlog.Warn(ctx).
Str("name", i.Name).
Strs("basename", basename).
Strs("dirname", dirname).
Ints32("dirindex", dirindex).
Msg("caught panic in filename construction")
i.Filenames = nil

Check warning on line 198 in rpm/native_db.go

View check run for this annotation

Codecov / codecov/patch

rpm/native_db.go#L192-L198

Added lines #L192 - L198 were not covered by tests
}()
for j := range basename {
// We only want '/'-separated paths, even if running on some other,
// weird OS. It seems that RPM assumes '/' throughout.
name := path.Join(dirname[dirindex[j]], basename[j])
if !filePatterns.MatchString(name) {
// Record the name as a relative path, as that's what we use
// everywhere else.
i.Filenames = append(i.Filenames, name[1:])
}
}
return nil
}

// filePatterns is a regular expression for *any* file that may need to be
// recorded alongside a package.
//
// The tested strings are absolute paths. The expression is the alternation of
// the individual patterns listed in init, where it is compiled.
var filePatterns *regexp.Regexp

// init compiles filePatterns from the list of per-ecosystem patterns.
// [regexp.MustCompile] panics on an invalid expression, so a malformed pattern
// surfaces at program start.
func init() {
	// TODO(hank) The blanket binary pattern is too broad and can miss things.
	// Long-term, we should add pattern matching akin to [yara] or file(1) as a
	// plugin mechanism that all indexers can use. That way, the Go indexer
	// could register a pattern and use a shared filter over the
	// [fs.WalkDirFunc] while this package (and dpkg, etc) can tell that another
	// indexer will find those files relevant.
	//
	// [yara]: https://github.com/VirusTotal/yara
	pat := []string{
		`^.*/[^/]+\.jar$`,                             // Jar files
		`^.*/site-packages/[^/]+\.egg-info/PKG-INFO$`, // Python packages
		// The dot is escaped so that only a literal "package.json" matches;
		// an unescaped dot would also accept names like "package_json".
		`^.*/package\.json$`,  // npm packages
		`^.*/[^/]+\.gemspec$`, // ruby gems
		`^/usr/bin/[^/]+$`,    // any executable
	}
	filePatterns = regexp.MustCompile(strings.Join(pat, `|`))
}

var wantTags = map[rpm.Tag]struct{}{
rpm.TagArch: {},
rpm.TagBasenames: {},
rpm.TagDirindexes: {},
rpm.TagDirnames: {},
rpm.TagEpoch: {},
rpm.TagFilenames: {},
rpm.TagModularityLabel: {},
rpm.TagName: {},
rpm.TagPayloadDigest: {},
Expand Down

0 comments on commit b1e2f67

Please sign in to comment.