Skip to content

Commit

Permalink
add latest.json validation
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman committed Dec 17, 2024
1 parent 8d208f9 commit 3cf9ccc
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 15 deletions.
46 changes: 41 additions & 5 deletions manager/src/grype_db_manager/cli/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from yardstick.cli import config as ycfg
from yardstick.cli.validate import validate as yardstick_validate

from grype_db_manager import db, s3utils
from grype_db_manager import db, s3utils, grypedb
from grype_db_manager.cli import config, error
from grype_db_manager.db.format import Format
from grype_db_manager.grypedb import DB_DIR, DBManager, GrypeDB
Expand Down Expand Up @@ -125,10 +125,6 @@ def validate_db(
click.echo(f"no database found with session id {db_uuid}")
return

if db_info.schema_version >= 6:
# TODO: not implemented yet
raise NotImplementedError("validation for schema v6+ is not yet implemented")

if not skip_namespace_check:
if db_info.schema_version < 6:
# ensure the minimum number of namespaces are present
Expand All @@ -137,6 +133,21 @@ def validate_db(
# TODO: implement me
raise NotImplementedError("namespace validation for schema v6+ is not yet implemented")

_validate_db(ctx, cfg, db_info, images, db_uuid, verbosity, recapture)

logging.info(f"validating latest.json {db_uuid}")

if db_info.schema_version >= 6:
_validate_latest(cfg, db_info.latest_path, db_info.archive_path)

click.echo(f"{Format.BOLD}{Format.OKGREEN}Validation passed{Format.RESET}")


def _validate_db(ctx: click.Context, cfg: config.Application, db_info: grypedb.DBInfo, images: list[str], db_uuid: str, verbosity: int, recapture: bool) -> None:
if db_info.schema_version >= 6:
# TODO: not implemented yet
raise NotImplementedError("validation for schema v6+ is not yet implemented")

# resolve tool versions and install them
yardstick.store.config.set_values(store_root=cfg.data.yardstick_root)

Expand Down Expand Up @@ -207,6 +218,31 @@ def validate_db(
)


def _validate_latest(cfg: config.Application, latest_file: str, archive_path: str) -> None:
    """Smoke test a latest.json document together with the DB archive it describes.

    The validation settings are checked before anything is read from disk so that a
    misconfiguration is reported immediately instead of after (or masked by) file I/O.

    Args:
        cfg: application configuration; ``cfg.validate.listing`` supplies the image to scan and
            the minimum package / vulnerability counts.
        latest_file: path to the latest.json document to validate.
        archive_path: path to the DB archive, forwarded to ``db.latest.smoke_test``.

    Raises:
        ValueError: if the image, minimum packages, or minimum vulnerabilities setting is unset
            (any falsy value, including ``0``, counts as unset).
    """
    settings = cfg.validate.listing

    if not settings.image:
        msg = "no image specified to validate against"
        raise ValueError(msg)

    if not settings.minimum_packages:
        msg = "minimum packages must be specified"
        raise ValueError(msg)

    if not settings.minimum_vulnerabilities:
        msg = "minimum vulnerabilities must be specified"
        raise ValueError(msg)

    # JSON documents are UTF-8; do not depend on the locale's default encoding
    with open(latest_file, encoding="utf-8") as f:
        latest_obj = db.Latest.from_json(f.read())

    db.latest.smoke_test(
        latest_obj,
        archive_path,
        image=settings.image,
        minimum_packages=settings.minimum_packages,
        minimum_vulnerabilities=settings.minimum_vulnerabilities,
    )


@group.command(name="upload", help="upload a grype database")
@click.option("--ttl-seconds", "-t", default=DEFAULT_TTL_SECONDS, help="the TTL for the uploaded DB (should be relatively high)")
@click.argument("db-uuid")
Expand Down
2 changes: 1 addition & 1 deletion manager/src/grype_db_manager/cli/listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def validate_listing(cfg: config.Application, listing_file: str) -> None:
raise ValueError(msg)

if cfg.validate.listing.override_grype_version and not cfg.validate.listing.override_db_schema_version:
msg = "ovrerride db schema version must be specified if override grype version is specified"
msg = "override db schema version must be specified if override grype version is specified"
raise ValueError(msg)

override_schema_release = None
Expand Down
2 changes: 1 addition & 1 deletion manager/src/grype_db_manager/data/schema-info.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
},
{
"schema": "6",
"grype-version": "main",
"grype-version": "initial-db-dir",
"supported": false
}
]
Expand Down
3 changes: 3 additions & 0 deletions manager/src/grype_db_manager/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from . import listing, metadata, schema
from .latest import Latest
from .listing import Listing
from .metadata import Metadata
from .validation import capture_results

__all__ = [
"Latest",
"Listing",
"Metadata",
"latest",
"listing",
"metadata",
"schema",
Expand Down
169 changes: 169 additions & 0 deletions manager/src/grype_db_manager/db/latest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from __future__ import annotations

import contextlib
import datetime
import functools
import json
import logging
import os
import tempfile
import threading
from dataclasses import dataclass
from http.server import HTTPServer, SimpleHTTPRequestHandler
from typing import TYPE_CHECKING

from dataclass_wizard import asdict, fromdict

from grype_db_manager import grype

if TYPE_CHECKING:
from collections.abc import Iterator

LATEST_FILENAME = "latest.json"

@dataclass
class Latest:
    """In-memory form of the latest.json document used for schema v6 databases.

    Instances are converted to and from plain dictionaries with ``dataclass_wizard``
    (``fromdict`` / ``asdict``); the JSON helpers are thin wrappers around those conversions.
    """

    # whether the database is still being actively maintained and distributed
    status: str | None = None

    # version of the DB schema
    schema_version: str | None = None

    # when the database was built
    built: datetime.datetime | None = None

    # location of the DB archive, relative to where the document is hosted (NOT an absolute URL)
    path: str = ""

    # self-describing digest of the archive referenced by ``path``
    checksum: str = ""

    @classmethod
    def from_dict(cls, contents: dict) -> Latest:
        """Build an instance from an already-parsed dictionary."""
        return fromdict(cls, contents)

    @classmethod
    def from_json(cls, contents: str) -> Latest:
        """Build an instance from a JSON string."""
        parsed = json.loads(contents)
        return cls.from_dict(parsed)

    def to_dict(self) -> dict:
        """Return the dictionary form of this document."""
        return asdict(self)

    def to_json(self, indent: int | None = None) -> str:
        """Serialize this document to JSON with keys sorted, optionally indented."""
        payload = self.to_dict()
        return json.dumps(payload, indent=indent, sort_keys=True)


@contextlib.contextmanager
def _http_server(directory: str, schema_version: str) -> Iterator[str]:
    """Serve ``directory`` over HTTP on 127.0.0.1:5555 for the duration of the ``with`` block.

    The server is bound in the calling thread, so a bind failure (for example the port already
    being in use) raises here instead of being lost inside the background thread. On exit the
    server is shut down and its socket closed so the port is released for later runs.

    Args:
        directory: directory whose contents are served.
        schema_version: schema version string; only the part before the first ``.`` is used to
            build the yielded URL.

    Yields:
        The URL of the latest.json document: ``http://127.0.0.1:5555/v<major>/latest.json``.
    """
    major_version = schema_version.split(".")[0]
    server_address = ("127.0.0.1", 5555)
    url = f"http://{server_address[0]}:{server_address[1]}"
    latest_url = f"{url}/v{major_version}/{LATEST_FILENAME}"

    httpd = HTTPServer(
        server_address,
        functools.partial(SimpleHTTPRequestHandler, directory=directory),
    )
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    try:
        logging.info(f"starting test server at {url!r}")
        # show tree output of the given directory to the log
        _log_dir(directory)

        thread.start()
        yield latest_url
    finally:
        # shutdown() waits for serve_forever() to finish, so only call it once the thread runs
        if thread.is_alive():
            httpd.shutdown()
            thread.join()
        httpd.server_close()


def _log_dir(path: str, prefix: str = "") -> None:
    """Log the contents of ``path`` as a tree, one INFO record per entry.

    Entries are listed in sorted order and subdirectories are walked recursively; ``prefix`` is
    the text placed in front of each entry at the current depth.
    """
    entries = sorted(os.listdir(path))
    final_index = len(entries) - 1
    for index, name in enumerate(entries):
        if index == final_index:
            branch, indent = "└── ", " "
        else:
            branch, indent = "├── ", "│ "
        logging.info(f"{prefix}{branch}{name}")

        child = os.path.join(path, name)
        if os.path.isdir(child):
            _log_dir(child, prefix + indent)

def _smoke_test(
    schema_version: str,
    listing_url: str,
    image: str,
    minimum_packages: int,
    minimum_vulnerabilities: int,
    store_root: str,
) -> None:
    """Scan ``image`` with grype configured to update from ``listing_url`` and check the result size.

    Raises:
        ValueError: if the scan reports no packages or no vulnerabilities, or fewer than
            ``minimum_packages`` / ``minimum_vulnerabilities``.
    """
    logging.info(f"testing grype schema-version={schema_version!r}")

    scanner = grype.Grype(
        schema_version=schema_version,
        store_root=store_root,
        update_url=listing_url,
    )
    report = grype.Report(report_contents=scanner.run(user_input=image))
    packages, vulnerabilities = report.parse()
    logging.info(f"scan result with downloaded DB: packages={len(packages)} vulnerabilities={len(vulnerabilities)}")

    # both collections must be non-empty before the thresholds are considered
    if not (packages and vulnerabilities):
        msg = "validation failed: missing packages and/or vulnerabilities"
        raise ValueError(msg)

    if minimum_packages > len(packages):
        msg = f"validation failed: expected at least {minimum_packages} packages, got {len(packages)}"
        raise ValueError(msg)

    if minimum_vulnerabilities > len(vulnerabilities):
        msg = f"validation failed: expected at least {minimum_vulnerabilities} vulnerabilities, got {len(vulnerabilities)}"
        raise ValueError(msg)


def smoke_test(
    test_latest: Latest,
    archive_path: str,
    image: str,
    minimum_packages: int,
    minimum_vulnerabilities: int,
) -> None:
    """Check that grype can update from ``test_latest`` and produce a plausible scan.

    The latest.json document is written to ``v<major>/latest.json`` inside a temporary directory,
    the DB archive is made available next to it under ``test_latest.path``, and the directory is
    served over a local HTTP server that grype is pointed at for its DB update.

    Args:
        test_latest: the latest.json document under test.
        archive_path: path to the DB archive the document refers to.
        image: image for grype to scan.
        minimum_packages: minimum number of packages the scan must report.
        minimum_vulnerabilities: minimum number of vulnerabilities the scan must report.

    Raises:
        ValueError: if the document has no schema version or archive path, or if the scan does
            not meet the minimums.
    """
    if not test_latest.schema_version:
        msg = "latest document is missing a schema version"
        raise ValueError(msg)

    if not test_latest.path:
        msg = "latest document is missing the archive path"
        raise ValueError(msg)

    major_version = test_latest.schema_version.split(".")[0]

    with tempfile.TemporaryDirectory(prefix="grype-db-smoke-test") as tempdir:
        latest_contents = test_latest.to_json()

        installation_path = os.path.join(tempdir, "grype-install")

        sub_path = os.path.join(tempdir, "v" + major_version)
        os.makedirs(sub_path, exist_ok=True)

        logging.info(latest_contents)
        with open(os.path.join(sub_path, LATEST_FILENAME), "w") as f:
            f.write(latest_contents)

        # make the archive available at the expected location via symlink. A symlink (rather than
        # a hard link) is used since the temp dir and the archive may be on different filesystems.
        archive_dest = os.path.join(sub_path, test_latest.path)
        os.makedirs(os.path.dirname(archive_dest), exist_ok=True)
        os.symlink(os.path.abspath(archive_path), archive_dest)

        # ensure grype can perform a db update using this latest.json. Note: we are only testing that the
        # latest.json for the DB is usable (the download succeeds and the grype update process, which does
        # checksum verifications, passes). This test does NOT check the integrity of the DB since that has
        # already been tested in the build steps.
        with _http_server(directory=tempdir, schema_version=test_latest.schema_version) as listing_url:
            _smoke_test(
                schema_version=test_latest.schema_version,
                listing_url=listing_url,
                image=image,
                minimum_packages=minimum_packages,
                minimum_vulnerabilities=minimum_vulnerabilities,
                store_root=installation_path,
            )

27 changes: 19 additions & 8 deletions manager/src/grype_db_manager/grype.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
class Grype:
BIN = "grype"

def __init__(self, schema_version: int, store_root: str, update_url: str = "", release: str | None = None):
def __init__(self, schema_version: int | str, store_root: str, update_url: str = "", release: str | None = None):
if isinstance(schema_version, str):
schema_version = int(schema_version.split(".")[0])
self.schema_version = schema_version
if release:
logging.warning(f"overriding grype release for schema={schema_version!r} with release={release!r}")
Expand All @@ -31,7 +33,7 @@ def __init__(self, schema_version: int, store_root: str, update_url: str = "", r
self.release = schema.grype_version(schema_version)
logging.debug(f"using grype release={self.release!r} for schema={schema_version!r}")

env = {}
env = self._env()
if update_url:
env["GRYPE_DB_UPDATE_URL"] = update_url
self.tool = grype.Grype.install(version=self.release, path=os.path.join(store_root, self.release), env=env)
Expand All @@ -43,20 +45,29 @@ def supported_schema_versions() -> list[str]:
obj = json.load(fh)
return obj.keys()

def _env(self, env: dict[str, str] | None = None) -> dict[str, str]:
    """Return the environment to run grype with.

    Starts from a copy of ``env`` when one is given (and non-empty), otherwise from a copy of the
    current process environment, and sets ``GRYPE_EXP_DBV6=true`` when ``self.schema_version``
    is 6 or greater. The mapping passed in is never modified.

    Args:
        env: optional base environment.

    Returns:
        A new dictionary holding the resulting environment.
    """
    # always work on a copy so neither the caller's dict nor os.environ is mutated
    merged = dict(env) if env else os.environ.copy()
    if self.schema_version >= 6:
        merged["GRYPE_EXP_DBV6"] = "true"
    return merged

def update_db(self) -> None:
self.tool.run("db", "update", "-vv")
self.tool.run("db", "update", "-vv", env=self._env())

# ensure the db cache is not empty for the current schema
check_db_cache_dir(self.schema_version, os.path.join(self.tool.path, "db"))

def import_db(self, db_path: str) -> None:
self.tool.run("db", "import", db_path)
self.tool.run("db", "import", db_path, env=self._env())

# ensure the db cache is not empty for the current schema
check_db_cache_dir(self.schema_version, os.path.join(self.tool.path, "db"))

def run(self, user_input: str) -> str:
return self.tool.run("-o", "json", "-v", user_input)
return self.tool.run("-o", "json", "-v", user_input, env=self._env())


class Report:
Expand Down Expand Up @@ -97,17 +108,17 @@ def parse(self) -> tuple[set[Package], set[Vulnerability]]:
return packages, vulnerabilities


def check_db_cache_dir(schema_version: str, db_runtime_dir: str) -> None:
def check_db_cache_dir(schema_version: int, db_runtime_dir: str) -> None:
"""
Ensure that there is a `metadata.json` file for the cache directory, which signals that there
are files related to a database pull
"""
# ensure the db cache is not empty for the current schema
if schema_version == "1":
if schema_version == 1:
# older grype versions do not support schema-based cache directories
db_metadata_file = os.path.join(db_runtime_dir, "metadata.json")
else:
db_metadata_file = os.path.join(db_runtime_dir, schema_version, "metadata.json")
db_metadata_file = os.path.join(db_runtime_dir, str(schema_version), "metadata.json")

if os.path.exists(db_metadata_file):
# the metadata.json file exists and grype will be able to work with it
Expand Down

0 comments on commit 3cf9ccc

Please sign in to comment.