def _validate_latest(cfg: config.Application, latest_file: str, archive_path: str) -> None:
    """Smoke test a ``latest.json`` document against the DB archive it describes.

    The document is parsed into a ``db.Latest``, the ``validate.listing`` section of the
    configuration is checked for the values the smoke test needs, and the work is then
    handed to ``db.latest.smoke_test``.

    Args:
        cfg: application configuration; ``cfg.validate.listing`` supplies the image to
            scan and the minimum package / vulnerability counts.
        latest_file: path of the ``latest.json`` document to validate.
        archive_path: path of the DB archive that should be served next to the document.

    Raises:
        ValueError: when the image, minimum packages or minimum vulnerabilities are
            not configured (a value of 0 is treated the same as "not configured").
    """
    # JSON documents are UTF-8; do not depend on whatever the locale default happens to be
    with open(latest_file, encoding="utf-8") as f:
        latest_obj = db.Latest.from_json(f.read())

    listing_cfg = cfg.validate.listing

    if not listing_cfg.image:
        msg = "no image specified to validate against"
        raise ValueError(msg)

    if not listing_cfg.minimum_packages:
        msg = "minimum packages must be specified"
        raise ValueError(msg)

    if not listing_cfg.minimum_vulnerabilities:
        msg = "minimum vulnerabilities must be specified"
        raise ValueError(msg)

    db.latest.smoke_test(
        latest_obj,
        archive_path,
        image=listing_cfg.image,
        minimum_packages=listing_cfg.minimum_packages,
        minimum_vulnerabilities=listing_cfg.minimum_vulnerabilities,
    )
b/manager/src/grype_db_manager/db/__init__.py index b7e04da3..a6a6c0d7 100644 --- a/manager/src/grype_db_manager/db/__init__.py +++ b/manager/src/grype_db_manager/db/__init__.py @@ -1,11 +1,14 @@ from . import listing, metadata, schema +from .latest import Latest from .listing import Listing from .metadata import Metadata from .validation import capture_results __all__ = [ + "Latest", "Listing", "Metadata", + "latest", "listing", "metadata", "schema", diff --git a/manager/src/grype_db_manager/db/latest.py b/manager/src/grype_db_manager/db/latest.py new file mode 100644 index 00000000..6dfbe740 --- /dev/null +++ b/manager/src/grype_db_manager/db/latest.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import contextlib +import datetime +import functools +import json +import logging +import os +import tempfile +import threading +from dataclasses import dataclass +from http.server import HTTPServer, SimpleHTTPRequestHandler +from typing import TYPE_CHECKING + +from dataclass_wizard import asdict, fromdict + +from grype_db_manager import grype + +if TYPE_CHECKING: + from collections.abc import Iterator + +LATEST_FILENAME = "latest.json" + +# Latest is a dataclass that represents the latest.json document for schemas v6. 
@dataclass
class Latest:
    """The ``latest.json`` document that is published for schema v6 databases."""

    # status indicates if the database is actively being maintained and distributed
    status: str | None = None

    # schema version of the DB schema
    schema_version: str | None = None

    # timestamp the database was built
    built: datetime.datetime | None = None

    # path to a DB archive relative to the listing file hosted location (NOT the absolute URL)
    path: str = ""

    # self-describing digest of the database archive referenced in path
    checksum: str = ""

    @classmethod
    def from_json(cls, contents: str) -> Latest:
        """Build a document from its JSON text."""
        return cls.from_dict(json.loads(contents))

    @classmethod
    def from_dict(cls, contents: dict) -> Latest:
        """Build a document from an already decoded mapping."""
        return fromdict(cls, contents)

    def to_json(self, indent: int | None = None) -> str:
        """Serialize the document to JSON text with sorted keys."""
        return json.dumps(self.to_dict(), indent=indent, sort_keys=True)

    def to_dict(self) -> dict:
        """Return the document as a plain mapping."""
        return asdict(self)


@contextlib.contextmanager
def _http_server(directory: str, schema_version: str, port: int = 5555) -> Iterator[str]:
    """Serve ``directory`` over HTTP on 127.0.0.1 for the lifetime of the ``with`` block.

    Args:
        directory: root directory to expose.
        schema_version: DB schema version; only the major component (the text before
            the first ".") is used to build the yielded URL.
        port: TCP port to bind. Pass 0 to let the OS choose a free port.

    Yields:
        The URL of ``v<major>/latest.json`` on the local server.

    Raises:
        OSError: if the address cannot be bound (for example the port is already in use).
    """
    major_version = schema_version.split(".")[0]

    # Bind in the calling thread: once the constructor returns the socket is already
    # listening, so a client can never observe "connection refused" after we yield, and
    # a bind failure is raised to the caller instead of being lost in a daemon thread.
    httpd = HTTPServer(
        ("127.0.0.1", port),
        functools.partial(SimpleHTTPRequestHandler, directory=directory),
    )
    host, bound_port = httpd.server_address[:2]
    url = f"http://{host}:{bound_port}"
    latest_url = f"{url}/v{major_version}/{LATEST_FILENAME}"

    logging.info(f"starting test server at {url!r}")
    # show tree output of the given directory to the log
    _log_dir(directory)

    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    try:
        yield latest_url
    finally:
        # stop the serve loop, release the listening socket, then reap the thread
        httpd.shutdown()
        httpd.server_close()
        thread.join()


def _log_dir(path: str, prefix: str = "") -> None:
    """Write a tree-style listing of ``path`` (entries sorted by name) to the log."""
    items = sorted(os.listdir(path))
    last_index = len(items) - 1
    for i, item in enumerate(items):
        is_last = i == last_index
        connector = "└── " if is_last else "├── "
        logging.info(f"{prefix}{connector}{item}")
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            _log_dir(item_path, prefix + ("    " if is_last else "│   "))


def _smoke_test(
    schema_version: str,
    listing_url: str,
    image: str,
    minimum_packages: int,
    minimum_vulnerabilities: int,
    store_root: str,
) -> None:
    """Scan ``image`` with a grype that updates from ``listing_url`` and check the counts.

    Raises:
        ValueError: if the scan reports no packages or no vulnerabilities, or fewer
            than the given minimums.
    """
    logging.info(f"testing grype schema-version={schema_version!r}")
    tool_obj = grype.Grype(
        schema_version=schema_version,
        store_root=store_root,
        update_url=listing_url,
    )

    output = tool_obj.run(user_input=image)
    packages, vulnerabilities = grype.Report(report_contents=output).parse()
    logging.info(f"scan result with downloaded DB: packages={len(packages)} vulnerabilities={len(vulnerabilities)}")
    if not packages or not vulnerabilities:
        msg = "validation failed: missing packages and/or vulnerabilities"
        raise ValueError(msg)

    if len(packages) < minimum_packages:
        msg = f"validation failed: expected at least {minimum_packages} packages, got {len(packages)}"
        raise ValueError(msg)

    if len(vulnerabilities) < minimum_vulnerabilities:
        msg = f"validation failed: expected at least {minimum_vulnerabilities} vulnerabilities, got {len(vulnerabilities)}"
        raise ValueError(msg)


def _stage_latest(root: str, test_latest: Latest, archive_path: str) -> None:
    """Lay out ``v<major>/latest.json`` and the archive it references beneath ``root``."""
    listing_contents = test_latest.to_json()
    major_version = test_latest.schema_version.split(".")[0]

    sub_path = os.path.join(root, f"v{major_version}")
    os.makedirs(sub_path, exist_ok=True)

    logging.info(listing_contents)
    with open(os.path.join(sub_path, LATEST_FILENAME), "w", encoding="utf-8") as f:
        f.write(listing_contents)

    # make the archive available at the location the document points to
    archive_dest = os.path.join(sub_path, test_latest.path)
    os.makedirs(os.path.dirname(archive_dest), exist_ok=True)
    try:
        # a hard link is free, but only works when both paths are on the same filesystem
        os.link(archive_path, archive_dest)
    except OSError:
        # e.g. the temp dir lives on another device: fall back to a symlink
        os.symlink(os.path.abspath(archive_path), archive_dest)


def smoke_test(
    test_latest: Latest,
    archive_path: str,
    image: str,
    minimum_packages: int,
    minimum_vulnerabilities: int,
) -> None:
    """Check that grype can update from ``test_latest`` and produce a sane scan.

    The document and the archive it references are staged in a temporary directory that
    is served by a local HTTP server; grype is pointed at that server, scans ``image``,
    and the number of packages and vulnerabilities found is compared to the minimums.

    Args:
        test_latest: the ``latest.json`` document under test.
        archive_path: path of the DB archive to serve at ``test_latest.path``.
        image: image reference handed to grype as the scan target.
        minimum_packages: fewest packages the scan may report.
        minimum_vulnerabilities: fewest vulnerabilities the scan may report.

    Raises:
        ValueError: if the document has no schema version or archive path, or if the
            scan results fall below the minimums.
    """
    if not test_latest.schema_version:
        msg = "latest document does not specify a schema version"
        raise ValueError(msg)

    if not test_latest.path:
        msg = "latest document does not specify an archive path"
        raise ValueError(msg)

    with tempfile.TemporaryDirectory(prefix="grype-db-smoke-test") as tempdir:
        installation_path = os.path.join(tempdir, "grype-install")

        _stage_latest(tempdir, test_latest, archive_path)

        # Only the usability of latest.json is tested here: that the document can be
        # fetched and that grype's scan using the DB it points to yields results. The
        # integrity of the DB itself has already been tested in the build steps.
        with _http_server(directory=tempdir, schema_version=test_latest.schema_version) as listing_url:
            _smoke_test(
                schema_version=test_latest.schema_version,
                listing_url=listing_url,
                image=image,
                minimum_packages=minimum_packages,
                minimum_vulnerabilities=minimum_vulnerabilities,
                store_root=installation_path,
            )
def _env(self, env: dict[str, str] | None = None) -> dict[str, str]:
    """Return the environment to hand to the grype tool.

    Args:
        env: base environment. When omitted (``None``) a copy of ``os.environ`` is
            used. A mapping that is passed in, including an empty one, is respected
            and is never modified.

    Returns:
        A new dict. For schema versions >= 6 it additionally carries
        ``GRYPE_EXP_DBV6=true``.
    """
    # `is None` rather than truthiness: an explicitly empty environment is a valid request
    merged = os.environ.copy() if env is None else dict(env)
    if self.schema_version >= 6:
        # NOTE(review): presumably opts grype into v6 DB support; confirm against grype
        merged["GRYPE_EXP_DBV6"] = "true"
    return merged


def update_db(self) -> None:
    """Run ``grype db update -vv`` and verify that a DB landed in the cache directory."""
    self.tool.run("db", "update", "-vv", env=self._env())

    # ensure the db cache is not empty for the current schema
    check_db_cache_dir(self.schema_version, os.path.join(self.tool.path, "db"))


def import_db(self, db_path: str) -> None:
    """Run ``grype db import <db_path>`` and verify that a DB landed in the cache directory."""
    self.tool.run("db", "import", db_path, env=self._env())

    # ensure the db cache is not empty for the current schema
    check_db_cache_dir(self.schema_version, os.path.join(self.tool.path, "db"))


def run(self, user_input: str) -> str:
    """Scan ``user_input`` with JSON output (``-o json -v``) and return the tool's output."""
    return self.tool.run("-o", "json", "-v", user_input, env=self._env())
schema_version == 1: # older grype versions do not support schema-based cache directories db_metadata_file = os.path.join(db_runtime_dir, "metadata.json") else: - db_metadata_file = os.path.join(db_runtime_dir, schema_version, "metadata.json") + db_metadata_file = os.path.join(db_runtime_dir, str(schema_version), "metadata.json") if os.path.exists(db_metadata_file): # the metadata.json file exists and grype will be able to work with it