Skip to content

Commit

Permalink
Add hab install system
Browse files Browse the repository at this point in the history
- Add `hab install` command that resolves all distros required to resolve
multiple URIs and then downloads and installs any missing distros.
- Distro resolving is now handled by a `DistroFinder` class. This class can
be replaced by a subclass to customize how distros are searched for and resolved.
- Add DistroFinder subclasses for finding distros stored as zip files and
to download from file services like AWS S3.
- Site now has a downloads dictionary that configures how `hab install`
can resolve remote distros for installing locally.
- `Resolver.distro_mode_override` with context can be used to change
`Resolver.distro`'s output to the `Site.downloads["distros"]` used to find
remote distros to install.
- Add `hab.utils.glob_path` that supports globbing a pathlib.Path object
with glob strings in the path.
  • Loading branch information
MHendricks committed Dec 6, 2024
1 parent 14ff506 commit e524e90
Show file tree
Hide file tree
Showing 26 changed files with 1,670 additions and 86 deletions.
4 changes: 2 additions & 2 deletions hab/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
__all__ = ["__version__", "NotSet", "Resolver", "Site"]
__all__ = ["__version__", "DistroMode", "NotSet", "Resolver", "Site"]

from .utils import NotSet

# Note: Future imports depend on NotSet so it must be imported first
# isort: split

from .resolver import Resolver
from .resolver import DistroMode, Resolver
from .site import Site
from .version import version as __version__
6 changes: 5 additions & 1 deletion hab/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def generate_cache(self, resolver, site_file, version=1):
the provided site file. Use this method any time changes are made that
hab needs to be aware of. Caching is enabled by the existence of this file.
"""
from .distro_finders.distro_finder import DistroFinder
from .site import Site

# Indicate the version specification this habcache file conforms to.
Expand All @@ -125,6 +126,9 @@ def generate_cache(self, resolver, site_file, version=1):
glob_str, cls = stats
# Process each glob dir defined for this site
for dirname in temp_site.get(key, []):
# Caching is only supported for direct file paths
if isinstance(dirname, DistroFinder):
dirname = dirname.root
cfg_paths = output.setdefault(key, {}).setdefault(
platform_path_key(dirname).as_posix(), {}
)
Expand Down Expand Up @@ -180,7 +184,7 @@ def iter_cache_paths(cls, name, paths, cache, glob_str=None, include_path=True):
logger.debug(f"Using glob for {name} dir: {dirname}")
# Fallback to globing the file system
if glob_str:
paths = sorted(glob.glob(str(dirname / glob_str)))
paths = utils.glob_path(dirname / glob_str)
else:
paths = []
if not include_path:
Expand Down
92 changes: 81 additions & 11 deletions hab/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from click.shell_completion import CompletionItem
from colorama import Fore

from . import Resolver, Site, __version__, utils
from . import DistroMode, Resolver, Site, __version__, utils
from .parsers.unfrozen_config import UnfrozenConfig

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -603,7 +603,18 @@ def env(settings, uri, launch):
"--type",
"report_type",
type=click.Choice(
["nice", "site", "s", "uris", "u", "versions", "v", "forest", "f", "all-uris"]
# Note: Put short names on same line as full name
# fmt: off
[
"nice",
"site", "s",
"uris", "u",
"versions", "v",
"downloads",
"forest", "f",
"all-uris",
]
# fmt: on
),
default="nice",
help="Type of report.",
Expand Down Expand Up @@ -644,7 +655,7 @@ def dump(settings, uri, env, env_config, report_type, flat, verbosity, format_ty

resolver = settings.resolver

if report_type in ("uris", "versions", "forest"):
if report_type in ("uris", "versions", "downloads", "forest"):
from .parsers.format_parser import FormatParser

formatter = FormatParser(verbosity, color=True)
Expand All @@ -659,16 +670,22 @@ def dump(settings, uri, env, env_config, report_type, flat, verbosity, format_ty
resolver.configs, fmt=formatter.format
):
click.echo(line)
if report_type in ("versions", "forest"):
if report_type in ("versions", "downloads", "forest"):
click.echo(f'{Fore.YELLOW}{" Versions ".center(50, "-")}{Fore.RESET}')

for line in resolver.dump_forest(
resolver.distros,
attr="name",
fmt=formatter.format,
truncate=truncate,
):
click.echo(line)
mode = (
DistroMode.Downloaded
if report_type == "downloads"
else DistroMode.Installed
)
with resolver.distro_mode_override(mode):
for line in resolver.dump_forest(
resolver.distros,
attr="name",
fmt=formatter.format,
truncate=truncate,
):
click.echo(line)
elif report_type == "all-uris":
# Combines all non-placeholder URI's into a single json document and display.
# This can be used to compare changes to configs when editing them in bulk.
Expand Down Expand Up @@ -786,6 +803,59 @@ def cache(settings, path):
click.echo(f"Cache took: {e - s}, cache file: {out}")


@_cli.command()
@click.option(
    "-u",
    "--uri",
    "uris",
    multiple=True,
    help="A URI that is resolved and all required distros are installed. Can "
    "be used multiple times and each URI's distros are resolved independently.",
)
@click.option(
    "-d",
    "--distro",
    "distros",
    multiple=True,
    help="Additional distros to install. Can be used multiple times and each use "
    "is resolved independently.",
)
@click.option(
    "--dry-run/--no-dry-run",
    default=False,
    help="Don't actually install anything, just print what would be installed.",
)
@click.option(
    "--force-reinstall/--no-force-reinstall",
    default=False,
    help="Reinstall all resolved distros even if they are already installed.",
)
@click.option(
    "--target",
    type=click.Path(file_okay=False, resolve_path=True),
    help="Install distros into DIRECTORY. Defaults to the sites "
    'downloads["install_root"] setting.',
)
@click.pass_obj
def install(settings, uris, distros, dry_run, force_reinstall, target):
    """Install distros for use in hab. At least one uri or distro must be
    specified to install. This is intended to install all versions of hab distros
    that are required for a collection of hab URI on this system. This means that
    unlike pip this may install multiple versions of hab distros.
    """
    # NOTE: The docstring above doubles as the command's `--help` text.

    # The repeatable options arrive as (possibly empty) sequences. The resolver
    # is handed a list when values were given and None when they were not.
    uris = list(uris) if uris else None
    distros = list(distros) if distros else None

    # Nothing was requested, so there is nothing to resolve or install.
    if not (distros or uris):
        raise ValueError("You must specify at least one --uri or --distro to install.")

    # `--force-reinstall` is exposed to the resolver as its `replace` argument.
    settings.resolver.install(
        uris=uris,
        additional_distros=distros,
        target=target,
        dry_run=dry_run,
        replace=force_reinstall,
    )


def cli(*args, **kwargs):
"""Runs the hab cli. If an exception is raised, only the exception message
is printed and the stack trace is hidden. Use `hab -v ...` to enable showing
Expand Down
Empty file added hab/distro_finders/__init__.py
Empty file.
133 changes: 133 additions & 0 deletions hab/distro_finders/cloud_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import logging
import pathlib
import time
import zipfile
from abc import ABCMeta, abstractmethod

import remotezip
from cloudpathlib import CloudPath

from .df_zip import DistroFinderZip

logger = logging.getLogger(__name__)


class HabRemoteZip(remotezip.RemoteZip):
    """A `remotezip.RemoteZip` that stays open after leaving a with context.

    Creating a new RemoteZip instance is slow, and how slow depends on the size
    of the .zip file. A cloud based workflow doesn't need the file pointer closed
    the way a local file does, so exiting the context is a no-op and the same
    instance can be reused. Call `close()` explicitly once it is no longer needed.
    """

    def __exit__(self, type, value, traceback):
        # Intentionally skip closing the archive. Returning None also means any
        # exception raised inside the with block keeps propagating.
        return None


class DistroFinderCloudZip(DistroFinderZip, metaclass=ABCMeta):
    """Finds zipped distros that are stored remotely in Amazon S3 buckets.

    The `hab_filename` information is read directly from inside each .zip file,
    which is useful when you have direct access to the .zip file.

    For `path`, this class uses a .zip `member path`. A member path is the
    absolute path to the .zip joined with the member path of a file contained
    inside the .zip file. So if the archive file path is `c:/temp/dist_a_v0.1.zip`
    and the member is `hab_filename`, then the member_path would be
    `c:/temp/dist_a_v0.1.zip/.hab.json`.

    Note:
        This class should only be used to install distros in the hab download
        system.

    This expects one file to exist with a specific naming convention:
        - `{distro}_v{version}.zip` contains the entire contents of the distro.
          This should also contain the top level file `hab_filename`. When the
          distro is installed and hab is used normally this file will be used.
          The `hab_filename` file's contents are extracted from the zip file and
          used to initialize the `DistroVersion` returned by `self.distro`
          without being written to disk.

    Subclasses must implement the `client` property (getter and setter) and
    the `credentials` method.
    """

    def __init__(self, root, site=None, safe=False, client=None):
        # A client is only stored when one was passed in. Otherwise the
        # subclass's `client` property is expected to create it lazily.
        if client:
            self.client = client
        super().__init__(root, site=site, safe=safe)
        # Open remote archives keyed by their zip path, see `archive`.
        self._archives = {}

    def as_posix(self):
        """Returns the root path as a posix style string."""
        if not isinstance(self.root, CloudPath):
            return super().as_posix()
        # CloudPath doesn't need as_posix, its string form is used as is.
        return str(self.root)

    def cast_path(self, path):
        """Return path cast to the `pathlib.Path` like class preferred by this
        class, which is a `CloudPath` bound to `self.client`.
        """
        return CloudPath(path, client=self.client)

    @property
    @abstractmethod
    def client(self):
        """A `cloudpathlib.client.Client` used to create `CloudPath` instances."""

    @client.setter
    @abstractmethod
    def client(self, client):
        """Store the client object used to create `CloudPath` instances."""

    @abstractmethod
    def credentials(self):
        """Returns the credentials needed for requests to connect to the cloud
        resource. Generates these credentials using the client object.

        `archive` unpacks the return value as `auth, headers`.
        """

    def archive(self, zip_path, partial=True):
        """Returns a `zipfile.Zipfile` like instance for zip_path.

        Args:
            zip_path (cloudpathlib.CloudPath): The path to the zip file to open.
            partial (bool, optional): If True then you only need access to a small
                part of the archive. If True then `HabRemoteZip` will be used
                to only download specific files from the remote archive without
                caching them to disk. If False then remote archives will be fully
                downloaded to disk(using caching) before returning the open archive.
        """
        # Whole-archive access, or a zip_path that is already a pathlib path,
        # is opened with the standard zipfile module.
        open_with_zipfile = not partial or isinstance(zip_path, pathlib.PurePath)
        if open_with_zipfile:
            logger.debug(f"Using CloudPath to open(downloading if needed) {zip_path}.")
            local_zip = zipfile.ZipFile(zip_path)
            local_zip.filename = zip_path
            return local_zip

        # Creating a RemoteZip instance is very slow compared to local file
        # access, so hand back the existing connection if there is one.
        try:
            cached_zip = self._archives[zip_path]
        except KeyError:
            pass
        else:
            logger.debug(f"Reusing cloud .zip resource: {zip_path}")
            return cached_zip

        logger.debug(f"Connecting to cloud .zip resource: {zip_path}")
        started = time.time()
        auth, headers = self.credentials()

        remote_zip = HabRemoteZip(zip_path.as_url(), auth=auth, headers=headers)
        remote_zip.filename = zip_path
        finished = time.time()
        logger.info(
            f"Connected to cloud .zip resource: {zip_path}, took: {finished - started}"
        )

        # Remember the connection so later requests for this zip reuse it.
        self._archives[zip_path] = remote_zip
        return remote_zip

    def clear_cache(self, persistent=False):
        """Clear cached data in memory. If `persistent` is True then also remove
        cache data from disk if it exists.
        """
        if persistent:
            self.remove_download_cache()
        super().clear_cache(persistent=persistent)

        # Ensure all cached archives are closed before forgetting about them.
        for open_zip in self._archives.values():
            open_zip.close()
        self._archives = {}

        if persistent:
            # Clear downloaded temp files
            self.client.clear_cache()
Loading

0 comments on commit e524e90

Please sign in to comment.