Skip to content

Commit

Permalink
Use python-isal for fast zip deflate compression in rocrate export
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Jan 23, 2024
1 parent 24dd984 commit 7e9c02b
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 1 deletion.
1 change: 1 addition & 0 deletions lib/galaxy/dependencies/pinned-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ idna==3.5 ; python_version >= "3.7" and python_version < "3.12"
importlib-metadata==4.13.0 ; python_version >= "3.7" and python_version < "3.12"
importlib-resources==5.12.0 ; python_version >= "3.7" and python_version < "3.12"
isa-rwval==0.10.10 ; python_version >= "3.7" and python_version < "3.12"
isal==1.3.0 ; python_version >= "3.7" and python_version < "3.12"
isodate==0.6.1 ; python_version >= "3.7" and python_version < "3.12"
jinja2==3.1.2 ; python_version >= "3.7" and python_version < "3.12"
jmespath==1.0.1 ; python_version >= "3.7" and python_version < "3.12"
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2753,7 +2753,7 @@ def _finalize(self) -> None:
out_file = out_file_name[: -len(".zip")]
else:
out_file = out_file_name
rval = shutil.make_archive(out_file, "zip", self.export_directory)
rval = shutil.make_archive(out_file, "fastzip", self.export_directory)
if not self.file_source_uri:
shutil.move(rval, self.out_file)
else:
Expand Down
88 changes: 88 additions & 0 deletions lib/galaxy/util/compression_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import io
import logging
import os
import shutil
import tarfile
import tempfile
import zipfile
Expand Down Expand Up @@ -30,6 +31,12 @@
is_gzip,
)

try:
from isal import isal_zlib
except ImportError:
isal_zlib = None # type: ignore[assignment]


log = logging.getLogger(__name__)

FileObjTypeStr = Union[IO[str], io.TextIOWrapper]
Expand Down Expand Up @@ -345,3 +352,84 @@ def zipfile_ok(self, path_to_archive: StrPath) -> bool:
if not member_path.startswith(basename):
return False
return True


class FastZipFile(zipfile.ZipFile):
"""
Simple wrapper around ZipFile that uses the default compression strategy of ISA-L
to write zip files. Ignores compresslevel and compresstype arguments, and is
3 to 4 times faster than the zlib implementation at the default compression level.
"""

def _open_to_write(self, *args, **kwargs): # type: ignore[no-untyped-def]
zwf = super()._open_to_write(*args, **kwargs) # type: ignore[misc]
if isal_zlib:
zwf._compressor = isal_zlib.compressobj(isal_zlib.ISAL_DEFAULT_COMPRESSION, isal_zlib.DEFLATED, -15, 9)
return zwf


# modified from shutil._make_zipfile
def make_fast_zipfile(
base_name: str,
base_dir: str,
verbose: int = 0,
dry_run: int = 0,
logger: Optional[logging.Logger] = None,
owner: Optional[str] = None,
group: Optional[str] = None,
root_dir: Optional[str] = None,
) -> str:
"""Create a zip file from all the files under 'base_dir'.
The output zip file will be named 'base_name' + ".zip". Returns the
name of the output zip file.
"""

zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)

if archive_dir and not os.path.exists(archive_dir):
if logger is not None:
logger.info("creating %s", archive_dir)
if not dry_run:
os.makedirs(archive_dir)

if logger is not None:
logger.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)

if not dry_run:
with FastZipFile(zip_filename, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
arcname = os.path.normpath(base_dir)
if root_dir is not None:
base_dir = os.path.join(root_dir, base_dir)
base_dir = os.path.normpath(base_dir)
if arcname != os.curdir:
zf.write(base_dir, arcname)
if logger is not None:
logger.info("adding '%s'", base_dir)
for dirpath, dirnames, filenames in os.walk(base_dir):
arcdirpath = dirpath
if root_dir is not None:
arcdirpath = os.path.relpath(arcdirpath, root_dir)
arcdirpath = os.path.normpath(arcdirpath)
for name in sorted(dirnames):
path = os.path.join(dirpath, name)
arcname = os.path.join(arcdirpath, name)
zf.write(path, arcname)
if logger is not None:
logger.info("adding '%s'", path)
for name in filenames:
path = os.path.join(dirpath, name)
path = os.path.normpath(path)
if os.path.isfile(path):
arcname = os.path.join(arcdirpath, name)
zf.write(path, arcname)
if logger is not None:
logger.info("adding '%s'", path)

if root_dir is not None:
zip_filename = os.path.abspath(zip_filename)
return zip_filename


shutil.register_archive_format("fastzip", make_fast_zipfile)
1 change: 1 addition & 0 deletions packages/data/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ install_requires =
h5grove
h5py
isa-rwval
isal
MarkupSafe
msal
mrcfile
Expand Down
1 change: 1 addition & 0 deletions packages/files/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ include_package_data = True
install_requires =
galaxy-util
fs
isal
typing-extensions
packages = find:
python_requires = >=3.7
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ h5py = "*"
importlib-metadata = "<5" # Work around https://github.com/celery/kombu/issues/1600
importlib-resources = "*"
isa-rwval = ">=0.10.10"
isal = "*"
kombu = "*"
lagom = "*"
Mako = "*"
Expand Down

0 comments on commit 7e9c02b

Please sign in to comment.