Skip to content

Commit

Permalink
Merge pull request #3244 from MatthiasZepper/seqera_containers_exampl…
Browse files Browse the repository at this point in the history
…e_mod

Download: Seqera container support - Patch 1
  • Loading branch information
MatthiasZepper authored Oct 25, 2024
2 parents eb4c237 + 32a7e6b commit 65db460
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 24 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

### Download

- First steps towards fixing [#3179](https://github.com/nf-core/tools/issues/3179): Modify `prioritize_direct_download()` to retain Seqera Singularity https:// Container URIs and hardcode Seqera Containers into `gather_registries()` ([#3244](https://github.com/nf-core/tools/pull/3244)).

### Linting

### Modules
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
--outdir ./${{ env.REPOTITLE_LOWERCASE }} \
--compress "none" \
--container-system 'singularity' \
--container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \
--container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \
--container-cache-utilisation 'amend' \
--download-configuration 'yes'
Expand Down
47 changes: 38 additions & 9 deletions nf_core/pipelines/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ def rectify_raw_container_matches(self, raw_findings):
"""
return self.prioritize_direct_download(cleaned_matches)

def prioritize_direct_download(self, container_list):
def prioritize_direct_download(self, container_list: List[str]) -> List[str]:
"""
Helper function that takes a list of container images (URLs and Docker URIs),
eliminates all Docker URIs for which also a URL is contained and returns the
Expand All @@ -993,13 +993,31 @@ def prioritize_direct_download(self, container_list):
we want to keep it and not replace it with whatever we have now (which might be the Docker URI).
A regex that matches http, r"^$|^http" could thus be used to prioritize the Docker URIs over http Downloads
We also need to handle a special case: The https:// Singularity downloads from Seqera Containers all end in 'data', although
they are not equivalent, e.g.:
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data'
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data'
"""
d = {}
d: Dict[str, str] = {}
seqera_containers: List[str] = []
all_others: List[str] = []

for c in container_list:
if bool(re.search(r"/data$", c)):
seqera_containers.append(c)
else:
all_others.append(c)

for c in all_others:
if re.match(r"^$|(?!^http)", d.get(k := re.sub(".*/(.*)", "\\1", c), "")):
log.debug(f"{c} matches and will be saved as {k}")
d[k] = c
return sorted(list(d.values()))

# combine deduplicated others and Seqera containers
return sorted(list(d.values()) + seqera_containers)

def gather_registries(self, workflow_directory: str) -> None:
"""Fetch the registries from the pipeline config and CLI arguments and store them in a set.
Expand All @@ -1023,7 +1041,13 @@ def gather_registries(self, workflow_directory: str) -> None:
self.registry_set.add(self.nf_config[registry])

# add depot.galaxyproject.org to the set, because it is the default registry for singularity hardcoded in modules
self.registry_set.add("depot.galaxyproject.org")
self.registry_set.add("depot.galaxyproject.org/singularity")

# add community.wave.seqera.io/library to the set to support the new Seqera Docker container registry
self.registry_set.add("community.wave.seqera.io/library")

# add https://community-cr-prod.seqera.io/docker/registry/v2/ to the set to support the new Seqera Singularity container registry
self.registry_set.add("community-cr-prod.seqera.io/docker/registry/v2")

def symlink_singularity_images(self, image_out_path: str) -> None:
"""Create a symlink for each registry in the registry set that points to the image.
Expand All @@ -1040,10 +1064,13 @@ def symlink_singularity_images(self, image_out_path: str) -> None:

if self.registry_set:
# Create a regex pattern from the set, in case trimming is needed.
trim_pattern = "|".join(f"^{re.escape(registry)}-?" for registry in self.registry_set)
trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set)

for registry in self.registry_set:
if not os.path.basename(image_out_path).startswith(registry):
# Nextflow will convert it like this as well, so we need to mimic its behavior
registry = registry.replace("/", "-")

if not bool(re.search(trim_pattern, os.path.basename(image_out_path))):
symlink_name = os.path.join("./", f"{registry}-{os.path.basename(image_out_path)}")
else:
trimmed_name = re.sub(f"{trim_pattern}", "", os.path.basename(image_out_path))
Expand Down Expand Up @@ -1263,7 +1290,7 @@ def singularity_image_filenames(self, container: str) -> Tuple[str, Optional[str
# if docker.registry / singularity.registry are set to empty strings at runtime, which can be included in the HPC config profiles easily.
if self.registry_set:
# Create a regex pattern from the set of registries
trim_pattern = "|".join(f"^{re.escape(registry)}-?" for registry in self.registry_set)
trim_pattern = "|".join(f"^{re.escape(registry)}-?".replace("/", "[/-]") for registry in self.registry_set)
# Use the pattern to trim the string
out_name = re.sub(f"{trim_pattern}", "", out_name)

Expand Down Expand Up @@ -1345,9 +1372,10 @@ def singularity_download_image(
log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
progress.update(task, description="Copying from cache to target directory")
shutil.copyfile(cache_path, out_path)
self.symlink_singularity_images(cache_path) # symlinks inside the cache directory

# Create symlinks to ensure that the images are found even with different registries being used.
self.symlink_singularity_images(output_path)
self.symlink_singularity_images(out_path)

progress.remove_task(task)

Expand Down Expand Up @@ -1456,9 +1484,10 @@ def singularity_pull_image(
log.debug(f"Copying {container} from cache: '{os.path.basename(out_path)}'")
progress.update(task, current_log="Copying from cache to target directory")
shutil.copyfile(cache_path, out_path)
self.symlink_singularity_images(cache_path) # symlinks inside the cache directory

# Create symlinks to ensure that the images are found even with different registries being used.
self.symlink_singularity_images(output_path)
self.symlink_singularity_images(out_path)

progress.remove_task(task)

Expand Down
11 changes: 11 additions & 0 deletions tests/data/mock_module_containers/modules/mock_seqera_container.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Mock module (test data): a CAT_FASTQ process whose container directive offers
// both a direct-download Singularity image URL and a Docker registry URI.
process CAT_FASTQ {
label 'process_single'

conda "${moduleDir}/environment.yml"
// The direct-download image URL is selected only when the container engine is
// 'singularity' and task.ext.singularity_pull_docker_container is falsy;
// in every other case the Docker registry URI is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' :
'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }"

// truncated

}
Loading

0 comments on commit 65db460

Please sign in to comment.