diff --git a/ingestion_tools/dataset_configs/10311_draft.yaml b/ingestion_tools/dataset_configs/10311_draft.yaml new file mode 100644 index 00000000..2297a304 --- /dev/null +++ b/ingestion_tools/dataset_configs/10311_draft.yaml @@ -0,0 +1,128 @@ +datasets: +- sources: + - destination_glob: + list_glob: '{output_path}/*' + match_regex: '/\d*$' + name_regex: (.*) +depositions: +- metadata: + authors: &id001 + - ORCID: 0000-0003-4685-037X + corresponding_author_status: true + name: Utz Heinrich Ermel + primary_author_status: true + dates: &id002 + deposition_date: '2024-10-15' + last_modified_date: '2024-10-15' + release_date: '2024-10-15' + deposition_description: Standardized tomograms. + deposition_identifier: 10311 + deposition_title: Standardized tomograms + deposition_types: + - tomogram + sources: + - literal: + value: + - 10311 +runs: +- sources: + - destination_glob: + list_glob: '{dataset_output_path}/*' + match_regex: .* + name_regex: (.*) + exclude: + - Images + - dataset_metadata.json +standardization_config: + deposition_id: 10311 + source_prefix: CZII/standard_tomograms_v2/ +version: 1.1.0 +voxel_spacings: +- sources: + - source_glob: + list_glob: '{dataset_name}_{run_name}/*' + match_regex: '[\d\.]{5,7}$' + name_regex: '([\d\.]{5,7})$' + exclude: + - logs +alignments: +- metadata: + alignment_type: LOCAL + format: ARETOMO3 + method_type: projection_matching + is_portal_standard: true + offset: + x: 0 + y: 0 + z: 0 + x_rotation_offset: 0 + tilt_offset: 0 + sources: + - source_glob: + list_glob: '{dataset_name}_{run_name}/*/outputs/aretomo/reconstruct_only/{run_name}.aln' +tomograms: +- metadata: + align_software: ARETOMO3 + authors: *id001 + ctf_corrected: true + fiducial_alignment_status: FIDUCIAL + offset: + x: 0 + y: 0 + z: 0 + processing: filtered + processing_software: AreTomo3 + reconstruction_method: WBP + reconstruction_software: AreTomo3 + tomogram_version: 1.0 + voxel_spacing: float {voxel_spacing_name} + is_visualization_default: true + is_portal_standard: true + dates: *id002 + sources: + - source_glob: + list_glob: '{dataset_name}_{run_name}/{voxel_spacing_name}/outputs/aretomo/reconstruct_only/{run_name}_Vol.zarr' +- metadata: + align_software: ARETOMO3 + authors: *id001 + ctf_corrected: true + fiducial_alignment_status: FIDUCIAL + offset: + x: 0 + y: 0 + z: 0 + processing: raw + processing_software: AreTomo3 + reconstruction_method: WBP + reconstruction_software: AreTomo3 + tomogram_version: 1.0 + voxel_spacing: float {voxel_spacing_name} + is_visualization_default: false + is_standardized: true + dates: *id002 + sources: + - source_glob: + list_glob: '{dataset_name}_{run_name}/{voxel_spacing_name}/outputs/aretomo/reconstruct_only/{run_name}_2ND_Vol.zarr' + match_regex: .* +- metadata: + align_software: ARETOMO3 + authors: *id001 + ctf_corrected: true + fiducial_alignment_status: FIDUCIAL + offset: + x: 0 + y: 0 + z: 0 + processing: raw + processing_software: AreTomo3 + reconstruction_method: SART + reconstruction_software: AreTomo3 + tomogram_version: 1.0 + voxel_spacing: float {voxel_spacing_name} + is_visualization_default: false + is_standardized: true + dates: *id002 + sources: + - source_glob: + list_glob: '{dataset_name}_{run_name}/{voxel_spacing_name}/outputs/aretomo/reconstruct_only/{run_name}_3RD_Vol.zarr' + match_regex: .* diff --git a/ingestion_tools/dataset_configs/template.yaml b/ingestion_tools/dataset_configs/template.yaml index 1cc2cda0..b2d77c7b 100644 --- a/ingestion_tools/dataset_configs/template.yaml +++ b/ingestion_tools/dataset_configs/template.yaml @@ -298,7 +298,6 @@ tomograms: OPTIONAL processing_software: RECOMMENDED, STRING reconstruction_method: REQUIRED, STRING (ENUM [SART, Fourier Space, SIRT, WBP, Unknown]) reconstruction_software: REQUIRED, STRING - tomogram_type: OPTIONAL, STRING (ENUM [CANONICAL]) tomogram_version: REQUIRED, FLOAT voxel_spacing: REQUIRED, FLOAT (POSITIVE) is_visualization_default: OPTIONAL, BOOLEAN (DEFAULT TRUE) diff --git a/ingestion_tools/scripts/common/finders.py b/ingestion_tools/scripts/common/finders.py index 1d674a0d..b95f2732 100644 --- a/ingestion_tools/scripts/common/finders.py +++ b/ingestion_tools/scripts/common/finders.py @@ -201,8 +201,10 @@ def _should_search(self, **parent_objects: dict[str, Any] | None) -> bool: return False return True - def _get_glob_vars(self, **parent_objects: dict[str, Any] | None): - glob_vars = {} + def _get_glob_vars(self, config: DepositionImportConfig, **parent_objects: dict[str, Any] | None) -> dict[str, str]: + glob_vars = { + "output_path": config.output_prefix, + } for _, parent_obj in parent_objects.items(): glob_vars.update(parent_obj.get_glob_vars()) # These are *only* used by source glob finder, which is kindof a hack :'( @@ -234,7 +236,7 @@ def _get_results( **parent_objects: dict[str, Any] | None, ) -> list[BaseImporter]: loader = self.load(config, **parent_objects) - glob_vars = self._get_glob_vars(**parent_objects) + glob_vars = self._get_glob_vars(config, **parent_objects) found = loader.find(config, glob_vars) filtered_results = {} for name, path in found.items():