Skip to content

Commit

Permalink
Merge branch 'daniel-ji/s3-data-validation-neuroglancer'
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel-ji committed Sep 5, 2024
2 parents d93bfdf + dec4460 commit 093f6e5
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 4 deletions.
12 changes: 12 additions & 0 deletions ingestion_tools/scripts/data_validation/fixtures/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,18 @@ def tomogram_metadata(tomo_meta_file: str, filesystem: FileSystemApi) -> Dict:
return json.load(f)


# ==================================================================================================
# Neuroglancer fixtures
# ==================================================================================================


@pytest.fixture(scope="session")
def neuroglancer_config(neuroglancer_config_file: str, filesystem: FileSystemApi) -> Dict:
    """Parse the neuroglancer config JSON file and return its contents.

    The file at ``neuroglancer_config_file`` is opened in text mode through
    ``filesystem`` and decoded with ``json.load``.
    """
    with filesystem.open(neuroglancer_config_file, "r") as config_handle:
        parsed_config = json.load(config_handle)
    return parsed_config


# ==================================================================================================
# Annotation fixtures
# ==================================================================================================
Expand Down
24 changes: 20 additions & 4 deletions ingestion_tools/scripts/data_validation/fixtures/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,28 @@ def tomo_zarr_file(
return file


# =============================================================================
# Run and voxel-specific fixtures, Neuroglancer
# =============================================================================


@pytest.fixture(scope="session")
def neuroglancer_dir(tomo_dir: str, filesystem: FileSystemApi) -> str:
    """[Dataset]/[ExperimentRun]/Tomograms/VoxelSpacing[voxel_spacing]/CanonicalTomogram/

    Returns ``tomo_dir`` unchanged: the neuroglancer directory is the same
    directory as the tomogram directory. ``filesystem`` is accepted but not used.
    """
    return tomo_dir


@pytest.fixture(scope="session")
def neuroglancer_config_file(
    neuroglancer_dir: str,
    filesystem: FileSystemApi,
) -> str:
    """[Dataset]/[ExperimentRun]/Tomograms/VoxelSpacing[voxel_spacing]/CanonicalTomogram/neuroglancer_config.json

    Returns the path of ``neuroglancer_config.json`` inside ``neuroglancer_dir``.
    If the file does not exist on ``filesystem``, the requesting test is failed
    via ``pytest.fail`` instead of receiving a path.
    """
    dst = f"{neuroglancer_dir}/neuroglancer_config.json"
    # pytest.fail raises, so execution only reaches the return when the file exists.
    if not filesystem.exists(dst):
        pytest.fail(f"Neuroglancer config file not found: {dst}")
    return dst


# =============================================================================
Expand Down
23 changes: 23 additions & 0 deletions ingestion_tools/scripts/data_validation/tests/helper_mrc.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import Dict

import allure
Expand All @@ -6,7 +7,11 @@
from mrcfile import utils
from mrcfile.mrcinterpreter import MrcInterpreter

from common.fs import S3Filesystem

# NOTE(review): presumably the allowed deviation when comparing voxel spacings;
# its use is outside this view, so confirm against the spacing tests.
SPACING_TOLERANCE = 0.001
# 1 MiB (2**20 bytes): test_disk_storage requires the difference between the
# expected and the actual .mrc file size to be strictly below this many bytes.
DISK_STORAGE_TOLERANCE = 2**20

# Lets classes that skip a test still show a title in the allure report,
# without repeating the allure title text in every skipped test.
Expand Down Expand Up @@ -206,6 +211,24 @@ def check_subimage_start(header, _interpreter, _mrc_filename):

self.mrc_header_helper(check_subimage_start)

@mrc_allure_title
def test_disk_storage(self, filesystem: S3Filesystem):
    """Check that each .mrc file's stored size matches the size implied by its header.

    The expected size is the voxel payload (nx * ny * nz * bytes per voxel) plus
    the extended header (``nsymbt`` bytes) plus the fixed 1024-byte main header.
    Any mismatch emits a warning; a mismatch of ``DISK_STORAGE_TOLERANCE`` bytes
    or more fails the test. Files whose name does not end in ``.mrc`` are skipped.
    """

    def check_disk_storage(header, _interpreter, mrc_filename, filesystem: S3Filesystem):
        del _interpreter
        if not mrc_filename.endswith(".mrc"):
            pytest.skip("Only checking disk storage for .mrc files (not compressed files)")

        # Header fields may be fixed-width NumPy integers (TODO confirm); converting
        # to Python ints keeps the product from overflowing on large volumes.
        voxel_count = int(header.nx) * int(header.ny) * int(header.nz)
        bytes_per_voxel = utils.dtype_from_mode(header.mode).itemsize
        # volume size + extended header size + header size (1024 bytes)
        expected_bytes = voxel_count * bytes_per_voxel + int(header.nsymbt) + 1024

        # Query the size once so the warning and the assertion use the same value.
        actual_bytes = filesystem.s3fs.size(mrc_filename)
        if actual_bytes != expected_bytes:
            warnings.warn(f"Expected {expected_bytes} bytes, got {actual_bytes} bytes", stacklevel=2)
        assert abs(expected_bytes - actual_bytes) < DISK_STORAGE_TOLERANCE, (
            f"{mrc_filename}: expected {expected_bytes} bytes, got {actual_bytes} bytes"
        )

    self.mrc_header_helper(check_disk_storage, filesystem=filesystem)

### BEGIN Voxel-spacing tests ###
@mrc_allure_title
def test_mrc_spacing(self):
Expand Down
12 changes: 12 additions & 0 deletions ingestion_tools/scripts/data_validation/tests/test_neuroglancer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import Dict

import allure
import pytest


@pytest.mark.tomogram
@pytest.mark.parametrize("run_name, voxel_spacing", pytest.run_spacing_combinations, scope="session")
class TestNeuroglancer:
    """Sanity checks for the neuroglancer config, parametrized over run name and voxel spacing."""

    @allure.title("Neuroglancer: sanity check neuroglancer config file.")
    def test_metadata(self, neuroglancer_config: Dict):
        """Require the config to contain a non-empty ``layers`` entry."""
        assert "layers" in neuroglancer_config
        layer_entries = neuroglancer_config["layers"]
        assert len(layer_entries) > 0
12 changes: 12 additions & 0 deletions ingestion_tools/scripts/data_validation/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import allure
import pytest
from tests.test_deposition import HelperTestDeposition

from common.fs import FileSystemApi


@pytest.mark.run
Expand All @@ -12,3 +15,12 @@ def test_metadata(self, run_name: str, run_metadata: Dict):
assert run_metadata["run_name"] == run_name
if "last_updated_at" in run_metadata:
assert isinstance(run_metadata["last_updated_at"], int)

@allure.title("Run: valid corresponding deposition metadata.")
def test_deposition_id(self, run_name: str, dataset_metadata: Dict, bucket: str, filesystem: FileSystemApi):
    """Validate the deposition metadata referenced by ``deposition_id``.

    The test is skipped when ``dataset_metadata`` has no ``deposition_id``
    (key missing or value ``None``); otherwise the check is delegated to
    ``HelperTestDeposition.check_deposition_metadata``.
    """
    # need run_name as parameter to prevent pytest error (expects run_name as a parameterized argument)
    del run_name
    # NOTE(review): the id is read from dataset_metadata although this is a run-level test — confirm.
    # TODO: Change this to failing instead of skipping when all run_metadata.json has deposition id?
    # .get() so a missing key is skipped like an explicit None instead of raising KeyError.
    deposition_id = dataset_metadata.get("deposition_id")
    if deposition_id is None:
        pytest.skip("No deposition_id for run found.")
    HelperTestDeposition.check_deposition_metadata(deposition_id, bucket, filesystem)

0 comments on commit 093f6e5

Please sign in to comment.