diff --git a/lamindb/_artifact.py b/lamindb/_artifact.py index 9d6ca6da7..b7d0cf54c 100644 --- a/lamindb/_artifact.py +++ b/lamindb/_artifact.py @@ -569,7 +569,7 @@ def __init__(artifact: Artifact, *args, **kwargs): provisional_uid = init_uid(version=version, n_full_id=20) else: if not isinstance(revises, Artifact): - raise TypeError("revises has to be of type ln.Artifact") + raise TypeError("`revises` has to be of type `Artifact`") provisional_uid, version = get_uid_from_old_version(revises, version, using_key) if description is None: description = revises.description diff --git a/lamindb/core/versioning.py b/lamindb/core/versioning.py index f08c382b4..728230440 100644 --- a/lamindb/core/versioning.py +++ b/lamindb/core/versioning.py @@ -78,8 +78,6 @@ def set_version(version: str | None = None, previous_version: str | None = None) version: Version string. previous_version: Previous version string. """ - if version == previous_version: - raise ValueError(f"Please increment the previous version: '{previous_version}'") if version is None and previous_version is not None: version = bump_version(previous_version, bump_type="major") return version @@ -92,16 +90,23 @@ def init_uid( revises: IsVersioned | None = None, ) -> str: if revises is not None: - stem_uid = revises.stem_uid + suid = revises.stem_uid + ruid = increment_base62(revises.uid[-4:]) else: - stem_uid = ids.base62(n_full_id - 4) + suid = ids.base62(n_full_id - 4) + ruid = "0000" if version is not None: if not isinstance(version, str): raise ValueError( "`version` parameter must be `None` or `str`, e.g., '0.1', '1', '2'," " etc." ) - return stem_uid + ids.base62_4() + if revises is not None: + if version == revises.version: + raise ValueError( + f"Please increment the previous version: '{revises.version}'" + ) + return suid + ruid def get_uid_from_old_version( @@ -110,24 +115,11 @@ def get_uid_from_old_version( using_key: str | None = None, ) -> tuple[str, str]: """{}""" # noqa: D415 - msg = "" - if revises.version is None: - previous_version = "1" - msg = f"setting previous version to '{previous_version}'" - else: - previous_version = revises.version - version = set_version(version, previous_version) new_uid = init_uid( version=version, n_full_id=revises._len_full_uid, revises=revises, ) - # the following covers the edge case where the old file was unversioned - if revises.version is None: - revises.version = previous_version - revises.save(using=using_key) - if msg != "": - msg += f"& new version to '{version}'" return new_uid, version diff --git a/tests/test_artifact.py b/tests/test_artifact.py index c9b98a3d1..306aeb2b6 100644 --- a/tests/test_artifact.py +++ b/tests/test_artifact.py @@ -37,7 +37,6 @@ CloudPath, LocalPathClasses, UPath, - extract_suffix_from_path, ) # how do we properly abstract out the default storage variable? @@ -134,7 +133,25 @@ def test_data_is_anndata_paths(): assert not data_is_anndata("s3://somewhere/something.zarr") -def test_revises_versioned_file(df, adata): +def test_basic_ralidation(): + # extra kwargs + with pytest.raises(ValueError): + ln.Artifact("testpath.csv", description="test1b", extra_kwarg="extra") + + # > 1 args + with pytest.raises(ValueError) as error: + ln.Artifact("testpath.csv", "testpath.csv") + assert error.exconly() == "ValueError: Only one non-keyword arg allowed: data" + + +# comment out for now +# AUTO_KEY_PREFIX +# with pytest.raises(ValueError) as error: +# ln.Artifact.from_df(df, key=".lamindb/test_df.parquet") +# assert error.exconly() == "ValueError: Key cannot start with .lamindb/" + + +def test_revise_file(df, adata): # attempt to create a file with an invalid version with pytest.raises(ValueError) as error: artifact = ln.Artifact.from_df(df, description="test", version=0) @@ -153,53 +170,36 @@ def test_revises_versioned_file(df, adata): assert artifact.path.exists() with pytest.raises(ValueError) as error: - artifact_v2 = ln.Artifact.from_anndata(adata, revises=artifact, version="1") + artifact_r2 = ln.Artifact.from_anndata(adata, revises=artifact, version="1") assert error.exconly() == "ValueError: Please increment the previous version: '1'" # create new file from old file - artifact_v2 = ln.Artifact.from_anndata(adata, revises=artifact) + artifact_r2 = ln.Artifact.from_anndata(adata, revises=artifact) assert artifact.version == "1" - assert artifact_v2.stem_uid == artifact.stem_uid - assert artifact_v2.version == "2" - assert artifact_v2.key is None - assert artifact_v2.description == "test" + assert artifact_r2.stem_uid == artifact.stem_uid + assert artifact_r2.version is None + assert artifact_r2.key is None + assert artifact_r2.description == "test" - artifact_v2.save() - assert artifact_v2.path.exists() + artifact_r2.save() + assert artifact_r2.path.exists() # create new file from newly versioned file - df.iloc[0, 0] = 0 - file_v3 = ln.Artifact.from_df(df, description="test1", revises=artifact_v2) - assert file_v3.stem_uid == artifact.stem_uid - assert file_v3.version == "3" - assert file_v3.description == "test1" + df.iloc[0, 0] = 0 # mutate dataframe so that hash lookup doesn't trigger + file_r3 = ln.Artifact.from_df( + df, description="test1", revises=artifact_r2, version="2" + ) + assert file_r3.stem_uid == artifact.stem_uid + assert file_r3.version == "2" + assert file_r3.description == "test1" with pytest.raises(TypeError) as error: ln.Artifact.from_df(df, description="test1a", revises=ln.Transform()) - assert error.exconly() == "TypeError: revises has to be of type ln.Artifact" + assert error.exconly() == "TypeError: `revises` has to be of type `Artifact`" - # test that reference file cannot be deleted - artifact_v2.delete(permanent=True, storage=True) + artifact_r2.delete(permanent=True, storage=True) artifact.delete(permanent=True, storage=True) - # extra kwargs - with pytest.raises(ValueError): - ln.Artifact.from_df(df, description="test1b", extra_kwarg="extra") - - # > 1 args - with pytest.raises(ValueError) as error: - ln.Artifact(df, df) - assert error.exconly() == "ValueError: Only one non-keyword arg allowed: data" - - -# comment out for now -# AUTO_KEY_PREFIX -# with pytest.raises(ValueError) as error: -# ln.Artifact.from_df(df, key=".lamindb/test_df.parquet") -# assert error.exconly() == "ValueError: Key cannot start with .lamindb/" - - -def test_revises_unversioned_file(df, adata): # unversioned file artifact = ln.Artifact.from_df(df, description="test2") assert artifact.version is None @@ -210,9 +210,9 @@ def test_revises_unversioned_file(df, adata): # create new file from old file new_artifact = ln.Artifact.from_anndata(adata, revises=artifact) - assert artifact.version == "1" + assert artifact.version is None assert new_artifact.stem_uid == artifact.stem_uid - assert new_artifact.version == "2" + assert new_artifact.version is None assert new_artifact.description == artifact.description artifact.delete(permanent=True, storage=True) @@ -248,7 +248,7 @@ def test_create_from_dataframe_using_from_df_and_link_features(df): assert artifact.description == description assert artifact._accessor == "DataFrame" assert artifact.key == "folder/hello.parquet" - assert artifact._key_is_virtual + assert artifact._key_is_rirtual assert artifact.uid in artifact.path.as_posix() artifact.save() # register features from df columns @@ -269,7 +269,7 @@ def test_create_from_dataframe_using_from_df_and_link_features(df): def test_create_from_anndata_in_memory_and_link_features(adata): ln.save( - bt.Gene.from_values(adata.var.index, field=bt.Gene.symbol, organism="human") + bt.Gene.from_ralues(adata.var.index, field=bt.Gene.symbol, organism="human") ) ln.save(ln.Feature.from_df(adata.obs)) artifact = ln.Artifact.from_anndata(adata, description="test") @@ -322,13 +322,13 @@ def test_create_from_anndata_in_storage(data): # this tests the basic (non-provenance-related) metadata -@pytest.mark.parametrize("key_is_virtual", [True, False]) +@pytest.mark.parametrize("key_is_rirtual", [True, False]) @pytest.mark.parametrize("key", [None, "my_new_dir/my_artifact.csv", "nosuffix"]) @pytest.mark.parametrize("description", [None, "my description"]) def test_create_from_local_filepath( - get_test_filepaths, key_is_virtual, key, description + get_test_filepaths, key_is_rirtual, key, description ): - ln.settings.creation._artifact_use_virtual_keys = key_is_virtual + ln.settings.creation._artifact_use_rirtual_keys = key_is_rirtual is_in_registered_storage = get_test_filepaths[0] root_dir = get_test_filepaths[1] test_filepath = get_test_filepaths[3] @@ -391,7 +391,7 @@ def test_create_from_local_filepath( ) else: assert artifact.key == key - assert artifact._key_is_virtual == key_is_virtual + assert artifact._key_is_rirtual == key_is_rirtual if is_in_registered_storage: # this would only hit if the key matches the correct key assert artifact.storage.root == root_dir.resolve().as_posix() @@ -400,7 +400,7 @@ def test_create_from_local_filepath( ) else: # file is moved into default storage - if key_is_virtual: + if key_is_rirtual: assert ( artifact.path == lamindb_setup.settings.storage.root @@ -412,7 +412,7 @@ def test_create_from_local_filepath( # only delete from storage if a file copy took place delete_from_storage = str(test_filepath.resolve()) != str(artifact.path) artifact.delete(permanent=True, storage=delete_from_storage) - ln.settings.creation._artifact_use_virtual_keys = True + ln.settings.creation._artifact_use_rirtual_keys = True ERROR_MESSAGE = """\ @@ -454,7 +454,7 @@ def test_delete_artifact(df): artifact = ln.Artifact.from_df(df, description="My test file to delete") artifact.save() assert artifact.visibility == 1 - assert artifact.key is None or artifact._key_is_virtual + assert artifact.key is None or artifact._key_is_rirtual storage_path = artifact.path # trash behavior artifact.delete() @@ -471,7 +471,7 @@ def test_delete_artifact(df): ).first() is None ) - assert not storage_path.exists() # deletes from storage is key_is_virtual + assert not storage_path.exists() # deletes from storage is key_is_rirtual # test deleting artifact from non-managed storage artifact = ln.Artifact(