Skip to content

Commit

Permalink
♻️ Remove auto-versioning, increment ruid
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Aug 23, 2024
1 parent 4ed42bd commit 91576e7
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 67 deletions.
2 changes: 1 addition & 1 deletion lamindb/_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
provisional_uid = init_uid(version=version, n_full_id=20)
else:
if not isinstance(revises, Artifact):
raise TypeError("revises has to be of type ln.Artifact")
raise TypeError("`revises` has to be of type `Artifact`")
provisional_uid, version = get_uid_from_old_version(revises, version, using_key)
if description is None:
description = revises.description
Expand Down
28 changes: 10 additions & 18 deletions lamindb/core/versioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def set_version(version: str | None = None, previous_version: str | None = None)
version: Version string.
previous_version: Previous version string.
"""
if version == previous_version:
raise ValueError(f"Please increment the previous version: '{previous_version}'")
if version is None and previous_version is not None:
version = bump_version(previous_version, bump_type="major")
return version
Expand All @@ -92,16 +90,23 @@ def init_uid(
revises: IsVersioned | None = None,
) -> str:
if revises is not None:
stem_uid = revises.stem_uid
suid = revises.stem_uid
ruid = increment_base62(revises.uid[-4:])
else:
stem_uid = ids.base62(n_full_id - 4)
suid = ids.base62(n_full_id - 4)
ruid = "0000"
if version is not None:
if not isinstance(version, str):
raise ValueError(
"`version` parameter must be `None` or `str`, e.g., '0.1', '1', '2',"
" etc."
)
return stem_uid + ids.base62_4()
if revises is not None:
if version == revises.version:
raise ValueError(
f"Please increment the previous version: '{revises.version}'"
)
return suid + ruid


def get_uid_from_old_version(
Expand All @@ -110,24 +115,11 @@ def get_uid_from_old_version(
using_key: str | None = None,
) -> tuple[str, str]:
"""{}""" # noqa: D415
msg = ""
if revises.version is None:
previous_version = "1"
msg = f"setting previous version to '{previous_version}'"
else:
previous_version = revises.version
version = set_version(version, previous_version)
new_uid = init_uid(
version=version,
n_full_id=revises._len_full_uid,
revises=revises,
)
# the following covers the edge case where the old file was unversioned
if revises.version is None:
revises.version = previous_version
revises.save(using=using_key)
if msg != "":
msg += f"& new version to '{version}'"
return new_uid, version


Expand Down
96 changes: 48 additions & 48 deletions tests/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
CloudPath,
LocalPathClasses,
UPath,
extract_suffix_from_path,
)

# how do we properly abstract out the default storage variable?
Expand Down Expand Up @@ -134,7 +133,25 @@ def test_data_is_anndata_paths():
assert not data_is_anndata("s3://somewhere/something.zarr")


def test_revises_versioned_file(df, adata):
def test_basic_ralidation():
# extra kwargs
with pytest.raises(ValueError):
ln.Artifact("testpath.csv", description="test1b", extra_kwarg="extra")

# > 1 args
with pytest.raises(ValueError) as error:
ln.Artifact("testpath.csv", "testpath.csv")
assert error.exconly() == "ValueError: Only one non-keyword arg allowed: data"


# comment out for now
# AUTO_KEY_PREFIX
# with pytest.raises(ValueError) as error:
# ln.Artifact.from_df(df, key=".lamindb/test_df.parquet")
# assert error.exconly() == "ValueError: Key cannot start with .lamindb/"


def test_revise_file(df, adata):
# attempt to create a file with an invalid version
with pytest.raises(ValueError) as error:
artifact = ln.Artifact.from_df(df, description="test", version=0)
Expand All @@ -153,53 +170,36 @@ def test_revises_versioned_file(df, adata):
assert artifact.path.exists()

with pytest.raises(ValueError) as error:
artifact_v2 = ln.Artifact.from_anndata(adata, revises=artifact, version="1")
artifact_r2 = ln.Artifact.from_anndata(adata, revises=artifact, version="1")
assert error.exconly() == "ValueError: Please increment the previous version: '1'"

# create new file from old file
artifact_v2 = ln.Artifact.from_anndata(adata, revises=artifact)
artifact_r2 = ln.Artifact.from_anndata(adata, revises=artifact)
assert artifact.version == "1"
assert artifact_v2.stem_uid == artifact.stem_uid
assert artifact_v2.version == "2"
assert artifact_v2.key is None
assert artifact_v2.description == "test"
assert artifact_r2.stem_uid == artifact.stem_uid
assert artifact_r2.version is None
assert artifact_r2.key is None
assert artifact_r2.description == "test"

artifact_v2.save()
assert artifact_v2.path.exists()
artifact_r2.save()
assert artifact_r2.path.exists()

# create new file from newly versioned file
df.iloc[0, 0] = 0
file_v3 = ln.Artifact.from_df(df, description="test1", revises=artifact_v2)
assert file_v3.stem_uid == artifact.stem_uid
assert file_v3.version == "3"
assert file_v3.description == "test1"
df.iloc[0, 0] = 0 # mutate dataframe so that hash lookup doesn't trigger
file_r3 = ln.Artifact.from_df(
df, description="test1", revises=artifact_r2, version="2"
)
assert file_r3.stem_uid == artifact.stem_uid
assert file_r3.version == "2"
assert file_r3.description == "test1"

with pytest.raises(TypeError) as error:
ln.Artifact.from_df(df, description="test1a", revises=ln.Transform())
assert error.exconly() == "TypeError: revises has to be of type ln.Artifact"
assert error.exconly() == "TypeError: `revises` has to be of type `Artifact`"

# test that reference file cannot be deleted
artifact_v2.delete(permanent=True, storage=True)
artifact_r2.delete(permanent=True, storage=True)
artifact.delete(permanent=True, storage=True)

# extra kwargs
with pytest.raises(ValueError):
ln.Artifact.from_df(df, description="test1b", extra_kwarg="extra")

# > 1 args
with pytest.raises(ValueError) as error:
ln.Artifact(df, df)
assert error.exconly() == "ValueError: Only one non-keyword arg allowed: data"


# comment out for now
# AUTO_KEY_PREFIX
# with pytest.raises(ValueError) as error:
# ln.Artifact.from_df(df, key=".lamindb/test_df.parquet")
# assert error.exconly() == "ValueError: Key cannot start with .lamindb/"


def test_revises_unversioned_file(df, adata):
# unversioned file
artifact = ln.Artifact.from_df(df, description="test2")
assert artifact.version is None
Expand All @@ -210,9 +210,9 @@ def test_revises_unversioned_file(df, adata):

# create new file from old file
new_artifact = ln.Artifact.from_anndata(adata, revises=artifact)
assert artifact.version == "1"
assert artifact.version is None
assert new_artifact.stem_uid == artifact.stem_uid
assert new_artifact.version == "2"
assert new_artifact.version is None
assert new_artifact.description == artifact.description

artifact.delete(permanent=True, storage=True)
Expand Down Expand Up @@ -248,7 +248,7 @@ def test_create_from_dataframe_using_from_df_and_link_features(df):
assert artifact.description == description
assert artifact._accessor == "DataFrame"
assert artifact.key == "folder/hello.parquet"
assert artifact._key_is_virtual
assert artifact._key_is_rirtual
assert artifact.uid in artifact.path.as_posix()
artifact.save()
# register features from df columns
Expand All @@ -269,7 +269,7 @@ def test_create_from_dataframe_using_from_df_and_link_features(df):

def test_create_from_anndata_in_memory_and_link_features(adata):
ln.save(
bt.Gene.from_values(adata.var.index, field=bt.Gene.symbol, organism="human")
bt.Gene.from_ralues(adata.var.index, field=bt.Gene.symbol, organism="human")
)
ln.save(ln.Feature.from_df(adata.obs))
artifact = ln.Artifact.from_anndata(adata, description="test")
Expand Down Expand Up @@ -322,13 +322,13 @@ def test_create_from_anndata_in_storage(data):


# this tests the basic (non-provenance-related) metadata
@pytest.mark.parametrize("key_is_virtual", [True, False])
@pytest.mark.parametrize("key_is_rirtual", [True, False])
@pytest.mark.parametrize("key", [None, "my_new_dir/my_artifact.csv", "nosuffix"])
@pytest.mark.parametrize("description", [None, "my description"])
def test_create_from_local_filepath(
get_test_filepaths, key_is_virtual, key, description
get_test_filepaths, key_is_rirtual, key, description
):
ln.settings.creation._artifact_use_virtual_keys = key_is_virtual
ln.settings.creation._artifact_use_rirtual_keys = key_is_rirtual
is_in_registered_storage = get_test_filepaths[0]
root_dir = get_test_filepaths[1]
test_filepath = get_test_filepaths[3]
Expand Down Expand Up @@ -391,7 +391,7 @@ def test_create_from_local_filepath(
)
else:
assert artifact.key == key
assert artifact._key_is_virtual == key_is_virtual
assert artifact._key_is_rirtual == key_is_rirtual
if is_in_registered_storage:
# this would only hit if the key matches the correct key
assert artifact.storage.root == root_dir.resolve().as_posix()
Expand All @@ -400,7 +400,7 @@ def test_create_from_local_filepath(
)
else:
# file is moved into default storage
if key_is_virtual:
if key_is_rirtual:
assert (
artifact.path
== lamindb_setup.settings.storage.root
Expand All @@ -412,7 +412,7 @@ def test_create_from_local_filepath(
# only delete from storage if a file copy took place
delete_from_storage = str(test_filepath.resolve()) != str(artifact.path)
artifact.delete(permanent=True, storage=delete_from_storage)
ln.settings.creation._artifact_use_virtual_keys = True
ln.settings.creation._artifact_use_rirtual_keys = True


ERROR_MESSAGE = """\
Expand Down Expand Up @@ -454,7 +454,7 @@ def test_delete_artifact(df):
artifact = ln.Artifact.from_df(df, description="My test file to delete")
artifact.save()
assert artifact.visibility == 1
assert artifact.key is None or artifact._key_is_virtual
assert artifact.key is None or artifact._key_is_rirtual
storage_path = artifact.path
# trash behavior
artifact.delete()
Expand All @@ -471,7 +471,7 @@ def test_delete_artifact(df):
).first()
is None
)
assert not storage_path.exists() # deletes from storage is key_is_virtual
assert not storage_path.exists() # deletes from storage is key_is_rirtual

# test deleting artifact from non-managed storage
artifact = ln.Artifact(
Expand Down

0 comments on commit 91576e7

Please sign in to comment.