diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71fa7b719..d9ee679fa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: - "faq" - "storage" - "cli" - timeout-minutes: 6 + timeout-minutes: 7 steps: - uses: actions/checkout@v4 diff --git a/lamindb/__init__.py b/lamindb/__init__.py index 8d0d8c656..161f47759 100644 --- a/lamindb/__init__.py +++ b/lamindb/__init__.py @@ -42,7 +42,7 @@ """ # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc. -__version__ = "0.75.1" +__version__ = "0.76a1" import os as _os diff --git a/lamindb/_artifact.py b/lamindb/_artifact.py index 2df60319b..bd4b786e5 100644 --- a/lamindb/_artifact.py +++ b/lamindb/_artifact.py @@ -594,6 +594,7 @@ def __init__(artifact: Artifact, *args, **kwargs): kwargs["description"] = description kwargs["visibility"] = visibility kwargs["_accessor"] = accessor + kwargs["is_new_version_of"] = is_new_version_of # this check needs to come down here because key might be populated from an # existing file path during get_artifact_kwargs_from_data() if ( diff --git a/lamindb/_collection.py b/lamindb/_collection.py index 33d00b0f5..ce2f17a85 100644 --- a/lamindb/_collection.py +++ b/lamindb/_collection.py @@ -158,6 +158,7 @@ def __init__( run=run, version=version, visibility=visibility, + is_new_version_of=is_new_version_of, **kwargs, ) collection._artifacts = artifacts diff --git a/lamindb/_query_set.py b/lamindb/_query_set.py index 5dce4e165..ffbbd1355 100644 --- a/lamindb/_query_set.py +++ b/lamindb/_query_set.py @@ -243,10 +243,10 @@ def one_or_none(self) -> Record | None: else: raise MultipleResultsFound(self.all()) - def latest_version(self) -> RecordsList: + def latest_version(self) -> QuerySet: """Filter every version family by latest version.""" if issubclass(self.model, IsVersioned): - return filter_query_set_by_latest_version(self) + return self.filter(is_latest=True) else: raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`") @@ -288,29 +288,6 @@ def standardize( return _standardize(cls=self, values=values, field=field, **kwargs) -def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList: - # evaluating length can be very costly, hence, the try-except block - try: - first_record = ordered_query_set[0] - except IndexError: - return ordered_query_set - records_in_view = {} - records_in_view[first_record.stem_uid] = first_record - for record in ordered_query_set: - # this overwrites user-provided ordering (relevant records ordered by a - # certain field will not show if they are not the latest version) - if record.stem_uid not in records_in_view: - records_in_view[record.stem_uid] = record - else: - if record.created_at > records_in_view[record.stem_uid].created_at: - # deleting the entry is needed to preserve the integrity of - # user-provided ordering - del records_in_view[record.stem_uid] - records_in_view[record.stem_uid] = record - list_records_in_view = RecordsList(records_in_view.values()) - return list_records_in_view - - models.QuerySet.df = QuerySet.df models.QuerySet.list = QuerySet.list models.QuerySet.first = QuerySet.first diff --git a/lamindb/_record.py b/lamindb/_record.py index 9d1025ab6..8dbb73dd5 100644 --- a/lamindb/_record.py +++ b/lamindb/_record.py @@ -5,7 +5,7 @@ import dj_database_url import lamindb_setup as ln_setup -from django.db import connections +from django.db import connections, transaction from django.db.models import IntegerField, Manager, Q, QuerySet, Value from lamin_utils import logger from lamin_utils._lookup import Lookup @@ -132,7 +132,10 @@ def get(cls, idlike: int | str) -> Record: else: qs = filter(cls, uid__startswith=idlike) if issubclass(cls, IsVersioned): - return qs.latest_version().one() + if len(idlike) <= cls._len_stem_uid: + return qs.latest_version().one() + else: + return qs.one() else: return qs.one() @@ -526,7 +529,28 @@ def save(self, *args, **kwargs) -> Record: if result is not None: init_self_from_db(self, result) else: - super(Record, self).save(*args, **kwargs) + # save versioned record + if isinstance(self, IsVersioned) and self._is_new_version_of is not None: + if self._is_new_version_of.is_latest: + is_new_version_of = self._is_new_version_of + else: + # need one additional request + is_new_version_of = self.__class__.objects.get( + is_latest=True, uid__startswith=self.stem_uid + ) + logger.warning( + f"didn't pass the latest version in `is_new_version_of`, retrieved it: {is_new_version_of}" + ) + is_new_version_of.is_latest = False + with transaction.atomic(): + is_new_version_of._is_new_version_of = ( + None # ensure we don't start a recursion + ) + is_new_version_of.save() + super(Record, self).save(*args, **kwargs) + # save unversioned record + else: + super(Record, self).save(*args, **kwargs) # perform transfer of many-to-many fields # only supported for Artifact and Collection records if db is not None and db != "default" and using_key is None: @@ -551,6 +575,30 @@ def save(self, *args, **kwargs) -> Record: return self +def delete(self) -> None: + """Delete the record.""" + # note that the logic below does not fire if a record is moved to the trash + # the idea is that moving a record to the trash should move its entire version family + # to the trash, whereas permanently deleting should default to only deleting a single record + # of a version family + # we can consider making it easy to permanently delete entire version families as well, + # but that's for another time + if isinstance(self, IsVersioned) and self.is_latest: + new_latest = ( + self.__class__.filter(is_latest=False, uid__startswith=self.stem_uid) + .order_by("-created_at") + .first() + ) + if new_latest is not None: + new_latest.is_latest = True + with transaction.atomic(): + new_latest.save() + super(Record, self).delete() + logger.warning(f"new latest version is {new_latest}") + return None + super(Record, self).delete() + + METHOD_NAMES = [ "__init__", "filter", @@ -559,6 +607,7 @@ def save(self, *args, **kwargs) -> Record: "search", "lookup", "save", + "delete", "from_values", "using", ] diff --git a/lamindb/_transform.py b/lamindb/_transform.py index 7f0ab99d0..4f8180468 100644 --- a/lamindb/_transform.py +++ b/lamindb/_transform.py @@ -22,7 +22,6 @@ def __init__(transform: Transform, *args, **kwargs): is_new_version_of: Transform | None = ( kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None ) - (kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None) version: str | None = kwargs.pop("version") if "version" in kwargs else None type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline" reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None @@ -55,6 +54,7 @@ def __init__(transform: Transform, *args, **kwargs): reference=reference, reference_type=reference_type, _has_consciously_provided_uid=has_consciously_provided_uid, + is_new_version_of=is_new_version_of, ) diff --git a/lamindb/core/versioning.py b/lamindb/core/versioning.py index 2c521427c..2edf019ef 100644 --- a/lamindb/core/versioning.py +++ b/lamindb/core/versioning.py @@ -72,7 +72,6 @@ def set_version(version: str | None = None, previous_version: str | None = None) return version -# uses `initial_version_id` to extract a stem_id that's part of id def init_uid( *, version: str | None = None, diff --git a/sub/lnschema-core b/sub/lnschema-core index 312e9b4cd..bb842ca29 160000 --- a/sub/lnschema-core +++ b/sub/lnschema-core @@ -1 +1 @@ -Subproject commit 312e9b4cdc67d7e1dbcc83c3cb1a2f74db63e19e +Subproject commit bb842ca2903a48244c070a33c7ab29fb2cdcdafa diff --git a/tests/test_artifact.py b/tests/test_artifact.py index 313c20848..00f25dc5a 100644 --- a/tests/test_artifact.py +++ b/tests/test_artifact.py @@ -554,8 +554,7 @@ def test_create_big_file_from_remote_path(): filepath_str = "s3://lamindb-test/human_immune.h5ad" artifact = ln.Artifact(filepath_str) assert artifact.key == "human_immune.h5ad" - assert artifact.hash.endswith("-2") - assert artifact._hash_type == "md5-n" + assert artifact._hash_type == "md5-2" ln.settings.storage = previous_storage diff --git a/tests/test_versioning.py b/tests/test_versioning.py index 51a0fdffe..ebcccb470 100644 --- a/tests/test_versioning.py +++ b/tests/test_versioning.py @@ -87,15 +87,25 @@ def test_latest_version_and_get(): # build one version family transform_v1 = ln.Transform(name="Introduction") transform_v1.save() - transform_v2 = ln.Transform(name="Introduction", is_new_version_of=transform_v1) + assert transform_v1.is_latest + assert transform_v1.version is None + # pass the latest version, also vary the name for the fun of it + transform_v2 = ln.Transform(name="Introduction v2", is_new_version_of=transform_v1) transform_v2.save() - transform_v3 = ln.Transform(name="Introduction", is_new_version_of=transform_v2) + assert not transform_v1.is_latest + assert transform_v2.is_latest + assert transform_v2.version == "2" + # do not pass the latest version to is_new_version_of + transform_v3 = ln.Transform(name="Introduction", is_new_version_of=transform_v1) transform_v3.save() + assert not ln.Transform.objects.get(name="Introduction v2", version="2").is_latest + assert transform_v3.is_latest transform_v4 = ln.Transform(name="Introduction") transform_v4.save() + assert transform_v4.is_latest # add another transform with the same name that's not part of this family # but will also be a hit for the query - assert len(ln.Transform.filter(name="Introduction").all()) == 4 + assert len(ln.Transform.filter(name="Introduction").all()) == 3 assert len(ln.Transform.filter(name="Introduction").latest_version()) == 2 transform_v4.delete() with pytest.raises(MultipleResultsFound): @@ -103,10 +113,16 @@ def test_latest_version_and_get(): assert ( ln.Transform.filter(name="Introduction").latest_version().one() == transform_v3 ) + + # test get assert ln.Transform.get(transform_v3.uid) == transform_v3 assert ln.Transform.get(transform_v3.id) == transform_v3 assert ln.Transform.get(transform_v3.uid[:4]) == transform_v3 + # test delete + transform_v3.delete() + assert transform_v2.is_latest + # test empty QuerySet assert ( ln.Transform.filter(name="IntroductionNotExists").latest_version().one_or_none()