Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚸 Improve UX of db$track() and db$finish() #2213

Merged
merged 22 commits into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
- id: prettier
exclude: |
(?x)(
docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml
docs/changelog.md|.github/ISSUE_TEMPLATE/config.yml|tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
)
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
Expand Down Expand Up @@ -42,6 +42,10 @@ repos:
- id: mixed-line-ending
args: [--fix=lf]
- id: trailing-whitespace
exclude: |
(?x)(
tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
)
- id: check-case-conflict
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.1
Expand Down
81 changes: 68 additions & 13 deletions lamindb/_finish.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from lamin_utils import logger
from lamindb_setup.core.hashing import hash_file

from lamindb.core.exceptions import NotebookNotSaved

if TYPE_CHECKING:
from pathlib import Path

Expand All @@ -16,6 +18,20 @@
from ._query_set import QuerySet


def get_r_save_notebook_message() -> str:
return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`"


def get_shortcut() -> str:
import platform

return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"


def get_seconds_since_modified(filepath) -> float:
return datetime.now().timestamp() - filepath.stat().st_mtime


# this is from the get_title function in nbproject
# should be moved into lamindb sooner or later
def prepare_notebook(
Expand Down Expand Up @@ -82,6 +98,29 @@
script_path.write_text(py_content)


# removes NotebookNotSaved error message from notebook html
def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
import re

cleaned_content = (
file_path.read_text()
) # at this point cleaned_content is still raw
pattern_title = r"<title>(.*?)</title>"
title_match = re.search(pattern_title, cleaned_content)
title_text = None
if title_match:
title_text = title_match.group(1)
pattern_h1 = f"<h1[^>]*>{re.escape(title_text)}</h1>"
cleaned_content = re.sub(pattern_title, "", cleaned_content)
cleaned_content = re.sub(pattern_h1, "", cleaned_content)
cleaned_content = cleaned_content.replace(
f"NotebookNotSaved: {get_r_save_notebook_message()}", ""
)
cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
cleaned_path.write_text(cleaned_content)
return title_text, cleaned_path


def save_context_core(
*,
run: Run,
Expand All @@ -104,7 +143,9 @@
# for scripts, things are easy
is_consecutive = True
is_ipynb = filepath.suffix == ".ipynb"
is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
source_code_path = filepath
report_path: Path | None = None
# for notebooks, we need more work
if is_ipynb:
try:
Expand Down Expand Up @@ -139,12 +180,21 @@
".ipynb", ".py"
)
notebook_to_script(transform, filepath, source_code_path)
elif is_r_notebook:
if filepath.with_suffix(".nb.html").exists():
report_path = filepath.with_suffix(".nb.html")

Check warning on line 185 in lamindb/_finish.py

View check run for this annotation

Codecov / codecov/patch

lamindb/_finish.py#L185

Added line #L185 was not covered by tests
elif filepath.with_suffix(".html").exists():
report_path = filepath.with_suffix(".html")
else:
logger.warning(

Check warning on line 189 in lamindb/_finish.py

View check run for this annotation

Codecov / codecov/patch

lamindb/_finish.py#L189

Added line #L189 was not covered by tests
f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}"
)
ln.settings.creation.artifact_silence_missing_run_warning = True
# track source code
hash, _ = hash_file(source_code_path) # ignore hash_type for now
if (
transform._source_code_artifact_id is not None
or transform.source_code is not None # equivalent to transform.hash is not None
or transform.hash is not None # .hash is equivalent to .transform
):
# check if the hash of the transform source code matches
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
Expand All @@ -165,7 +215,7 @@
logger.warning("Please re-run `ln.track()` to make a new version")
return "rerun-the-notebook"
else:
logger.important("source code is already saved")
logger.debug("source code is already saved")
else:
transform.source_code = source_code_path.read_text()
transform.hash = hash
Expand Down Expand Up @@ -198,10 +248,15 @@
run.finished_at = datetime.now(timezone.utc)

# track report and set is_consecutive
if not is_ipynb:
run.is_consecutive = True
run.save()
else:
if report_path is not None:
if not from_cli:
if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
# this can happen when auto-knitting an html with RStudio
falexwolf marked this conversation as resolved.
Show resolved Hide resolved
raise NotebookNotSaved(get_r_save_notebook_message())

Check warning on line 255 in lamindb/_finish.py

View check run for this annotation

Codecov / codecov/patch

lamindb/_finish.py#L255

Added line #L255 was not covered by tests
if is_r_notebook:
title_text, report_path = clean_r_notebook_html(report_path)
if title_text is not None:
transform.name = title_text

Check warning on line 259 in lamindb/_finish.py

View check run for this annotation

Codecov / codecov/patch

lamindb/_finish.py#L259

Added line #L259 was not covered by tests
if run.report_id is not None:
hash, _ = hash_file(report_path) # ignore hash_type for now
if hash != run.report.hash:
Expand All @@ -210,7 +265,7 @@
)
if response == "y":
run.report.replace(report_path)
run.report.save(upload=True)
run.report.save(upload=True, print_progress=False)
else:
logger.important("keeping old report")
else:
Expand All @@ -224,11 +279,13 @@
)
report_file.save(upload=True, print_progress=False)
run.report = report_file
run.is_consecutive = is_consecutive
run.save()
logger.debug(
f"saved transform.latest_run.report: {transform.latest_run.report}"
)
run.is_consecutive = is_consecutive

# save both run & transform records if we arrive here
run.save()
transform.save()

# finalize
Expand All @@ -250,11 +307,9 @@
f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
)
if not from_cli:
thing, name = (
("notebook", "notebook.ipynb") if is_ipynb else ("script", "script.py")
)
thing = "notebook" if (is_ipynb or is_r_notebook) else "script"
logger.important(
f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
f"if you want to update your {thing} without re-running it, use `lamin save {filepath}`"
)
# because run & transform changed, update the global context
context._run = run
Expand Down
20 changes: 8 additions & 12 deletions lamindb/core/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def raise_missing_context(transform_type: str, key: str) -> bool:
f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
)
if transform_type == "notebook":
if is_run_from_ipython:
print(f"→ {message}")
response = input("→ Ready to re-run? (y/n)")
if response == "y":
Expand Down Expand Up @@ -343,7 +343,7 @@ def track(
)
if run is not None: # loaded latest run
run.started_at = datetime.now(timezone.utc) # update run time
self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"

if run is None: # create new run
run = Run(
Expand Down Expand Up @@ -579,15 +579,11 @@ def finish(self, ignore_non_consecutive: None | bool = None) -> None:
`lamin save script.py` or `lamin save notebook.ipynb` → `docs </cli#lamin-save>`__

"""
from lamindb._finish import save_context_core

def get_seconds_since_modified(filepath) -> float:
return datetime.now().timestamp() - filepath.stat().st_mtime

def get_shortcut() -> str:
import platform

return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
from lamindb._finish import (
get_seconds_since_modified,
get_shortcut,
save_context_core,
)

if self.run is None:
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
Expand All @@ -609,7 +605,7 @@ def get_shortcut() -> str:
self.transform.save()
if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
raise NotebookNotSaved(
f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.finish()`"
f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
)
save_context_core(
run=self.run,
Expand Down
2 changes: 1 addition & 1 deletion lamindb/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class IntegrityError(Exception):
pass


class NoTitleError(Exception):
class NoTitleError(SystemExit):
"""Notebook has no title."""

pass
Expand Down
2 changes: 1 addition & 1 deletion sub/lamin-cli
42 changes: 42 additions & 0 deletions tests/core/notebooks/basic-r-notebook.Rmd.cleaned.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<!doctype html>
<html>
<meta charset="utf-8" />



<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHViR2xpY21GeWVTaHNZVzFwYm5JcFhHNWNibVJpSUR3dElHTnZibTVsWTNRb0tWeHVZR0JnSW4wPSAtLT5cblxuYGBgclxubGlicmFyeShsYW1pbnIpXG5cbmRiIDwtIGNvbm5lY3QoKVxuYGBgXG5cbjwhLS0gcm5iLXNvdXJjZS1lbmQgLS0+XG4ifQ== -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeShsYW1pbnIpXG5cbmRiIDwtIGNvbm5lY3QoKVxuYGBgIn0= -->
<pre class="r"><code>library(laminr)

db &lt;- connect()</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoi4oaSIGNvbm5lY3RlZCBsYW1pbmRiOiBsYW1pbmxhYnMvbGFtaW5kYXRhXG4ifQ== -->
<pre><code>→ connected lamindb: laminlabs/lamindata</code></pre>
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHVaR0lrZEhKaFkyc29YQ0pzVDFOamRYaEVWRVJGTUhFd01EQXdYQ0lwWEc1Z1lHQWlmUT09IC0tPlxuXG5gYGByXG5kYiR0cmFjayhcImxPU2N1eERUREUwcTAwMDBcIilcbmBgYFxuXG48IS0tIHJuYi1zb3VyY2UtZW5kIC0tPlxuIn0= -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZGIkdHJhY2soXCJsT1NjdXhEVERFMHEwMDAwXCIpXG5gYGAifQ== -->
<pre class="r"><code>db$track(&quot;lOScuxDTDE0q0000&quot;)</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoi4oaSIGxvYWRlZCBUcmFuc2Zvcm0oJ2xPU2N1eERUJyksIHN0YXJ0ZWQgUnVuKCdHV3BhVHRVZycpIGF0IDIwMjQtMTItMDEgMTc6NDk6MTggVVRDXG4ifQ== -->
<pre><code>→ loaded Transform(&#39;lOScuxDT&#39;), started Run(&#39;GWpaTtUg&#39;) at 2024-12-01 17:49:18 UTC</code></pre>
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHVaR0lrWm1sdWFYTm9LQ2xjYm1CZ1lDSjkgLS0+XG5cbmBgYHJcbmRiJGZpbmlzaCgpXG5gYGBcblxuPCEtLSBybmItc291cmNlLWVuZCAtLT5cbiJ9 -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZGIkZmluaXNoKClcbmBgYCJ9 -->
<pre class="r"><code>db$finish()</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoiRXJyb3IgaW4gcHlfY2FsbF9pbXBsKGNhbGxhYmxlLCBjYWxsX2FyZ3MkdW5uYW1lZCwgY2FsbF9hcmdzJG5hbWVkKSA6IFxuICBsYW1pbmRiLmNvcmUuZXhjZXB0aW9ucy5Ob3RlYm9va05vdFNhdmVkOiBQbGVhc2Ugc2F2ZSB0aGUgbm90ZWJvb2sgaW4gUlN0dWRpbyAoc2hvcnRjdXQgYENNRCArIHNgKSB3aXRoaW4gMiBzZWMgYmVmb3JlIGNhbGxpbmcgYGRiJGZpbmlzaCgpYFxuUnVuIFx1MDAxYl04Oztyc3R1ZGlvOnJ1bjpyZXRpY3VsYXRlOjpweV9sYXN0X2Vycm9yKClcdTAwMDdgcmV0aWN1bGF0ZTo6cHlfbGFzdF9lcnJvcigpYFx1MDAxYl04OztcdTAwMDcgZm9yIGRldGFpbHMuXG4ifQ== -->
<pre><code>MoreOUTPUT </code></pre>
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
</html>
42 changes: 42 additions & 0 deletions tests/core/notebooks/basic-r-notebook.Rmd.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<!doctype html>
<html>
<meta charset="utf-8" />
<title>My exemplary R analysis</title>
<h1 class="title toc-ignore">My exemplary R analysis</h1>

<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHViR2xpY21GeWVTaHNZVzFwYm5JcFhHNWNibVJpSUR3dElHTnZibTVsWTNRb0tWeHVZR0JnSW4wPSAtLT5cblxuYGBgclxubGlicmFyeShsYW1pbnIpXG5cbmRiIDwtIGNvbm5lY3QoKVxuYGBgXG5cbjwhLS0gcm5iLXNvdXJjZS1lbmQgLS0+XG4ifQ== -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeShsYW1pbnIpXG5cbmRiIDwtIGNvbm5lY3QoKVxuYGBgIn0= -->
<pre class="r"><code>library(laminr)

db &lt;- connect()</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoi4oaSIGNvbm5lY3RlZCBsYW1pbmRiOiBsYW1pbmxhYnMvbGFtaW5kYXRhXG4ifQ== -->
<pre><code>→ connected lamindb: laminlabs/lamindata</code></pre>
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHVaR0lrZEhKaFkyc29YQ0pzVDFOamRYaEVWRVJGTUhFd01EQXdYQ0lwWEc1Z1lHQWlmUT09IC0tPlxuXG5gYGByXG5kYiR0cmFjayhcImxPU2N1eERUREUwcTAwMDBcIilcbmBgYFxuXG48IS0tIHJuYi1zb3VyY2UtZW5kIC0tPlxuIn0= -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZGIkdHJhY2soXCJsT1NjdXhEVERFMHEwMDAwXCIpXG5gYGAifQ== -->
<pre class="r"><code>db$track(&quot;lOScuxDTDE0q0000&quot;)</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoi4oaSIGxvYWRlZCBUcmFuc2Zvcm0oJ2xPU2N1eERUJyksIHN0YXJ0ZWQgUnVuKCdHV3BhVHRVZycpIGF0IDIwMjQtMTItMDEgMTc6NDk6MTggVVRDXG4ifQ== -->
<pre><code>→ loaded Transform(&#39;lOScuxDT&#39;), started Run(&#39;GWpaTtUg&#39;) at 2024-12-01 17:49:18 UTC</code></pre>
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-output-begin eyJkYXRhIjoiXG48IS0tIHJuYi1zb3VyY2UtYmVnaW4gZXlKa1lYUmhJam9pWUdCZ2NseHVaR0lrWm1sdWFYTm9LQ2xjYm1CZ1lDSjkgLS0+XG5cbmBgYHJcbmRiJGZpbmlzaCgpXG5gYGBcblxuPCEtLSBybmItc291cmNlLWVuZCAtLT5cbiJ9 -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuZGIkZmluaXNoKClcbmBgYCJ9 -->
<pre class="r"><code>db$finish()</code></pre>
<!-- rnb-source-end -->
<!-- rnb-output-end -->
<!-- rnb-output-begin eyJkYXRhIjoiRXJyb3IgaW4gcHlfY2FsbF9pbXBsKGNhbGxhYmxlLCBjYWxsX2FyZ3MkdW5uYW1lZCwgY2FsbF9hcmdzJG5hbWVkKSA6IFxuICBsYW1pbmRiLmNvcmUuZXhjZXB0aW9ucy5Ob3RlYm9va05vdFNhdmVkOiBQbGVhc2Ugc2F2ZSB0aGUgbm90ZWJvb2sgaW4gUlN0dWRpbyAoc2hvcnRjdXQgYENNRCArIHNgKSB3aXRoaW4gMiBzZWMgYmVmb3JlIGNhbGxpbmcgYGRiJGZpbmlzaCgpYFxuUnVuIFx1MDAxYl04Oztyc3R1ZGlvOnJ1bjpyZXRpY3VsYXRlOjpweV9sYXN0X2Vycm9yKClcdTAwMDdgcmV0aWN1bGF0ZTo6cHlfbGFzdF9lcnJvcigpYFx1MDAxYl04OztcdTAwMDcgZm9yIGRldGFpbHMuXG4ifQ== -->
<pre><code>MoreOUTPUT NotebookNotSaved: Please save the notebook in RStudio (shortcut `SHORTCUT`) within 2 sec before calling `db$finish()`</code></pre>
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
</html>
Loading