Skip to content

Commit

Permalink
♻️ Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Mar 12, 2024
1 parent 6881fcd commit 956d195
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 71 deletions.
72 changes: 1 addition & 71 deletions lamindb/core/_run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from lamin_utils import logger
from lamindb_setup import settings as setup_settings
from lamindb_setup.core import InstanceSettings
from lamindb_setup.core.hashing import hash_code
from lamindb_setup.core.types import UPathStr
from lnschema_core import Run, Transform, ids
from lnschema_core.types import TransformType
Expand All @@ -20,6 +19,7 @@
from lamindb.core._transform_settings import transform_settings

from ._settings import settings
from ._sync_git import get_transform_reference_from_git_repo

# Evaluated once at import time: the value of `builtins.__IPYTHON__` if that
# attribute exists, otherwise False (presumably set by IPython-based
# front-ends -- confirm against the IPython docs).
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)

Expand Down Expand Up @@ -204,76 +204,6 @@ def raise_transform_settings_error() -> None:
)


def clone_git_repo(git_url: str) -> None:
    """Shallow-clone ``git_url`` into the current working directory.

    A trailing ``/`` is dropped and ``.git`` is appended to the URL when it
    is missing. Only the 10 most recent commits are requested
    (``--depth 10``).

    Args:
        git_url: URL of the repository to clone.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status; the
            message is the decoded stderr of the git process.
    """
    # Without stripping, "https://host/org/repo/" would become
    # "https://host/org/repo/.git" below.
    git_url = git_url.rstrip("/")
    if not git_url.endswith(".git"):
        git_url += ".git"
    logger.important(f"cloning {git_url}")
    # An argument list (no shell) means no character in the URL can be
    # interpreted by a shell; "--" stops git from parsing the URL as an option.
    result = subprocess.run(
        ["git", "clone", "--depth", "10", "--", git_url],
        capture_output=True,
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.decode())


def dir_from_repo_url(repo_url: Optional[str]) -> Optional[str]:
    """Return the last path component of ``repo_url`` without a ``.git`` suffix.

    This is the directory name a plain ``git clone <repo_url>`` creates.

    Args:
        repo_url: URL of a git repository, or ``None``.

    Returns:
        The directory name, or ``None`` if ``repo_url`` is ``None``.
    """
    if repo_url is None:
        return None
    # Ignore a trailing slash so ".../repo/" still yields "repo".
    name = repo_url.rstrip("/").split("/")[-1]
    # Strip ".git" only as a suffix: replacing it anywhere would corrupt
    # names such as "user.github.io" (-> "userhub.io").
    if name.endswith(".git"):
        name = name[: -len(".git")]
    return name


def get_git_commit_hash(
    blob_hash: str, cd_repo: Optional[str]
) -> subprocess.CompletedProcess:
    """Run ``git log --find-object`` for a blob and return the finished process.

    Args:
        blob_hash: Object id of the blob to look for.
        cd_repo: Directory to run git in; ``None`` means the current working
            directory.

    Returns:
        The ``CompletedProcess`` with captured stdout/stderr. With the
        ``%H`` format, stdout holds one full commit hash per matching
        commit, so it may contain several lines or be empty. The return
        code is not checked here.
    """
    # Argument list instead of a shell string: nothing in `blob_hash` is
    # ever interpreted by a shell.
    return subprocess.run(
        ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"],
        capture_output=True,
        cwd=cd_repo,
    )


def get_filepath_within_git_repo(
    commit_hash: str, blob_hash: str, cd_repo: Optional[str]
) -> str:
    """Return the repo-relative path of the blob ``blob_hash`` in ``commit_hash``.

    Args:
        commit_hash: Commit (or any tree-ish) whose tree is listed.
        blob_hash: Object id of the blob to locate; a unique prefix of the
            id is accepted as well.
        cd_repo: Directory to run git in; ``None`` means the current working
            directory.

    Returns:
        The path of the first tree entry whose object id matches.

    Raises:
        RuntimeError: If ``git ls-tree`` fails, or if no entry of the tree
            refers to ``blob_hash``.
    """
    # `-z` gives NUL-terminated entries with unquoted paths, so paths that
    # contain spaces or other unusual characters are returned verbatim.
    result = subprocess.run(
        ["git", "ls-tree", "-r", "-z", commit_hash],
        capture_output=True,
        cwd=cd_repo,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"git ls-tree -r {commit_hash}\n" + result.stderr.decode()
        )
    # Each entry has the form "<mode> <type> <object>\t<path>".
    for entry in result.stdout.decode().split("\0"):
        meta, tab, filepath = entry.partition("\t")
        fields = meta.split()
        if tab and len(fields) == 3 and fields[2].startswith(blob_hash):
            return filepath
    raise RuntimeError("Could not find filepath within git repo.")


def get_transform_reference_from_git_repo(path: Path):
    """Build a URL reference to the committed version of the file at ``path``.

    The content hash of ``path`` is looked up in the git history of the
    current working directory first. If nothing is found there, the
    repository configured in ``settings.sync_git_repo`` is cloned (unless its
    directory already exists) and searched instead.

    Args:
        path: Path of the source file to look up.

    Returns:
        A ``(reference, reference_type)`` tuple where ``reference`` is
        ``"<sync_git_repo>/blob/<commit_hash>/<path in repo>"`` and
        ``reference_type`` is ``"url"``.

    Raises:
        RuntimeError: If no commit containing the file is found, if cloning
            fails, or if the file path cannot be resolved within any of the
            candidate commits.
    """
    blob_hash = hash_code(path).hexdigest()
    cd_repo = None
    result = get_git_commit_hash(blob_hash, cd_repo=None)
    # stdout may list several commits (one hash per line), or none at all.
    commit_hashes = result.stdout.decode().split()
    if not commit_hashes or result.returncode != 0:
        cd_repo = dir_from_repo_url(settings.sync_git_repo)
        # `git clone` refuses to write into an existing directory, so reuse
        # a clone left behind by an earlier run instead of failing.
        if not (cd_repo and Path(cd_repo).is_dir()):
            clone_git_repo(settings.sync_git_repo)
        result = get_git_commit_hash(blob_hash, cd_repo=cd_repo)
        commit_hashes = result.stdout.decode().split()
        if not commit_hashes or result.returncode != 0:
            raise RuntimeError(
                f"Did not find file in git repo\n{result.stderr.decode()}"
            )
    # Not every listed commit necessarily still contains the blob (a commit
    # that removed it is listed too), so use the first one that resolves.
    for commit_hash in commit_hashes:
        try:
            gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, cd_repo)
        except RuntimeError as error:
            # Re-bind: the name bound by `except ... as` is cleared on exit.
            lookup_error = error
        else:
            break
    else:
        # `commit_hashes` is non-empty here, so `lookup_error` is always set.
        raise lookup_error
    reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
    reference_type = "url"
    return reference, reference_type


class run_context:
"""Global run context."""

Expand Down
78 changes: 78 additions & 0 deletions lamindb/core/_sync_git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import subprocess
from pathlib import Path
from typing import Optional

from lamin_utils import logger
from lamindb_setup.core.hashing import hash_code

from ._settings import settings


def clone_git_repo(git_url: str) -> None:
    """Shallow-clone ``git_url`` into the current working directory.

    A trailing ``/`` is dropped and ``.git`` is appended to the URL when it
    is missing. Only the 10 most recent commits are requested
    (``--depth 10``).

    Args:
        git_url: URL of the repository to clone.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status; the
            message is the decoded stderr of the git process.
    """
    # Without stripping, "https://host/org/repo/" would become
    # "https://host/org/repo/.git" below.
    git_url = git_url.rstrip("/")
    if not git_url.endswith(".git"):
        git_url += ".git"
    logger.important(f"cloning {git_url}")
    # An argument list (no shell) means no character in the URL can be
    # interpreted by a shell; "--" stops git from parsing the URL as an option.
    result = subprocess.run(
        ["git", "clone", "--depth", "10", "--", git_url],
        capture_output=True,
    )
    if result.returncode != 0:
        raise RuntimeError(result.stderr.decode())


def dir_from_repo_url(repo_url: Optional[str]) -> Optional[str]:
    """Return the last path component of ``repo_url`` without a ``.git`` suffix.

    This is the directory name a plain ``git clone <repo_url>`` creates.

    Args:
        repo_url: URL of a git repository, or ``None``.

    Returns:
        The directory name, or ``None`` if ``repo_url`` is ``None``.
    """
    if repo_url is None:
        return None
    # Ignore a trailing slash so ".../repo/" still yields "repo".
    name = repo_url.rstrip("/").split("/")[-1]
    # Strip ".git" only as a suffix: replacing it anywhere would corrupt
    # names such as "user.github.io" (-> "userhub.io").
    if name.endswith(".git"):
        name = name[: -len(".git")]
    return name


def get_git_commit_hash(
    blob_hash: str, cd_repo: Optional[str]
) -> subprocess.CompletedProcess:
    """Run ``git log --find-object`` for a blob and return the finished process.

    Args:
        blob_hash: Object id of the blob to look for.
        cd_repo: Directory to run git in; ``None`` means the current working
            directory.

    Returns:
        The ``CompletedProcess`` with captured stdout/stderr. With the
        ``%H`` format, stdout holds one full commit hash per matching
        commit, so it may contain several lines or be empty. The return
        code is not checked here.
    """
    # Argument list instead of a shell string: nothing in `blob_hash` is
    # ever interpreted by a shell.
    return subprocess.run(
        ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"],
        capture_output=True,
        cwd=cd_repo,
    )


def get_filepath_within_git_repo(
    commit_hash: str, blob_hash: str, cd_repo: Optional[str]
) -> str:
    """Return the repo-relative path of the blob ``blob_hash`` in ``commit_hash``.

    Args:
        commit_hash: Commit (or any tree-ish) whose tree is listed.
        blob_hash: Object id of the blob to locate; a unique prefix of the
            id is accepted as well.
        cd_repo: Directory to run git in; ``None`` means the current working
            directory.

    Returns:
        The path of the first tree entry whose object id matches.

    Raises:
        RuntimeError: If ``git ls-tree`` fails, or if no entry of the tree
            refers to ``blob_hash``.
    """
    # `-z` gives NUL-terminated entries with unquoted paths, so paths that
    # contain spaces or other unusual characters are returned verbatim.
    result = subprocess.run(
        ["git", "ls-tree", "-r", "-z", commit_hash],
        capture_output=True,
        cwd=cd_repo,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"git ls-tree -r {commit_hash}\n" + result.stderr.decode()
        )
    # Each entry has the form "<mode> <type> <object>\t<path>".
    for entry in result.stdout.decode().split("\0"):
        meta, tab, filepath = entry.partition("\t")
        fields = meta.split()
        if tab and len(fields) == 3 and fields[2].startswith(blob_hash):
            return filepath
    raise RuntimeError("Could not find filepath within git repo.")


def get_transform_reference_from_git_repo(path: Path):
    """Build a URL reference to the committed version of the file at ``path``.

    The content hash of ``path`` is looked up in the git history of the
    current working directory first. If nothing is found there, the
    repository configured in ``settings.sync_git_repo`` is cloned (unless its
    directory already exists) and searched instead.

    Args:
        path: Path of the source file to look up.

    Returns:
        A ``(reference, reference_type)`` tuple where ``reference`` is
        ``"<sync_git_repo>/blob/<commit_hash>/<path in repo>"`` and
        ``reference_type`` is ``"url"``.

    Raises:
        RuntimeError: If no commit containing the file is found, if cloning
            fails, or if the file path cannot be resolved within any of the
            candidate commits.
    """
    blob_hash = hash_code(path).hexdigest()
    cd_repo = None
    result = get_git_commit_hash(blob_hash, cd_repo=None)
    # stdout may list several commits (one hash per line), or none at all.
    commit_hashes = result.stdout.decode().split()
    if not commit_hashes or result.returncode != 0:
        cd_repo = dir_from_repo_url(settings.sync_git_repo)
        # `git clone` refuses to write into an existing directory, so reuse
        # a clone left behind by an earlier run instead of failing.
        if not (cd_repo and Path(cd_repo).is_dir()):
            clone_git_repo(settings.sync_git_repo)
        result = get_git_commit_hash(blob_hash, cd_repo=cd_repo)
        commit_hashes = result.stdout.decode().split()
        if not commit_hashes or result.returncode != 0:
            raise RuntimeError(
                f"Did not find file in git repo\n{result.stderr.decode()}"
            )
    # Not every listed commit necessarily still contains the blob (a commit
    # that removed it is listed too), so use the first one that resolves.
    for commit_hash in commit_hashes:
        try:
            gitpath = get_filepath_within_git_repo(commit_hash, blob_hash, cd_repo)
        except RuntimeError as error:
            # Re-bind: the name bound by `except ... as` is cleared on exit.
            lookup_error = error
        else:
            break
    else:
        # `commit_hashes` is non-empty here, so `lookup_error` is always set.
        raise lookup_error
    reference = f"{settings.sync_git_repo}/blob/{commit_hash}/{gitpath}"
    reference_type = "url"
    return reference, reference_type

0 comments on commit 956d195

Please sign in to comment.