Skip to content

Commit

Permalink
feat: add cli for preparing env
Browse files Browse the repository at this point in the history
  • Loading branch information
williamfzc committed Nov 27, 2023
1 parent 49412b0 commit 2ac982e
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 2 deletions.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,21 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry.dependencies]
python = "^3.8"
gitpython = "*"
chromadb = "*"
chromadb = "0.4.14"
pydantic-settings = "*"
pydantic = "*"
tqdm = "*"
loguru = "^0.7.2"
pandas = "^2.0.3"
click = "^8.1.7"

# srctag actually still requires `sentence_transformers`,
# but PyTorch is a large dependency that I don't want to manage here
sentence-transformers = { version = "^2.2.2", optional = true }

[tool.poetry.extras]
embedding = ["sentence-transformers"]


[tool.poetry.scripts]
srctag = 'srctag.cli:cli'
33 changes: 33 additions & 0 deletions srctag/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import chromadb
import click

from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from loguru import logger


@click.group()
def cli():
    # Root command group: it performs no work itself and only exists so that
    # subcommands can be attached to it via `@cli.command()`.
    # Kept as a comment (not a docstring) so the generated help text is unchanged.
    return None


@cli.command()
def prepare():
    """Pre-download sentence-transformer models and verify that embedding works."""
    logger.info("Start checking env. It may takes a few minutes for downloading models ...")

    # Push one throwaway document through a collection that uses the
    # sentence-transformer embedding function. Presumably this is what makes
    # the model get fetched on first use, so later runs do not pay that cost.
    client = chromadb.Client()
    collection = client.get_or_create_collection(
        "testonly",
        embedding_function=SentenceTransformerEmbeddingFunction(),
    )
    collection.add(
        documents=["doc"],
        ids=["id"],
    )

    # Use an explicit check rather than `assert`: assertions are removed when
    # Python runs with -O, which would turn this verification into a no-op.
    # ClickException makes the CLI print a readable error and exit non-zero.
    doc_count = collection.count()
    if doc_count != 1:
        raise click.ClickException(
            f"env check failed: expected 1 document in the test collection, got {doc_count}"
        )
    click.echo("ok.")


# Allow the module to be executed directly as a script; when it is merely
# imported, the command group is not invoked.
if __name__ == "__main__":
    cli()

0 comments on commit 2ac982e

Please sign in to comment.