diff --git a/README.md b/README.md index faa01c2..a9ae458 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,63 @@ workflows, aka [agile modeling](https://arxiv.org/abs/2302.12948). While we get this documentation ready, we recommend checking out the main [Perch repository](https://github.com/google-research/perch). +This repository consists of four sub-libraries: + +* `db` - The core database functionality for storing embeddings and related +metadata. The database also handles labels applied to embeddings, as well as +vector search (both exact and approximate). +* `agile` - Tooling (and example notebooks) for agile modeling on top of the +Hoplite db layer, combining search and active learning approaches. +This library includes tools for organizing labeled data and training linear +classifiers over embeddings, as well as tooling for embedding large datasets. +* `zoo` - A bioacoustics model zoo. A basic wrapper class is provided, and +any model which can transform windows of audio samples into embeddings +can then be used in the agile modeling workflow. +* `taxonomy` - A database of taxonomic information, especially for handling +conversions between the various bird taxonomies. + +Each sub-library has its own documentation. + +# Installation + +The repository can be installed with either pip or poetry. Poetry allows more +granular management of dependencies. + +First, install some basic dependencies. Note that for GPU support, you may +install `tensorflow[and-cuda]` instead of `tensorflow-cpu`. 
+ +```bash +sudo apt-get update +sudo apt-get install libsndfile1 ffmpeg +pip install absl-py +pip install requests +pip install tensorflow-cpu +``` + +Then to install with pip: +```bash +pip install git+https://github.com/google-research/hoplite.git +``` + +Then run the tests and check that they pass: +```bash +python -m unittest discover -s hoplite/db/tests -p "*test.py" +python -m unittest discover -s hoplite/taxonomy -p "*test.py" +python -m unittest discover -s hoplite/zoo -p "*test.py" +python -m unittest discover -s hoplite/agile/tests -p "*test.py" +``` + +Or, install with poetry: +```bash +# Install Poetry for package management +curl -sSL https://install.python-poetry.org | python3 - + +# Install all dependencies specified in the poetry configs. +poetry install +``` + +## Notes on Dependencies + # Disclaimer This is not an officially supported Google product. This project is not diff --git a/hoplite/agile/1_embed_audio_v2.ipynb b/hoplite/agile/1_embed_audio_v2.ipynb index 2c5bcfa..e0addf3 100644 --- a/hoplite/agile/1_embed_audio_v2.ipynb +++ b/hoplite/agile/1_embed_audio_v2.ipynb @@ -11,6 +11,8 @@ "import os\n", "from IPython.display import display\n", "import ipywidgets as widgets\n", + "import numpy as np\n", + "from etils import epath\n", "\n", "from hoplite.agile import colab_utils\n", "from hoplite.agile import embed\n", @@ -105,8 +107,11 @@ "\n", "print('Initialized DB located at ', configs.db_config.db_config.db_path)\n", "\n", - "def drop_and_reload_db(_) -\u003e interface.GraphSearchDBInterface:\n", - " os.unlink(configs.db_config.db_config.db_path)\n", + "def drop_and_reload_db(_) -\u003e interface.HopliteDBInterface:\n", + " db_path = epath.Path(configs.db_config.db_config.db_path)\n", + " for fp in db_path.glob('hoplite.sqlite*'):\n", + " fp.unlink()\n", + " (db_path / 'usearch.index').unlink()\n", " print('\\n Deleted previous db at: ', configs.db_config.db_config.db_path)\n", " db = configs.db_config.load_db()\n", "\n", @@ -172,8 +177,17 @@ 
"source": [ "q = db.get_embedding(444)\n", "%time results, scores = brutalism.brute_search(worker.db, query_embedding=q, search_list_size=128, score_fn=np.dot)\n", - "print([r.embedding_id for r in results])" + "print([int(r.embedding_id) for r in results])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHFlcn9PnSPJ" + }, + "outputs": [], + "source": [] } ], "metadata": {