Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tensorchord dev #349

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds the repository's Docker image (multi-arch tooling via QEMU + Buildx)
# and pushes it to Docker Hub as <DOCKERIO_USERNAME>/vectordbbench:latest.
name: Release Nightly

on:
  workflow_dispatch:
  # NOTE(review): this also runs (and pushes) on every pull request; repository
  # secrets are not exposed to pull requests from forks, so the login step
  # would fail there -- confirm this trigger is intended.
  pull_request:

permissions:
  actions: write

jobs:
  trigger:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERIO_USERNAME }}
          password: ${{ secrets.DOCKERIO_TOKEN }}
      - name: Push binary release to Docker Registry
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          # The Dockerfile sits at the repository root; ".Dockerfile" does not
          # exist and made the build step fail.
          file: ./Dockerfile
          tags: ${{ secrets.DOCKERIO_USERNAME }}/vectordbbench:latest
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@ COPY --from=builder-image /usr/local/lib/python3.11/site-packages /usr/local/lib
WORKDIR /opt/code
COPY . .
# key=value form; the space-separated "ENV name value" form is legacy syntax.
ENV PYTHONPATH=/opt/code
# Point apt at the USTC mirror and install libpq5 (the PostgreSQL client
# library; presumably required by psycopg at runtime). Everything runs in one
# layer so the downloaded package lists can be removed before the layer is
# committed, keeping the image small.
# NOTE(review): on Debian 12 (bookworm) based images the sources live in
# /etc/apt/sources.list.d/debian.sources and /etc/apt/sources.list may not
# exist -- confirm against the base image used by this Dockerfile.
RUN sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends libpq5 \
    && rm -rf /var/lib/apt/lists/*

ENTRYPOINT ["python3", "-m", "vectordb_bench"]
10 changes: 2 additions & 8 deletions install/requirements_py3.11.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
grpcio==1.53.0
grpcio-tools==1.53.0
qdrant-client
pinecone-client
weaviate-client
elasticsearch
pgvector
sqlalchemy
redis
chromadb
pgvecto.rs @ git+https://github.com/cutecutecat/pgvecto.rs-py@django-types
pytz
streamlit-autorefresh
streamlit>=1.23.0
Expand All @@ -21,3 +14,4 @@ environs
pydantic<v2
scikit-learn
pymilvus
psycopg
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pinecone = [ "pinecone-client" ]
weaviate = [ "weaviate-client" ]
elastic = [ "elasticsearch" ]
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
pgvecto_rs = [ "psycopg2" ]
# The git ref must follow "@"; a "#..." URL fragment is not read as a branch
# by pip, so the default branch would be installed instead of django-types.
pgvecto_rs = [ "pgvecto_rs @ git+https://github.com/cutecutecat/pgvecto.rs-py@django-types", "psycopg[binary]>=3.1.12" ]
redis = [ "redis" ]
chromadb = [ "chromadb" ]
zilliz_cloud = []
Expand Down
12 changes: 6 additions & 6 deletions vectordb_bench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ class config:
LOAD_TIMEOUT_1536D_500K = 2.5 * 3600 # 2.5h
LOAD_TIMEOUT_1536D_5M = 25 * 3600 # 25h

OPTIMIZE_TIMEOUT_DEFAULT = 30 * 60 # 30min
OPTIMIZE_TIMEOUT_768D_1M = 30 * 60 # 30min
OPTIMIZE_TIMEOUT_768D_10M = 5 * 3600 # 5h
OPTIMIZE_TIMEOUT_768D_100M = 50 * 3600 # 50h
OPTIMIZE_TIMEOUT_DEFAULT = LOAD_TIMEOUT_DEFAULT
OPTIMIZE_TIMEOUT_768D_1M = LOAD_TIMEOUT_768D_1M
OPTIMIZE_TIMEOUT_768D_10M = LOAD_TIMEOUT_768D_10M
OPTIMIZE_TIMEOUT_768D_100M = LOAD_TIMEOUT_768D_100M


OPTIMIZE_TIMEOUT_1536D_500K = 15 * 60 # 15min
OPTIMIZE_TIMEOUT_1536D_5M = 2.5 * 3600 # 2.5h
OPTIMIZE_TIMEOUT_1536D_500K = LOAD_TIMEOUT_1536D_500K
OPTIMIZE_TIMEOUT_1536D_5M = LOAD_TIMEOUT_1536D_5M
def display(self) -> str:
tmp = [
i for i in inspect.getmembers(self)
Expand Down
1 change: 0 additions & 1 deletion vectordb_bench/backend/cases.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import typing
import logging
from enum import Enum, auto
from typing import Type
Expand Down
1 change: 0 additions & 1 deletion vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import Any, Type
from contextlib import contextmanager

from pydantic import BaseModel, validator, SecretStr
Expand Down
6 changes: 3 additions & 3 deletions vectordb_bench/backend/clients/milvus/milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pymilvus import Collection, utility
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException

from ..api import VectorDB, IndexType
from ..api import VectorDB
from .config import MilvusIndexConfig


Expand Down Expand Up @@ -129,7 +129,7 @@ def wait_index():
log.warning(f"{self.name} compact error: {e}")
if hasattr(e, 'code'):
if e.code().name == 'PERMISSION_DENIED':
log.warning(f"Skip compact due to permission denied.")
log.warning("Skip compact due to permission denied.")
pass
else:
raise e
Expand Down Expand Up @@ -165,7 +165,7 @@ def optimize(self):
def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
if self.case_config.is_gpu_index:
log.info(f"current gpu_index only supports IP / L2, cosine dataset need normalize.")
log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
return True

return False
Expand Down
154 changes: 154 additions & 0 deletions vectordb_bench/backend/clients/pgvecto_rs/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from typing import Annotated, Optional, Unpack

import click
import os
from pydantic import SecretStr

from ....cli.cli import (
CommonTypedDict,
HNSWFlavor1,
IVFFlatTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
)
from vectordb_bench.backend.clients import DB


class PgVectoRSTypedDict(CommonTypedDict):
    """Command-line options shared by every pgvecto.rs benchmark command.

    Each field is annotated with the ``click.option`` that produces it, so the
    annotation's first argument is the type of the value the command receives.
    Options declared with ``required=False`` and no default arrive as ``None``
    when omitted, hence the ``Optional[...]`` annotations.
    """

    # --- connection settings -------------------------------------------
    user_name: Annotated[
        str, click.option("--user-name", type=str, help="Db username", required=True)
    ]
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            help="Postgres database password",
            # Resolved lazily so the environment is read when the command is
            # invoked; falls back to an empty string if the variable is unset.
            default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
            show_default="$POSTGRES_PASSWORD",
        ),
    ]

    host: Annotated[
        str, click.option("--host", type=str, help="Db host", required=True)
    ]
    db_name: Annotated[
        str, click.option("--db-name", type=str, help="Db name", required=True)
    ]

    # --- index build / quantization settings (all optional) ------------
    max_parallel_workers: Annotated[
        Optional[int],
        click.option(
            "--max-parallel-workers",
            type=int,
            help="Sets the maximum number of parallel processes per maintenance operation (index creation)",
            required=False,
        ),
    ]
    quantization_type: Annotated[
        Optional[str],
        click.option(
            "--quantization-type",
            type=click.Choice(["trivial", "scalar", "product"]),
            help="quantization type for vectors",
            required=False,
        ),
    ]
    quantization_ratio: Annotated[
        Optional[str],
        click.option(
            "--quantization-ratio",
            type=click.Choice(["x4", "x8", "x16", "x32", "x64"]),
            help="quantization ratio(for product quantization)",
            required=False,
        ),
    ]


class PgVectoRSFlatTypedDict(PgVectoRSTypedDict, IVFFlatTypedDict):
    """Options accepted by the ``PgVectoRSFlat`` command.

    Adds nothing of its own; it only merges the pgvecto.rs options with those
    of ``IVFFlatTypedDict``.
    """

    # NOTE(review): the PgVectoRSFlat command never reads any IVFFlat-specific
    # parameter -- confirm that inheriting IVFFlatTypedDict here is intended.


# CLI command: benchmark pgvecto.rs using its FLAT index configuration.
#
# Documented with comments rather than a docstring on purpose: click would
# surface a docstring as the command's help text.
@cli.command()
@click_parameter_decorators_from_typed_dict(PgVectoRSFlatTypedDict)
def PgVectoRSFlat(
    **parameters: Unpack[PgVectoRSFlatTypedDict],
):
    # Imported at call time (presumably to defer loading the pgvecto.rs
    # client configuration until this command actually runs).
    from .config import PgVectoRSConfig, PgVectoRSFLATConfig

    # How to reach the database; credentials are wrapped in SecretStr.
    connection = PgVectoRSConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        db_name=parameters["db_name"],
    )

    # Index build settings for the FLAT case.
    index_settings = PgVectoRSFLATConfig(
        max_parallel_workers=parameters["max_parallel_workers"],
        quantization_type=parameters["quantization_type"],
        quantization_ratio=parameters["quantization_ratio"],
    )

    # The raw options are forwarded as well, unchanged.
    run(
        db=DB.PgVectoRS,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )


class PgVectoRSIVFFlatTypedDict(PgVectoRSTypedDict, IVFFlatTypedDict):
    """Options accepted by the ``PgVectoRSIVFFlat`` command.

    Adds nothing of its own; it only merges the pgvecto.rs options with those
    of ``IVFFlatTypedDict``.
    """


# CLI command: benchmark pgvecto.rs using its IVFFlat index configuration.
#
# Documented with comments rather than a docstring on purpose: click would
# surface a docstring as the command's help text.
@cli.command()
@click_parameter_decorators_from_typed_dict(PgVectoRSIVFFlatTypedDict)
def PgVectoRSIVFFlat(
    **parameters: Unpack[PgVectoRSIVFFlatTypedDict],
):
    # Imported at call time (presumably to defer loading the pgvecto.rs
    # client configuration until this command actually runs).
    from .config import PgVectoRSConfig, PgVectoRSIVFFlatConfig

    # How to reach the database; credentials are wrapped in SecretStr.
    connection = PgVectoRSConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        db_name=parameters["db_name"],
    )

    # Shared build settings plus the IVFFlat-specific probes/lists values.
    index_settings = PgVectoRSIVFFlatConfig(
        max_parallel_workers=parameters["max_parallel_workers"],
        quantization_type=parameters["quantization_type"],
        quantization_ratio=parameters["quantization_ratio"],
        probes=parameters["probes"],
        lists=parameters["lists"],
    )

    # The raw options are forwarded as well, unchanged.
    run(
        db=DB.PgVectoRS,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )


class PgVectoRSHNSWTypedDict(PgVectoRSTypedDict, HNSWFlavor1):
    """Options accepted by the ``PgVectoRSHNSW`` command.

    Adds nothing of its own; it only merges the pgvecto.rs options with those
    of ``HNSWFlavor1``.
    """


# CLI command: benchmark pgvecto.rs using its HNSW index configuration.
#
# Documented with comments rather than a docstring on purpose: click would
# surface a docstring as the command's help text.
@cli.command()
@click_parameter_decorators_from_typed_dict(PgVectoRSHNSWTypedDict)
def PgVectoRSHNSW(
    **parameters: Unpack[PgVectoRSHNSWTypedDict],
):
    # Imported at call time (presumably to defer loading the pgvecto.rs
    # client configuration until this command actually runs).
    from .config import PgVectoRSConfig, PgVectoRSHNSWConfig

    # How to reach the database; credentials are wrapped in SecretStr.
    connection = PgVectoRSConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        db_name=parameters["db_name"],
    )

    # Shared build settings plus the HNSW-specific graph/search values.
    index_settings = PgVectoRSHNSWConfig(
        max_parallel_workers=parameters["max_parallel_workers"],
        quantization_type=parameters["quantization_type"],
        quantization_ratio=parameters["quantization_ratio"],
        m=parameters["m"],
        ef_construction=parameters["ef_construction"],
        ef_search=parameters["ef_search"],
    )

    # The raw options are forwarded as well, unchanged.
    run(
        db=DB.PgVectoRS,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )
Loading
Loading