Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added pgvectorscale client #355

Merged
merged 3 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ All the database client supported
| elastic | `pip install vectordb-bench[elastic]` |
| pgvector | `pip install vectordb-bench[pgvector]` |
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
| redis | `pip install vectordb-bench[redis]` |
| memorydb | `pip install vectordb-bench[memorydb]` |
| chromadb | `pip install vectordb-bench[chromadb]` |
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pinecone = [ "pinecone-client" ]
weaviate = [ "weaviate-client" ]
elastic = [ "elasticsearch" ]
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.1" ]
redis = [ "redis" ]
memorydb = [ "memorydb" ]
Expand Down
13 changes: 13 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class DB(Enum):
WeaviateCloud = "WeaviateCloud"
PgVector = "PgVector"
PgVectoRS = "PgVectoRS"
PgVectorScale = "PgVectorScale"
Redis = "Redis"
MemoryDB = "MemoryDB"
Chroma = "Chroma"
Expand Down Expand Up @@ -71,6 +72,10 @@ def init_cls(self) -> Type[VectorDB]:
if self == DB.PgVectoRS:
from .pgvecto_rs.pgvecto_rs import PgVectoRS
return PgVectoRS

if self == DB.PgVectorScale:
from .pgvectorscale.pgvectorscale import PgVectorScale
return PgVectorScale

if self == DB.Redis:
from .redis.redis import Redis
Expand Down Expand Up @@ -123,6 +128,10 @@ def config_cls(self) -> Type[DBConfig]:
from .pgvecto_rs.config import PgVectoRSConfig
return PgVectoRSConfig

if self == DB.PgVectorScale:
from .pgvectorscale.config import PgVectorScaleConfig
return PgVectorScaleConfig

if self == DB.Redis:
from .redis.config import RedisConfig
return RedisConfig
Expand Down Expand Up @@ -172,6 +181,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon
from .aws_opensearch.config import AWSOpenSearchIndexConfig
return AWSOpenSearchIndexConfig

if self == DB.PgVectorScale:
from .pgvectorscale.config import _pgvectorscale_case_config
return _pgvectorscale_case_config.get(index_type)

# DB.Pinecone, DB.Chroma, DB.Redis
return EmptyDBCaseConfig

Expand Down
1 change: 1 addition & 0 deletions vectordb_bench/backend/clients/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class MetricType(str, Enum):
class IndexType(str, Enum):
HNSW = "HNSW"
DISKANN = "DISKANN"
STREAMING_DISKANN = "DISKANN"
IVFFlat = "IVF_FLAT"
IVFSQ8 = "IVF_SQ8"
Flat = "FLAT"
Expand Down
111 changes: 111 additions & 0 deletions vectordb_bench/backend/clients/pgvectorscale/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from abc import abstractmethod
from typing import TypedDict
from pydantic import BaseModel, SecretStr
from typing_extensions import LiteralString
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType

# printf-style template for a PostgreSQL URL with four positional "%s" slots.
# NOTE(review): presumably user, password, host and database name, in that
# order — nothing in the visible part of this module references it; confirm
# whether it is still needed.
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"


class PgVectorScaleConfigDict(TypedDict):
    """Typed shape of the connection parameters built by the DB config.

    These keys will be directly used as kwargs in the psycopg connection
    string, so the names must match the psycopg API exactly.
    """

    user: str
    password: str
    host: str
    port: int
    dbname: str


class PgVectorScaleConfig(DBConfig):
    """Connection settings for a pgvectorscale database.

    ``user_name`` and ``password`` are held as ``SecretStr``; they are only
    unwrapped to plain strings inside :meth:`to_dict`.
    """

    user_name: SecretStr = SecretStr("postgres")
    password: SecretStr
    host: str = "localhost"
    port: int = 5432
    db_name: str

    def to_dict(self) -> PgVectorScaleConfigDict:
        """Return the connection parameters as a plain dict.

        The model's ``user_name`` / ``db_name`` fields are emitted under the
        ``user`` / ``dbname`` keys declared by ``PgVectorScaleConfigDict``.

        Returns:
            A dict with ``host``, ``port``, ``dbname``, ``user`` and
            ``password``; the two secrets are returned unwrapped.
        """
        return {
            "host": self.host,
            "port": self.port,
            "dbname": self.db_name,
            "user": self.user_name.get_secret_value(),
            "password": self.password.get_secret_value(),
        }


class PgVectorScaleIndexConfig(BaseModel, DBCaseConfig):
    """Base case config for pgvectorscale indexes.

    Provides the metric-to-string helpers shared by concrete index configs
    and declares the three parameter hooks subclasses must implement.
    Only ``MetricType.COSINE`` is mapped; every other metric (including
    ``None``) yields an empty string from each helper.
    """

    metric_type: MetricType | None = None
    create_index_before_load: bool = False
    create_index_after_load: bool = True

    def parse_metric(self) -> str:
        """Return ``"vector_cosine_ops"`` for cosine, otherwise ``""``."""
        return (
            "vector_cosine_ops"
            if self.metric_type == MetricType.COSINE
            else ""
        )

    def parse_metric_fun_op(self) -> LiteralString:
        """Return the ``"<=>"`` operator for cosine, otherwise ``""``."""
        return "<=>" if self.metric_type == MetricType.COSINE else ""

    def parse_metric_fun_str(self) -> str:
        """Return ``"cosine_distance"`` for cosine, otherwise ``""``."""
        return (
            "cosine_distance"
            if self.metric_type == MetricType.COSINE
            else ""
        )

    @abstractmethod
    def index_param(self) -> dict:
        """Return the parameters describing how the index is built."""
        ...

    @abstractmethod
    def search_param(self) -> dict:
        """Return the parameters used when issuing a search."""
        ...

    @abstractmethod
    def session_param(self) -> dict:
        """Return the per-session settings to apply for this case."""
        ...


class PgVectorScaleStreamingDiskANNConfig(PgVectorScaleIndexConfig):
    """Case config for the ``IndexType.STREAMING_DISKANN`` index.

    Every tuning field accepts ``None``; ``None`` values are passed through
    unchanged in the returned dicts, so filtering them out is left to the
    consumer of those dicts.
    """

    index: IndexType = IndexType.STREAMING_DISKANN

    # Forwarded under "options" by index_param().
    storage_layout: str | None
    num_neighbors: int | None
    search_list_size: int | None
    max_alpha: float | None
    num_dimensions: int | None
    # NOTE(review): declared but not forwarded by index_param(), search_param()
    # or session_param() — confirm whether it should be part of "options".
    num_bits_per_dimension: int | None

    # Forwarded by session_param() under the "diskann." prefix.
    query_search_list_size: int | None
    query_rescore: int | None

    def index_param(self) -> dict:
        """Return the index description.

        Returns:
            A dict with the metric string (``"metric"``), the enum value of
            ``index`` (``"index_type"``) and an ``"options"`` dict holding
            the build fields.
        """
        build_options = {
            "storage_layout": self.storage_layout,
            "num_neighbors": self.num_neighbors,
            "search_list_size": self.search_list_size,
            "max_alpha": self.max_alpha,
            "num_dimensions": self.num_dimensions,
        }
        return {
            "metric": self.parse_metric(),
            "index_type": self.index.value,
            "options": build_options,
        }

    def search_param(self) -> dict:
        """Return the metric string and distance operator for searches."""
        return {
            "metric": self.parse_metric(),
            "metric_fun_op": self.parse_metric_fun_op(),
        }

    def session_param(self) -> dict:
        """Return the query-time settings keyed by their ``diskann.*`` names."""
        return {
            "diskann.query_search_list_size": self.query_search_list_size,
            "diskann.query_rescore": self.query_rescore,
        }

# Registry mapping an IndexType to the case-config class for that index.
# DB.case_config_cls (clients/__init__.py) looks entries up with
# .get(index_type), so an index type missing from this dict yields None.
_pgvectorscale_case_config = {
IndexType.STREAMING_DISKANN: PgVectorScaleStreamingDiskANNConfig,
}
Loading