Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update hippo client #283

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# EditorConfig is awesome: https://EditorConfig.org

# top-most EditorConfig file
root = true

[*]
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = false
insert_final_newline = false

[Dockerfile*]
indent_style = space
indent_size = 4

[*.json]
indent_style = space
indent_size = 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each file should end with a newline; the same applies to the other files.

15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,18 @@ __MACOSX
build/
venv/
.idea/

# vscode files
.vscode/*
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/settings.json
!.vscode/*.code-snippets
!.vscode/c_cpp_properties.json

# Local History for Visual Studio Code
.history/

# Built Visual Studio Code Extensions
*.vsix
28 changes: 28 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Streamlit",
"type": "debugpy",
"request": "launch",
"module": "streamlit",
"args": [
"run",
"vectordb_bench/frontend/vdb_benchmark.py",
"--logger.level",
"info",
"--theme.base",
"light",
"--theme.primaryColor",
"#3670F2",
"--theme.secondaryBackgroundColor",
"#F0F2F6",
],
"subProcess": true,
"justMyCode": false
}
]
}
10 changes: 10 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"[python]": {
"editor.formatOnSave": false,
// "editor.codeActionsOnSave": {
// "source.fixAll": "always",
// "source.organizeImports": "always"
// },
"editor.defaultFormatter": "charliermarsh.ruff"
}
}
29 changes: 29 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "build vectordb bench",
"type": "shell",
"command": "python",
"args": [
"-m",
"pip",
"install",
"-e",
".[test]"
],
"group": {
"kind": "build",
"isDefault": true
}
},
{
"label": "run vectordb bench",
"type": "shell",
"command": "init_bench",
"problemMatcher": []
}
]
}
13 changes: 13 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class DB(Enum):
PgVectoRS = "PgVectoRS"
Redis = "Redis"
Chroma = "Chroma"
Hippo = "Hippo"


@property
Expand Down Expand Up @@ -76,6 +77,10 @@ def init_cls(self) -> Type[VectorDB]:
if self == DB.Chroma:
from .chroma.chroma import ChromaClient
return ChromaClient

if self == DB.Hippo:
from .hippo.hippo import Hippo
return Hippo

@property
def config_cls(self) -> Type[DBConfig]:
Expand Down Expand Up @@ -120,6 +125,10 @@ def config_cls(self) -> Type[DBConfig]:
from .chroma.config import ChromaConfig
return ChromaConfig

if self == DB.Hippo:
from .hippo.config import HippoConfig
return HippoConfig

def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
if self == DB.Milvus:
from .milvus.config import _milvus_case_config
Expand Down Expand Up @@ -149,6 +158,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon
from .pgvecto_rs.config import _pgvecto_rs_case_config
return _pgvecto_rs_case_config.get(index_type)

if self == DB.Hippo:
from .hippo.config import HippoIndexConfig
return HippoIndexConfig

# DB.Pinecone, DB.Chroma, DB.Redis
return EmptyDBCaseConfig

Expand Down
67 changes: 67 additions & 0 deletions vectordb_bench/backend/clients/hippo/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from pydantic import BaseModel, Field, SecretStr
from transwarp_hippo_api.hippo_type import IndexType
from transwarp_hippo_api.hippo_type import MetricType as HippoMetricType

from ..api import DBCaseConfig, DBConfig, MetricType


class HippoConfig(DBConfig):
    """Connection and collection-level settings for the Hippo client.

    The connection fields (``ip``, ``port``, ``username``, ``password``) are
    declared as ``SecretStr``; the remaining fields are positive integers
    (each constrained with ``ge=1``).
    """

    ip: SecretStr = ""
    port: SecretStr = "18902"
    username: SecretStr = "shiva"
    password: SecretStr = "shiva"
    number_of_shards: int = Field(default=1, ge=1)
    number_of_replicas: int = Field(default=1, ge=1)
    insert_batch_size: int = Field(default=100, ge=1)

    def to_dict(self) -> dict:
        """Return the settings as a plain dict with all secrets unwrapped.

        Returns:
            A dict with the keys ``host_port`` (a one-element list holding
            ``"<ip>:<port>"``), ``username``, ``pwd``, ``number_of_shards``,
            ``number_of_replicas`` and ``insert_batch_size``.
        """

        def reveal(value) -> str:
            # The field defaults above are bare strings. Pydantic does not
            # validate (and so does not coerce) default values unless it is
            # configured to, which means a field left at its default is still
            # a plain ``str`` here and has no ``get_secret_value()``.
            if isinstance(value, SecretStr):
                return value.get_secret_value()
            return str(value)

        return {
            "host_port": [f"{reveal(self.ip)}:{reveal(self.port)}"],
            "username": reveal(self.username),
            "pwd": reveal(self.password),
            "number_of_shards": self.number_of_shards,
            "number_of_replicas": self.number_of_replicas,
            "insert_batch_size": self.insert_batch_size,
        }


class HippoIndexConfig(BaseModel, DBCaseConfig):
    """Index-build and search tuning knobs for a Hippo benchmark case.

    The bracketed ranges next to each field are carried over from the
    original author's notes; nothing in this class enforces them.
    """

    # Index algorithm. Author's note lists: HNSW, FLAT, IVF_FLAT, IVF_SQ,
    # IVF_PQ, ANNOY.
    index: IndexType = IndexType.HNSW
    metric_type: MetricType | None = None
    M: int = 30  # [4, 96]
    ef_construction: int = 360  # [8, 512]
    ef_search: int = 100  # [topk, 32768]
    nlist: int = 1024  # [1, 65536]
    nprobe: int = 64  # [1, nlist]
    # Author's note: "divisible by dim".
    # NOTE(review): presumably dim must be divisible by m - confirm.
    m: int = 16
    nbits: int = 8  # [1, 16]
    k_factor: int = 100  # [10, 1000]

    def parse_metric(self) -> HippoMetricType:
        """Translate ``metric_type`` into the same-named Hippo metric.

        Returns:
            The ``HippoMetricType`` member named like the configured metric
            (COSINE, IP or L2), or an empty string when ``metric_type`` is
            ``None`` or any other value.
        """
        # Checked in the order COSINE, IP, L2; the members on both enums
        # share their names, so one name drives both lookups.
        for member_name in ("COSINE", "IP", "L2"):
            if self.metric_type == getattr(MetricType, member_name):
                return getattr(HippoMetricType, member_name)
        return ""

    def index_param(self) -> dict:
        """Return the index-construction parameters keyed by field name."""
        build_fields = (
            "M",
            "ef_construction",
            "ef_search",
            "nlist",
            "nprobe",
            "m",
            "nbits",
        )
        return {field_name: getattr(self, field_name) for field_name in build_fields}

    def search_param(self) -> dict:
        """Return the query-time parameters keyed by field name."""
        query_fields = ("ef_search", "nprobe", "k_factor")
        return {field_name: getattr(self, field_name) for field_name in query_fields}
Loading