Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support MariaDB database #375

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ all = [
"opensearch-dsl",
"opensearch-py",
"memorydb",
"mariadb",
]

qdrant = [ "qdrant-client" ]
Expand All @@ -81,6 +82,7 @@ redis = [ "redis" ]
memorydb = [ "memorydb" ]
chromadb = [ "chromadb" ]
opensearch = [ "opensearch-py" ]
mariadb = [ "mariadb" ]

[project.urls]
"repository" = "https://github.com/zilliztech/VectorDBBench"
Expand Down
13 changes: 13 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class DB(Enum):
Chroma = "Chroma"
AWSOpenSearch = "OpenSearch"
AliyunElasticsearch = "AliyunElasticsearch"
MariaDB = "MariaDB"
Test = "test"
AliyunOpenSearch = "AliyunOpenSearch"

Expand Down Expand Up @@ -113,6 +114,10 @@ def init_cls(self) -> Type[VectorDB]:
from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
return AliyunOpenSearch

if self == DB.MariaDB:
from .mariadb.mariadb import MariaDB
return MariaDB

@property
def config_cls(self) -> Type[DBConfig]:
"""Import while in use"""
Expand Down Expand Up @@ -184,6 +189,10 @@ def config_cls(self) -> Type[DBConfig]:
from .aliyun_opensearch.config import AliyunOpenSearchConfig
return AliyunOpenSearchConfig

if self == DB.MariaDB:
from .mariadb.config import MariaDBConfig
return MariaDBConfig

def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
if self == DB.Milvus:
from .milvus.config import _milvus_case_config
Expand Down Expand Up @@ -237,6 +246,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon
from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
return AliyunOpenSearchIndexConfig

if self == DB.MariaDB:
from .mariadb.config import _mariadb_case_config
return _mariadb_case_config.get(index_type)

# DB.Pinecone, DB.Chroma, DB.Redis
return EmptyDBCaseConfig

Expand Down
107 changes: 107 additions & 0 deletions vectordb_bench/backend/clients/mariadb/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Annotated, Optional, Unpack

import click
import os
from pydantic import SecretStr

from ....cli.cli import (
CommonTypedDict,
HNSWFlavor1,
cli,
click_parameter_decorators_from_typed_dict,
run,
)
from vectordb_bench.backend.clients import DB


class MariaDBTypedDict(CommonTypedDict):
    """CLI options shared by every MariaDB benchmark command.

    Each field pairs the Python type of the parsed value with the
    ``click.option`` that declares it on the command line.
    """

    # Declared as ``--username``; the MariaDBHNSW command in this file reads
    # the parsed value back as ``parameters["username"]``.
    user_name: Annotated[
        str,
        click.option(
            "--username",
            type=str,
            help="Username",
            required=True,
        ),
    ]
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            help="Password",
            required=True,
        ),
    ]

    host: Annotated[
        str,
        click.option(
            "--host",
            type=str,
            help="Db host",
            default="127.0.0.1",
        ),
    ]

    port: Annotated[
        int,
        click.option(
            "--port",
            type=int,
            default=3306,
            help="Db Port",
        ),
    ]

    # click.Choice over string literals yields one of those strings, so the
    # value type is ``str`` (it was previously mis-annotated as ``int``).
    storage_engine: Annotated[
        str,
        click.option(
            "--storage-engine",
            type=click.Choice(["InnoDB", "MyISAM"]),
            help="DB storage engine",
            required=True,
        ),
    ]

class MariaDBHNSWTypedDict(MariaDBTypedDict):
    """MariaDB CLI options extended with the optional HNSW tuning knobs.

    All three options are optional; an omitted option is passed through to the
    case config as ``None``.
    """

    m: Annotated[
        Optional[int],
        click.option(
            "--m",
            type=int,
            help="M parameter in MHNSW vector indexing",
            required=False,
        ),
    ]

    # NOTE(review): the help text names ``mhnsw_min_limit``; later MariaDB
    # releases appear to expose this setting as ``mhnsw_ef_search`` - confirm
    # against the targeted server version before changing the text.
    ef_search: Annotated[
        Optional[int],
        click.option(
            "--ef-search",
            type=int,
            help="MariaDB system variable mhnsw_min_limit",
            required=False,
        ),
    ]

    max_cache_size: Annotated[
        Optional[int],
        click.option(
            "--max-cache-size",
            type=int,
            help="MariaDB system variable mhnsw_max_cache_size",
            required=False,
        ),
    ]


# CLI entry point: benchmark MariaDB with an HNSW index.
# Deliberately has no docstring - click would surface it as the command's
# --help text, which the command currently leaves empty.
@cli.command()
@click_parameter_decorators_from_typed_dict(MariaDBHNSWTypedDict)
def MariaDBHNSW(
    **parameters: Unpack[MariaDBHNSWTypedDict],
):
    # Imported lazily so the config module is only loaded when the command runs.
    from .config import MariaDBConfig, MariaDBHNSWConfig

    # Connection settings taken from the parsed command-line options.
    connection_config = MariaDBConfig(
        db_label=parameters["db_label"],
        user_name=parameters["username"],
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
    )

    # Index / search tuning for the HNSW case.
    hnsw_case_config = MariaDBHNSWConfig(
        M=parameters["m"],
        ef_search=parameters["ef_search"],
        storage_engine=parameters["storage_engine"],
        max_cache_size=parameters["max_cache_size"],
    )

    # The raw parameters are forwarded as well, alongside the built configs.
    run(
        db=DB.MariaDB,
        db_config=connection_config,
        db_case_config=hnsw_case_config,
        **parameters,
    )
71 changes: 71 additions & 0 deletions vectordb_bench/backend/clients/mariadb/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from pydantic import SecretStr, BaseModel
from typing import TypedDict
from ..api import DBConfig, DBCaseConfig, MetricType, IndexType

class MariaDBConfigDict(TypedDict):
    """Keyword arguments for opening a MariaDB connection.

    The keys are passed straight through as connection kwargs, so each name
    has to match the mariadb API exactly.
    """

    user: str  # account name used to log in
    password: str  # plain-text password for that account
    host: str  # server host name or address
    port: int  # server TCP port


# Connection settings for a MariaDB server. Only ``password`` has no default.
# (Kept as a comment rather than a class docstring so the model's generated
# schema description is unaffected.)
class MariaDBConfig(DBConfig):
    user_name: str = "root"
    password: SecretStr
    host: str = "127.0.0.1"
    port: int = 3306

    def to_dict(self) -> MariaDBConfigDict:
        """Return the connection settings as a plain dict.

        The secret password is unwrapped to its raw string, and ``user_name``
        is exposed under the key ``"user"``.
        """
        return MariaDBConfigDict(
            host=self.host,
            port=self.port,
            user=self.user_name,
            password=self.password.get_secret_value(),
        )


class MariaDBIndexConfig(BaseModel):
    """Base config for MariaDB"""

    # Distance metric for the case; defaults to None (unset).
    metric_type: MetricType | None = None

    def parse_metric(self) -> str:
        """Map ``metric_type`` to its distance-name string.

        Returns:
            ``"euclidean"`` for ``MetricType.L2`` and ``"cosine"`` for
            ``MetricType.COSINE``.

        Raises:
            ValueError: If ``metric_type`` is anything else, including ``None``.
        """
        metric = self.metric_type
        if metric == MetricType.L2:
            return "euclidean"
        if metric == MetricType.COSINE:
            return "cosine"
        # Anything else has no mapping here.
        raise ValueError(f"Metric type {self.metric_type} is not supported!")

# HNSW case configuration for MariaDB: index-build settings plus the
# search-time setting, layered on the shared metric handling of the base class.
class MariaDBHNSWConfig(MariaDBIndexConfig, DBCaseConfig):
    M: int | None
    ef_search: int | None
    index: IndexType = IndexType.HNSW
    storage_engine: str = "InnoDB"
    max_cache_size: int | None

    def index_param(self) -> dict:
        """Return the index-build settings as a dict.

        Raises:
            ValueError: Propagated from ``parse_metric`` when the metric type
                is unsupported.
        """
        metric_name = self.parse_metric()
        index_kind = self.index.value
        return {
            "storage_engine": self.storage_engine,
            "metric_type": metric_name,
            "index_type": index_kind,
            "M": self.M,
            "max_cache_size": self.max_cache_size,
        }

    def search_param(self) -> dict:
        """Return the search-time settings as a dict.

        Raises:
            ValueError: Propagated from ``parse_metric`` when the metric type
                is unsupported.
        """
        metric_name = self.parse_metric()
        return {
            "metric_type": metric_name,
            "ef_search": self.ef_search,
        }


# Lookup table from index type to the case-config class for that index.
# HNSW is the only entry.
_mariadb_case_config = {IndexType.HNSW: MariaDBHNSWConfig}


Loading