From 854278a5094b96dd06ebdd7ade1c9f3ee1631bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=A2=E5=B0=91=E6=95=8F?= Date: Wed, 11 Dec 2024 09:45:39 +0800 Subject: [PATCH] support alibaba cloud elasticsearch (#418) * add aliyun elasticsearch * code reuse --- vectordb_bench/backend/clients/__init__.py | 13 +++++++ .../aliyun_elasticsearch.py | 27 +++++++++++++ .../clients/aliyun_elasticsearch/config.py | 19 +++++++++ .../frontend/config/dbCaseConfigs.py | 39 +++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py create mode 100644 vectordb_bench/backend/clients/aliyun_elasticsearch/config.py diff --git a/vectordb_bench/backend/clients/__init__.py b/vectordb_bench/backend/clients/__init__.py index 8859381b..ba78c35c 100644 --- a/vectordb_bench/backend/clients/__init__.py +++ b/vectordb_bench/backend/clients/__init__.py @@ -37,6 +37,7 @@ class DB(Enum): MemoryDB = "MemoryDB" Chroma = "Chroma" AWSOpenSearch = "OpenSearch" + AliyunElasticsearch = "AliyunElasticsearch" Test = "test" @@ -103,6 +104,10 @@ def init_cls(self) -> Type[VectorDB]: from .alloydb.alloydb import AlloyDB return AlloyDB + if self == DB.AliyunElasticsearch: + from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch + return AliyunElasticsearch + @property def config_cls(self) -> Type[DBConfig]: """Import while in use""" @@ -166,6 +171,10 @@ def config_cls(self) -> Type[DBConfig]: from .alloydb.config import AlloyDBConfig return AlloyDBConfig + if self == DB.AliyunElasticsearch: + from .aliyun_elasticsearch.config import AliyunElasticsearchConfig + return AliyunElasticsearchConfig + def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]: if self == DB.Milvus: from .milvus.config import _milvus_case_config @@ -211,6 +220,10 @@ def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseCon from .alloydb.config import _alloydb_case_config return _alloydb_case_config.get(index_type) + if self == DB.AliyunElasticsearch: + from .elastic_cloud.config import ElasticCloudIndexConfig + return ElasticCloudIndexConfig + # DB.Pinecone, DB.Chroma, DB.Redis return EmptyDBCaseConfig diff --git a/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py b/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py new file mode 100644 index 00000000..41253ca1 --- /dev/null +++ b/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py @@ -0,0 +1,27 @@ +from ..elastic_cloud.elastic_cloud import ElasticCloud +from ..elastic_cloud.config import ElasticCloudIndexConfig + + +class AliyunElasticsearch(ElasticCloud): + def __init__( + self, + dim: int, + db_config: dict, + db_case_config: ElasticCloudIndexConfig, + indice: str = "vdb_bench_indice", # must be lowercase + id_col_name: str = "id", + vector_col_name: str = "vector", + drop_old: bool = False, + **kwargs, + ): + super().__init__( + dim=dim, + db_config=db_config, + db_case_config=db_case_config, + indice=indice, + id_col_name=id_col_name, + vector_col_name=vector_col_name, + drop_old=drop_old, + **kwargs, + ) + diff --git a/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py b/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py new file mode 100644 index 00000000..a2de4dc7 --- /dev/null +++ b/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py @@ -0,0 +1,19 @@ +from enum import Enum +from pydantic import SecretStr, BaseModel + +from ..api import DBConfig, DBCaseConfig, MetricType, IndexType + + +class AliyunElasticsearchConfig(DBConfig, BaseModel): + #: Protocol in use to connect to the node + scheme: str = "http" + host: str = "" + port: int = 9200 + user: str = "elastic" + password: SecretStr + + def to_dict(self) -> dict: + return { + "hosts": [{'scheme': self.scheme, 'host': self.host, 'port': self.port}], + "basic_auth": (self.user, self.password.get_secret_value()), + } diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py index f8632105..88794e30 100644 --- a/vectordb_bench/frontend/config/dbCaseConfigs.py +++ b/vectordb_bench/frontend/config/dbCaseConfigs.py @@ -1040,6 +1040,35 @@ class CaseConfigInput(BaseModel): }, ) +CaseConfigParamInput_EFConstruction_AliES = CaseConfigInput( + label=CaseConfigParamType.EFConstruction, + inputType=InputType.Number, + inputConfig={ + "min": 8, + "max": 512, + "value": 360, + }, +) + +CaseConfigParamInput_M_AliES = CaseConfigInput( + label=CaseConfigParamType.M, + inputType=InputType.Number, + inputConfig={ + "min": 4, + "max": 64, + "value": 30, + }, +) +CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput( + label=CaseConfigParamType.numCandidates, + inputType=InputType.Number, + inputConfig={ + "min": 1, + "max": 10000, + "value": 100, + }, +) + MilvusLoadConfig = [ CaseConfigParamInput_IndexType, @@ -1206,6 +1235,12 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_max_parallel_workers_AlloyDB, ] +AliyunElasticsearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AliES, CaseConfigParamInput_M_AliES] +AliyunElasticsearchPerformanceConfig = [ + CaseConfigParamInput_EFConstruction_AliES, + CaseConfigParamInput_M_AliES, + CaseConfigParamInput_NumCandidates_AliES, +] CASE_CONFIG_MAP = { DB.Milvus: { @@ -1247,4 +1282,8 @@ class CaseConfigInput(BaseModel): CaseLabel.Load: AlloyDBLoadConfig, CaseLabel.Performance: AlloyDBPerformanceConfig, }, + DB.AliyunElasticsearch: { + CaseLabel.Load: AliyunElasticsearchLoadingConfig, + CaseLabel.Performance: AliyunElasticsearchPerformanceConfig, + }, }