Skip to content

Commit

Permalink
add quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
marevol committed Apr 28, 2024
1 parent dbe47b8 commit da918cf
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 65 deletions.
16 changes: 13 additions & 3 deletions run-elasticsearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
" hnsw_ef_construction: int\n",
" hnsw_ef: int\n",
" update_docs_per_sec: int\n",
" quantization: str\n",
"\n",
" elasticsearch_name: str = \"benchmark_es\"\n",
" elasticsearch_host: str = \"localhost\"\n",
Expand All @@ -69,6 +70,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -84,6 +86,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -99,6 +102,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" }\n",
" return DataSetConfig(**setting.get(target_name))\n"
Expand Down Expand Up @@ -249,7 +253,13 @@
"outputs": [],
"source": [
"def create_index(config, number_of_shards=1, number_of_replicas=0):\n",
" print(F\"Creating {config.index_name}... \", end=\"\")\n",
" if config.exact:\n",
" knn_type = \"flat\"\n",
" if config.quantization == \"int8\":\n",
" knn_type = \"int8_hnsw\"\n",
" else:\n",
" knn_type = \"hnsw\"\n",
" print(F\"Creating {config.index_name} with {knn_type}... \", end=\"\")\n",
" response = requests.put(f\"http://{config.elasticsearch_host}:{config.elasticsearch_port}/{config.index_name}\",\n",
" headers={\"Content-Type\": \"application/json\"},\n",
" json={\n",
Expand Down Expand Up @@ -278,10 +288,10 @@
" \"embedding\": {\n",
" \"type\": \"dense_vector\",\n",
" \"dims\": config.dimension,\n",
" \"index\": not config.exact,\n",
" \"index\": True,\n",
" \"similarity\": config.distance,\n",
" \"index_options\": {\n",
" \"type\": \"int8_hnsw\",\n",
" \"type\": knn_type,\n",
" \"m\" : config.hnsw_m,\n",
" \"ef_construction\" : config.hnsw_ef_construction\n",
" }\n",
Expand Down
42 changes: 24 additions & 18 deletions run-qdrant.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
" hnsw_ef_construction: int\n",
" hnsw_ef: int\n",
" update_docs_per_sec: int\n",
" quantization: str\n",
"\n",
" qdrant_name: str = \"benchmark_qdrant\"\n",
" qdrant_host: str = \"localhost\"\n",
Expand All @@ -68,6 +69,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -83,6 +85,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -98,6 +101,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"int8\",\n",
" },\n",
" }\n",
" return DataSetConfig(**setting.get(target_name))\n"
Expand Down Expand Up @@ -244,26 +248,28 @@
"outputs": [],
"source": [
"def create_index(config):\n",
" print(F\"Creating Collection {config.index_name}... \", end=\"\")\n",
" print(F\"Creating Collection {config.index_name} with {config.quantization}... \", end=\"\")\n",
" schema = {\n",
" \"vectors\": {\n",
" \"size\": config.dimension,\n",
" \"distance\": config.distance,\n",
" \"hnsw_config\": {\n",
" \"m\": config.hnsw_m,\n",
" \"ef_construction\": config.hnsw_ef_construction,\n",
" }\n",
" }\n",
" }\n",
" if config.quantization == \"int8\":\n",
" schema[\"quantization_config\"] = {\n",
" \"scalar\": {\n",
" \"type\": \"int8\",\n",
" \"quantile\": 0.99,\n",
" \"always_ram\": True\n",
" }\n",
" }\n",
" response = requests.put(f\"http://{config.qdrant_host}:{config.qdrant_port}/collections/{config.index_name}\",\n",
" headers={\"Content-Type\": \"application/json\"},\n",
" json={\n",
" \"vectors\": {\n",
" \"size\": config.dimension,\n",
" \"distance\": config.distance,\n",
" \"hnsw_config\": {\n",
" \"m\": config.hnsw_m,\n",
" \"ef_construction\": config.hnsw_ef_construction,\n",
" }\n",
" },\n",
" \"quantization_config\": {\n",
" \"scalar\": {\n",
" \"type\": \"int8\",\n",
" \"quantile\": 0.99,\n",
" \"always_ram\": True\n",
" }\n",
" }\n",
" })\n",
" json=schema)\n",
" if response.status_code == 200:\n",
" print(\"[OK]\")\n",
" else:\n",
Expand Down
13 changes: 9 additions & 4 deletions run-vespa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
" hnsw_ef_construction: int\n",
" hnsw_ef: int\n",
" update_docs_per_sec: int\n",
" quantization: str\n",
"\n",
" vespa_name: str = \"benchmark_vespa\"\n",
" vespa_host: str = \"localhost\"\n",
Expand All @@ -80,6 +81,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"bfloat16\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -95,6 +97,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"bfloat16\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -110,6 +113,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"bfloat16\",\n",
" },\n",
" }\n",
" return DataSetConfig(**setting.get(target_name))\n"
Expand Down Expand Up @@ -305,7 +309,7 @@
" indexing: index | summary\n",
" index: enable-bm25\n",
" }\n",
" field embedding type tensor<float>(x[{dimension}]) {\n",
" field embedding type tensor<{float_type}>(x[{dimension}]) {\n",
" indexing: attribute | index\n",
" attribute {\n",
" distance-metric: {distance}\n",
Expand Down Expand Up @@ -334,8 +338,8 @@
" match-features: distance(field, embedding)\n",
"\n",
" inputs {\n",
" query(q) tensor<float>(x[{dimension}])\n",
" query(qa) tensor<float>(x[{dimension}])\n",
" query(q) tensor<{float_type}>(x[{dimension}])\n",
" query(qa) tensor<{float_type}>(x[{dimension}])\n",
" }\n",
"\n",
" first-phase {\n",
Expand All @@ -347,7 +351,8 @@
" .replace(\"{distance}\", str(config.distance))\\\n",
" .replace(\"{dimension}\", str(config.dimension))\\\n",
" .replace(\"{hnsw_m}\", str(config.hnsw_m))\\\n",
" .replace(\"{hnsw_ef_construction}\", str(config.hnsw_ef_construction))\n",
" .replace(\"{hnsw_ef_construction}\", str(config.hnsw_ef_construction))\\\n",
" .replace(\"{float_type}\", config.quantization)\n",
"\n",
" query_profile_str = \"\"\"\n",
"<query-profile id=\"default\">\n",
Expand Down
117 changes: 77 additions & 40 deletions run-weaviate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
" hnsw_ef_construction: int\n",
" hnsw_ef: int\n",
" update_docs_per_sec: int\n",
" quantization: str\n",
"\n",
" weaviate_name: str = \"benchmark_weaviate\"\n",
" weaviate_host: str = \"localhost\"\n",
Expand All @@ -66,6 +67,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"none\", # \"pq\",\n",
" },\n",
" \"1m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -80,6 +82,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"none\", # \"pq\",\n",
" },\n",
" \"5m-768-m49-ef100-ip\": {\n",
" \"content_path\": Path(\"dataset/passages-c400-jawiki-20230403\"),\n",
Expand All @@ -94,6 +97,7 @@
" \"hnsw_ef_construction\": 200,\n",
" \"hnsw_ef\": 100,\n",
" \"update_docs_per_sec\": 0,\n",
" \"quantization\": \"none\", # \"pq\",\n",
" },\n",
" }\n",
" return DataSetConfig(**setting.get(target_name))\n"
Expand All @@ -115,6 +119,7 @@
" \"--name\", config.weaviate_name,\n",
" \"-p\", f\"{config.weaviate_port}:8080\",\n",
" # \"-v\", f\"{volume_dir}:/data/content\",\n",
" \"-e\", \"ASYNC_INDEXING=true\", # to enable AutoPQ\n",
" f\"cr.weaviate.io/semitechnologies/weaviate:{config.weaviate_version}\"\n",
" ]\n",
" result = subprocess.run(docker_cmd, capture_output=True, text=True)\n",
Expand Down Expand Up @@ -240,48 +245,54 @@
"outputs": [],
"source": [
"def create_index(config):\n",
" print(F\"Creating {config.index_name}... \", end=\"\")\n",
" print(F\"Creating {config.index_name} with {config.quantization}... \", end=\"\")\n",
" schema = {\n",
" \"class\": config.index_name,\n",
" \"vectorIndexType\": \"hnsw\",\n",
" \"vectorIndexConfig\": {\n",
" \"distance\": config.distance,\n",
" \"maxConnections\": config.hnsw_m,\n",
" \"ef\": config.hnsw_ef,\n",
" \"efConstruction\": config.hnsw_ef_construction,\n",
" },\n",
" \"properties\": [\n",
" {\n",
" \"name\": \"doc_id\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"page_id\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"rev_id\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"section\",\n",
" \"dataType\": [\"string\"],\n",
" \"indexInverted\": True\n",
" },\n",
" {\n",
" \"name\": \"text\",\n",
" \"dataType\": [\"text\"],\n",
" \"indexInverted\": True\n",
" },\n",
" {\n",
" \"name\": \"title\",\n",
" \"dataType\": [\"text\"],\n",
" \"indexInverted\": True\n",
" }\n",
" ]\n",
" }\n",
" if config.quantization == \"pq\":\n",
" schema[\"vectorIndexConfig\"][\"pq\"] = {\n",
" \"enabled\": True,\n",
" \"trainingLimit\": 100000 - 10000,\n",
" }\n",
" response = requests.post(f\"http://{config.weaviate_host}:{config.weaviate_port}/v1/schema\",\n",
" headers={\"Content-Type\": \"application/json\"},\n",
" json={\n",
" \"class\": config.index_name,\n",
" \"vectorIndexType\": \"hnsw\",\n",
" \"vectorIndexConfig\": {\n",
" \"distance\": config.distance,\n",
" \"maxConnections\": config.hnsw_m,\n",
" \"ef\": config.hnsw_ef,\n",
" \"efConstruction\": config.hnsw_ef_construction,\n",
" },\n",
" \"properties\": [\n",
" {\n",
" \"name\": \"doc_id\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"pageId\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"revId\",\n",
" \"dataType\": [\"int\"]\n",
" },\n",
" {\n",
" \"name\": \"section\",\n",
" \"dataType\": [\"string\"],\n",
" \"indexInverted\": True\n",
" },\n",
" {\n",
" \"name\": \"text\",\n",
" \"dataType\": [\"text\"],\n",
" \"indexInverted\": True\n",
" },\n",
" {\n",
" \"name\": \"title\",\n",
" \"dataType\": [\"text\"],\n",
" \"indexInverted\": True\n",
" }\n",
" ]\n",
" })\n",
" json=schema)\n",
" if response.status_code == 200:\n",
" print(\"[OK]\")\n",
" else:\n",
Expand Down Expand Up @@ -352,6 +363,30 @@
" print(\" [FAIL]\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a6c35f9-e61d-402a-b83e-38059dd1d1d9",
"metadata": {},
"outputs": [],
"source": [
"def wait_for_indexing(config, retry_count=60):\n",
" print(f\"Waiting for {config.index_name}\", end=\"\")\n",
" for i in range(retry_count):\n",
" try:\n",
" response = requests.get(f\"http://{config.weaviate_host}:{config.weaviate_port}/v1/schema/{config.index_name}/shards\")\n",
" if response.status_code == 200:\n",
" obj = json.loads(response.text)\n",
" if obj is not None and len(obj) > 0 and obj[0].get(\"status\") == \"READY\":\n",
" print(\" [OK]\") \n",
" return\n",
" except Exception:\n",
" pass\n",
" print(\".\", end=\"\")\n",
" time.sleep(1)\n",
" print(\" [FAIL]\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -432,6 +467,8 @@
" if len(docs) > 0:\n",
" total_time += send_data(count)\n",
"\n",
" wait_for_indexing(config)\n",
"\n",
" execution_time = time.time() - start_time\n",
" hours, remainder = divmod(execution_time, 3600)\n",
" minutes, seconds = divmod(remainder, 60)\n",
Expand Down

0 comments on commit da918cf

Please sign in to comment.