From 3905f46d2f200886d8a4adf9fc352457ef9ef0a2 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Mon, 14 Oct 2024 18:58:24 +0500
Subject: [PATCH 01/20] Add separate configs for index and search for multiple
 datasets.

---
 ...onfig-custom-dataset-small-hnsw-1000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-1500k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-2000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-2500k.json | 38 +++++++++++++++++++
 ...config-custom-dataset-small-hnsw-500k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-1000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-1500k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-2000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-2500k.json | 38 +++++++++++++++++++
 ...config-custom-dataset-small-hnsw-500k.json | 38 +++++++++++++++++++
 10 files changed, 380 insertions(+)
 create mode 100644 custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
 create mode 100644 custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
 create mode 100644 custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
 create mode 100644 custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
 create mode 100644 custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-500k.json

diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
new file mode 100644
index 000000000..6a468866b
--- /dev/null
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann2",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "20GB",
+        "max-parallel-workers": 7,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-1m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_1000k",
+        "custom-dataset-size": 1000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 2,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
new file mode 100644
index 000000000..4194ad0e4
--- /dev/null
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "20GB",
+        "max-parallel-workers": 7,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-1_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_1500k",
+        "custom-dataset-size": 1500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 3,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
new file mode 100644
index 000000000..147101672
--- /dev/null
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann3",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "20GB",
+        "max-parallel-workers": 7,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-2m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_2000k",
+        "custom-dataset-size": 2000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 4,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
new file mode 100644
index 000000000..e950b60e5
--- /dev/null
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann2",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "20GB",
+        "max-parallel-workers": 7,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-2_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_2500k",
+        "custom-dataset-size": 2500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 5,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
new file mode 100644
index 000000000..d2a489f3b
--- /dev/null
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "20GB",
+        "max-parallel-workers": 7,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-500K",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_500k",
+        "custom-dataset-size": 500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 1,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
new file mode 100644
index 000000000..352361e4f
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann2",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-1m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_1000k",
+        "custom-dataset-size": 1000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 2,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
new file mode 100644
index 000000000..a85fec9d8
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-1_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_1500k",
+        "custom-dataset-size": 1500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 3,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
new file mode 100644
index 000000000..3fa809c27
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann3",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-2m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_2000k",
+        "custom-dataset-size": 2000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 4,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
new file mode 100644
index 000000000..814ceb598
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann2",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-2_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_2500k",
+        "custom-dataset-size": 2500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 5,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
new file mode 100644
index 000000000..39cfb13e6
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison-run-seqon",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-500K",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_500k",
+        "custom-dataset-size": 500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 1,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file

From 4eb23bb52f5f15f00ba7a520cda0a0543f0aea34 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Mon, 14 Oct 2024 18:58:47 +0500
Subject: [PATCH 02/20] Update run script to handle multiple configs in a
 directory.

---
 run-custom-dataset.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/run-custom-dataset.py b/run-custom-dataset.py
index a5d1213d5..b3f71ae29 100644
--- a/run-custom-dataset.py
+++ b/run-custom-dataset.py
@@ -1,3 +1,4 @@
+import argparse
 import json
 import time
 from contextlib import redirect_stdout
@@ -246,16 +247,23 @@ def run_benchmark(case, db_config):
             time.sleep(60)
 
 def main():
-    config = load_config("config.json")
-    start_time = time.time()
-    for case in config['cases']:
-        print(f"Running case: {case['db-label']}")
-        setup_database(config)
-
-        run_benchmark(case, config['database'])
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"COMPLETED ALL EXECUTIONS. total_duration={execution_time}")
+    parser = argparse.ArgumentParser(description="Run benchmarks on a custom dataset.")
+    parser.add_argument("--config-dir-path", type=str, help="Path to the config files directory.")
+    args = parser.parse_args()
+
+    for dir_path, _, file_names in os.walk(args.config_dir_path):
+        for file_name in file_names:
+            config = load_config(os.path.join(dir_path, file_name))
+            start_time = time.time()
+            for case in config['cases']:
+                print(f"Running case: {case['db-label']}")
+                setup_database(config)
+
+                run_benchmark(case, config['database'])
+                teardown_database(config)
+            end_time = time.time()
+            execution_time = end_time - start_time
+            print(f"COMPLETED ALL EXECUTIONS of config {file_name}. total_duration={execution_time}")
 
 if __name__ == "__main__":
     main()

From f662dd142e3c7446f29bdf74fa624d3b58944b01 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 17:37:55 +0500
Subject: [PATCH 03/20] resolved comments

---
 .../config-custom-dataset-small-hnsw-1000k.json           | 8 ++++----
 .../config-custom-dataset-small-hnsw-1500k.json           | 8 ++++----
 .../config-custom-dataset-small-hnsw-2000k.json           | 8 ++++----
 .../config-custom-dataset-small-hnsw-2500k.json           | 8 ++++----
 .../config-custom-dataset-small-hnsw-500k.json            | 8 ++++----
 .../config-custom-dataset-small-hnsw-1000k.json           | 4 ++--
 .../config-custom-dataset-small-hnsw-1500k.json           | 4 ++--
 .../config-custom-dataset-small-hnsw-2000k.json           | 4 ++--
 .../config-custom-dataset-small-hnsw-2500k.json           | 4 ++--
 .../config-custom-dataset-small-hnsw-500k.json            | 4 ++--
 10 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index 6a468866b..12a041ab6 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -3,21 +3,21 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann2",
+      "db_name": "ann-1000k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": true,
         "load": true,
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "20GB",
-        "max-parallel-workers": 7,
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index 4194ad0e4..f978cdca5 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -3,21 +3,21 @@
       "host": "localhost2",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann",
+      "db_name": "ann-1500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": true,
         "load": true,
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "20GB",
-        "max-parallel-workers": 7,
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
index 147101672..3b8068674 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -3,21 +3,21 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann3",
+      "db_name": "ann-2000k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": true,
         "load": true,
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "20GB",
-        "max-parallel-workers": 7,
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index e950b60e5..5612e29db 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -3,21 +3,21 @@
       "host": "localhost2",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann2",
+      "db_name": "ann-2500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": true,
         "load": true,
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "20GB",
-        "max-parallel-workers": 7,
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
index d2a489f3b..dc86e930a 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
@@ -3,21 +3,21 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann",
+      "db_name": "ann-500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": true,
         "load": true,
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "20GB",
-        "max-parallel-workers": 7,
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index 352361e4f..bed3ebad1 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -3,14 +3,14 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann2",
+      "db_name": "ann-1000k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index a85fec9d8..41fd90c94 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -3,14 +3,14 @@
       "host": "localhost2",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann",
+      "db_name": "ann-1500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index 3fa809c27..f488c736a 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -3,14 +3,14 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann3",
+      "db_name": "ann-2000k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 814ceb598..7111acc5f 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -3,14 +3,14 @@
       "host": "localhost2",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann2",
+      "db_name": "ann-2500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index 39cfb13e6..0867ef241 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -3,14 +3,14 @@
       "host": "localhost1",
       "username": "postgres",
       "password": "postgres",
-      "db_name": "ann",
+      "db_name": "ann-500k",
       "instance_type": "db.m6i.large",
       "provider": "aws",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-run-seqon",
+        "db-label": "memory-comparison",
         "drop_old": false,
         "load": false,
         "search-serial": true,

From 10e1d7495bfbbf788444e8fdc2e4017f4ce45e3f Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 17:38:49 +0500
Subject: [PATCH 04/20] Updated script to only generate dataset of specified
 file_count.

---
 create_dataset_subsets.py | 66 +++++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/create_dataset_subsets.py b/create_dataset_subsets.py
index efb1d3c82..c1263cabf 100644
--- a/create_dataset_subsets.py
+++ b/create_dataset_subsets.py
@@ -1,7 +1,11 @@
 import os
 import shutil
 import argparse
+import logging
 
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 
 def get_file_name(file_name: str, file_prefix: str, files_count: int) -> str:
     if files_count == 1:
@@ -9,28 +13,47 @@ def get_file_name(file_name: str, file_prefix: str, files_count: int) -> str:
     file_name = file_name.split("of-")[0]
     return file_name + "of-" + str(files_count).zfill(2) + ".parquet"
 
-def create_subsets(base_dir, save_dir_path, subset_prefix, file_prefix, step_size):
-    files = sorted([f for f in os.listdir(base_dir) if f.startswith(file_prefix)])
+def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_count, row_count=500_00):
+    logger.info(f"Starting dataset creation with {file_count} files.")
+
+    # Sort the files and pick only the first 'file_count' files
+    files = sorted([f for f in os.listdir(base_dir) if f.startswith(file_prefix)])[:file_count]
     num_files = len(files)
+
+    if num_files == 0:
+        logger.warning("No files found with the specified prefix.")
+        return
+
+    logger.info(f"Found {num_files} files. Creating dataset...")
+
+    # Create the directory for the dataset
+    subset_dir = os.path.join(save_dir_path, f"{subset_prefix}_{file_count * row_count // 1000}k")
+    os.makedirs(subset_dir, exist_ok=True)
+    logger.info(f"Created directory for the dataset: {subset_dir}")
+
+    # Copy the first 'file_count' files into the subset directory
+    for file in files:
+        src_file = os.path.join(base_dir, file)
+        dst_file = os.path.join(subset_dir, get_file_name(file, file_prefix, file_count))
+        shutil.copy(src_file, dst_file)
+        logger.info(f"Copied {file} to {dst_file}")
     
-    for i in range(1, num_files + 1):
-        subset_dir = os.path.join(save_dir_path, f"{subset_prefix}_{i * step_size // 1000}k")
-        os.makedirs(subset_dir, exist_ok=True)
-
-        for j in range(i):
-            src_file = os.path.join(base_dir, files[j])
-            dst_file = os.path.join(subset_dir, get_file_name(files[j], file_prefix, i))
-            shutil.copy(src_file, dst_file)
-        src_test_file = os.path.join(base_dir, "test.parquet")
-        dst_test_file = os.path.join(subset_dir, "test.parquet")
-        shutil.copy(src_test_file, dst_test_file)
+    # Also copy the test.parquet file
+    src_test_file = os.path.join(base_dir, "test.parquet")
+    dst_test_file = os.path.join(subset_dir, "test.parquet")
+    shutil.copy(src_test_file, dst_test_file)
+    logger.info(f"Copied test.parquet to {subset_dir}")
+
+    logger.info(f"Dataset creation completed. {file_count} files have been copied to {subset_dir}.")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Create subsets of Parquet files using Dask.")
-    parser.add_argument("--directory", type=str, help="Path to the directory containing Parquet files.")
-    parser.add_argument("--save-dir-path", type=str, help="Directory path where data will be saved")
-    parser.add_argument("--dataset-name-prefix", type=str, help="Name prefix of the folder where each subset will be saved.")
+    parser = argparse.ArgumentParser(description="Create a dataset with a specified number of Parquet files.")
+    parser.add_argument("--directory", type=str, required=True, help="Path to the directory containing Parquet files.")
+    parser.add_argument("--save-dir-path", type=str, required=True, help="Directory path where the dataset will be saved.")
+    parser.add_argument("--dataset-name-prefix", type=str, help="Name prefix for the dataset folder.")
     parser.add_argument("--is-shuffled", type=bool, help="Whether the files are shuffled or not.")
+    parser.add_argument("--file-count", type=int, required=True, help="Number of Parquet files to include in the dataset.")
+    
     args = parser.parse_args()
 
     file_prefix = (
@@ -48,7 +71,10 @@ def create_subsets(base_dir, save_dir_path, subset_prefix, file_prefix, step_siz
         if args.save_dir_path
         else args.directory
     )
-    step_size = 500_000  # 500k
 
-    create_subsets(args.directory, save_dir_path, subset_prefix, file_prefix, step_size)
-    print(f'Finished creating subsets of Parquet files in {args.directory}.')
\ No newline at end of file
+    # Log the input parameters
+    logger.info(f"Parameters received: directory={args.directory}, save_dir_path={args.save_dir_path}, file_count={args.file_count}, dataset_name_prefix={subset_prefix}, is_shuffled={args.is_shuffled}")
+
+    # Create the dataset with the specified file_count
+    create_dataset(args.directory, save_dir_path, subset_prefix, file_prefix, args.file_count)
+    logger.info(f'Finished creating a dataset with {args.file_count} Parquet files.')
\ No newline at end of file

From a5cb055db2ef05674e7e99f6f3d5fb01e8eeac1d Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 17:41:46 +0500
Subject: [PATCH 05/20] Added config files of large datasets

---
 ...onfig-custom-dataset-small-hnsw-3000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-3500k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-4000k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-4500k.json | 38 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-5000k.json | 38 +++++++++++++++++++
 5 files changed, 190 insertions(+)
 create mode 100644 custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
 create mode 100644 custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
 create mode 100644 custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
 create mode 100644 custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
 create mode 100644 custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json

diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
new file mode 100644
index 000000000..681462f70
--- /dev/null
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-3000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-3m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_3000k",
+        "custom-dataset-size": 3000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 6,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
new file mode 100644
index 000000000..5da2a8fa7
--- /dev/null
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-3500k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-3_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_3500k",
+        "custom-dataset-size": 3500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 7,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
new file mode 100644
index 000000000..8ff014218
--- /dev/null
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-4000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-4m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_4000k",
+        "custom-dataset-size": 4000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 8,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
new file mode 100644
index 000000000..f57ae2c9e
--- /dev/null
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-4500k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-4_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_4500k",
+        "custom-dataset-size": 4500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 9,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
new file mode 100644
index 000000000..a9b3e20c3
--- /dev/null
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -0,0 +1,38 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-5000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": true,
+        "load": true,
+        "search-serial": false,
+        "search-concurrent": false,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "42GB",
+        "max-parallel-workers": 15,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_5000k",
+        "custom-dataset-size": 5000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 10,
+        "custom-dataset-use-shuffled": false,
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file

From b605c835808a6c27e877bb6620cc92122730a5fa Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 19:19:08 +0500
Subject: [PATCH 06/20] Updated scripts and configs to create dataset on
 runtime.

---
 create_dataset_subsets.py                     |  7 +-
 ...onfig-custom-dataset-small-hnsw-3000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-3500k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-4000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-4500k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-5000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-1000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-1500k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-2000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-2500k.json |  5 ++
 ...config-custom-dataset-small-hnsw-500k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-1000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-1500k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-2000k.json |  5 ++
 ...onfig-custom-dataset-small-hnsw-2500k.json |  5 ++
 ...config-custom-dataset-small-hnsw-500k.json |  5 ++
 run-custom-dataset.py                         | 77 +++++++++++++++++++
 17 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/create_dataset_subsets.py b/create_dataset_subsets.py
index c1263cabf..21b131a84 100644
--- a/create_dataset_subsets.py
+++ b/create_dataset_subsets.py
@@ -13,7 +13,7 @@ def get_file_name(file_name: str, file_prefix: str, files_count: int) -> str:
     file_name = file_name.split("of-")[0]
     return file_name + "of-" + str(files_count).zfill(2) + ".parquet"
 
-def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_count, row_count=500_00):
+def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_count, row_count=500_000):
     logger.info(f"Starting dataset creation with {file_count} files.")
 
     # Sort the files and pick only the first 'file_count' files
@@ -53,7 +53,6 @@ def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_cou
     parser.add_argument("--dataset-name-prefix", type=str, help="Name prefix for the dataset folder.")
     parser.add_argument("--is-shuffled", type=bool, help="Whether the files are shuffled or not.")
     parser.add_argument("--file-count", type=int, required=True, help="Number of Parquet files to include in the dataset.")
-    
     args = parser.parse_args()
 
     file_prefix = (
@@ -72,6 +71,10 @@ def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_cou
         else args.directory
     )
 
+    if os.path.exists(save_dir_path) and os.listdir(save_dir_path):
+        shutil.rmtree(save_dir_path)
+        logger.info(f"Deleted existing directory: {save_dir_path}")
+    
     # Log the input parameters
     logger.info(f"Parameters received: directory={args.directory}, save_dir_path={args.save_dir_path}, file_count={args.file_count}, dataset_name_prefix={subset_prefix}, is_shuffled={args.is_shuffled}")
 
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
index 681462f70..e82e520fc 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 6,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
index 5da2a8fa7..b56c50230 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 7,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
index 8ff014218..9d4d7e671 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 8,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
index f57ae2c9e..331d818cd 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 9,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
index a9b3e20c3..9dc745bc5 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 10,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index 12a041ab6..ccda5c237 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index f978cdca5..381d5c982 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
index 3b8068674..ada201112 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index 5612e29db..1d1f669b5 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
index dc86e930a..74514cf2a 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index bed3ebad1..f1cc2534e 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index 41fd90c94..93aa2d4db 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index f488c736a..31d62fec7 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 7111acc5f..08b380cc0 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index 0867ef241..8e173c630 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -31,6 +31,11 @@
         "custom-dataset-dim": 1536,
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
         "run_count": 1
       }
     ]
diff --git a/run-custom-dataset.py b/run-custom-dataset.py
index b3f71ae29..bde14a6be 100644
--- a/run-custom-dataset.py
+++ b/run-custom-dataset.py
@@ -7,6 +7,7 @@
 import psycopg
 from psycopg import sql
 import os
+import shutil
 
 os.environ["LOG_LEVEL"] = "DEBUG"
 
@@ -41,11 +42,62 @@ def setup_database(config):
         cursor = conn.cursor()
         cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;")
         cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_buffercache;")
+        cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_prewarm;")
         conn.commit()
         conn.close()
     except Exception as e:
         print(f"Setup failed: {e}")
 
+
+def create_dataset(args: dict) -> bool:
+    """
+    This function creates a dataset from the original dataset using script
+    create_dataset_subsets.py and pass required arguments to it.
+    """
+    file_count = args.get("file-count")
+    is_shuffled = args.get("is-shuffled")
+    directory = args.get("directory")
+    output_dir = args.get("save-dir-path")
+
+    try:
+        # Define the command to run the create_dataset_subsets.py script
+        command = [
+            "python3", "create_dataset_subsets.py",
+            "--directory", directory,
+            "--save-dir-path", output_dir,
+            "--file-count", str(file_count),
+        ]
+        print(f"Running command: {' '.join(command)}")
+
+        file_prefix = "train"
+        if is_shuffled:
+            file_prefix = "shuffle_train"
+            command += ["--is-shuffled", "True"]
+        subprocess.run(command, check=True)
+        print("Check if dataset was created successfully.")
+
+        created_files_count = sum([1 for _, _, files in os.walk(output_dir) for f in files if f.startswith(file_prefix)])
+        print(f"Number of files in the output dataset directory: {created_files_count}")
+
+        if created_files_count != file_count:
+            raise Exception("Incorrect number of files.")
+        print("Dataset creation successful.")
+    except (subprocess.CalledProcessError, Exception) as e:
+        print(f"Dataset creation failed: {e}")
+        return False
+    
+    return True
+
+def delete_dataset(dataset_dir: str):
+    try:
+        if os.path.exists(dataset_dir):
+            shutil.rmtree(dataset_dir)
+            print(f"Deleted directory: {dataset_dir}")
+        else:
+            print(f"Directory does not exist: {dataset_dir}")
+    except Exception as e:
+        print(f"Failed to delete directory: {e}")
+
 def teardown_database(config):
     # Optionally drop the database after the test
     pass
@@ -133,6 +185,21 @@ def query_configurations(config):
         print(f"Failed to query configurations: {e}")
         return {}
 
+def pre_warm(config):
+    print("Running pre warm")
+    try:
+        conn = psycopg.connect(
+                dbname=config['db_name'],
+                user=config['username'],
+                password=config['password'],
+                host=config['host'],
+        )
+        cursor = conn.cursor()
+        cursor.execute("SELECT pg_prewarm('public.pgvector_index')");
+        conn.close()
+        print("Pre-warm completed")
+    except Exception:
+        pass
 
 def run_benchmark(case, db_config):
     base_command = [
@@ -226,6 +293,8 @@ def run_benchmark(case, db_config):
                             print(f"{key}: {value}")
                         get_stats(db_config)
                         f.flush()
+                        pre_warm(db_config)
+                        print(f"Running with prewarm")
                         print(f"Running command: {' '.join(command)}")
                         f.flush()
 
@@ -258,9 +327,17 @@ def main():
             for case in config['cases']:
                 print(f"Running case: {case['db-label']}")
                 setup_database(config)
+                
+                create_dataset_args = case['create-dataset-args']
+                create_dataset_args["file-count"] = case["custom-dataset-file-count"]
+                dataset_created = create_dataset(create_dataset_args)
+                if not dataset_created:
+                    print(f"Failed to create dataset for case: {case['custom-case-name']} -- Skipping execution.")
+                    continue
 
                 run_benchmark(case, config['database'])
                 teardown_database(config)
+                delete_dataset(create_dataset_args["save-dir-path"])
             end_time = time.time()
             execution_time = end_time - start_time
             print(f"COMPLETED ALL EXECUTIONS of config {file_name}. total_duration={execution_time}")

From 12a439a7167439810d3266309c8b278a52e1be70 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 19:24:30 +0500
Subject: [PATCH 07/20] Updated dataset directory paths

---
 .../config-custom-dataset-small-hnsw-3000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-3500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-5000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-500k.json                | 4 ++--
 .../config-custom-dataset-small-hnsw-1000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2500k.json               | 4 ++--
 custom-run-configs/config-custom-dataset-small-hnsw-500k.json | 4 ++--
 15 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
index e82e520fc..2a822902d 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 6,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
index b56c50230..a1cc4ec50 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 7,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
index 9d4d7e671..26615954e 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 8,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
index 331d818cd..f666ed424 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 9,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
index 9dc745bc5..0fe1c9a9d 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 10,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index ccda5c237..f6b49680e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index 381d5c982..af3ea6175 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
index ada201112..e80b1b20e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index 1d1f669b5..0a572765d 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
index 74514cf2a..e9a61787e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index f1cc2534e..06d81d4f5 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index 93aa2d4db..c2bd493d5 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index 31d62fec7..4611c6062 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 08b380cc0..776cb9d8c 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index 8e173c630..f204bf56b 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/azureuser/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/azureuser/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1

From d990ac5f8405e544daaee3d189d46582952613d1 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 19:58:42 +0500
Subject: [PATCH 08/20] Added run configs up to 5 million custom dataset

---
 ...onfig-custom-dataset-small-hnsw-3000k.json | 43 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-3500k.json | 43 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-4000k.json | 43 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-4500k.json | 43 +++++++++++++++++++
 ...onfig-custom-dataset-small-hnsw-5000k.json | 43 +++++++++++++++++++
 5 files changed, 215 insertions(+)
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
 create mode 100644 custom-run-configs/config-custom-dataset-small-hnsw-5000k.json

diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
new file mode 100644
index 000000000..0be352d78
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-3000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-3m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_3000k",
+        "custom-dataset-size": 3000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 6,
+        "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
new file mode 100644
index 000000000..d87c5fbb5
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-3500k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-3_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_3500k",
+        "custom-dataset-size": 3500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 7,
+        "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
new file mode 100644
index 000000000..e0cbfd5ce
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-4000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-4m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_4000k",
+        "custom-dataset-size": 4000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 8,
+        "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
new file mode 100644
index 000000000..c7110eb85
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+      "host": "localhost2",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-4500k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-4_5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_4500k",
+        "custom-dataset-size": 4500000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 9,
+        "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
new file mode 100644
index 000000000..1d9124996
--- /dev/null
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+      "host": "localhost1",
+      "username": "postgres",
+      "password": "postgres",
+      "db_name": "ann-5000k",
+      "instance_type": "db.m6i.large",
+      "provider": "aws",
+      "enable_seqscan": "on"
+    },
+    "cases": [
+      {
+        "db-label": "memory-comparison",
+        "drop_old": false,
+        "load": false,
+        "search-serial": true,
+        "search-concurrent": true,
+        "case-type": "PerformanceCustomDataset",
+        "maintenance-work-mem": "8GB",
+        "max-parallel-workers": 3,
+        "ef-search": [40],
+        "ef-construction": 128,
+        "m": 32,
+        "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+        "concurrency-duration": 30,
+        "k": 10,
+        "custom-case-name": "hnsw-1536D-5m",
+        "custom-dataset-name": "custom-openai",
+        "custom-dataset-dir": "openai_5000k",
+        "custom-dataset-size": 5000000,
+        "custom-dataset-dim": 1536,
+        "custom-dataset-file-count": 10,
+        "custom-dataset-use-shuffled": false,
+        "create-dataset-args": {
+          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "is-shuffled": false
+        },
+        "run_count": 1
+      }
+    ]
+  }
+  
\ No newline at end of file

From b6dd4d5159941bfe8ed37e689d5ee992c7ca58f6 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 20:00:49 +0500
Subject: [PATCH 09/20] Increased run count to 3 for search configs

---
 custom-run-configs/config-custom-dataset-small-hnsw-1000k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1500k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2000k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2500k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3000k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3500k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-4000k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-4500k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-5000k.json | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-500k.json  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index 06d81d4f5..633aad799 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index c2bd493d5..d84947c21 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index 4611c6062..e04da17b4 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 776cb9d8c..752d71d60 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
index 0be352d78..2c8f015e3 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
index d87c5fbb5..dfb850610 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
index e0cbfd5ce..68074b2f4 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
index c7110eb85..24d830036 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
index 1d9124996..63371ff68 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index f204bf56b..81d89826d 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -36,7 +36,7 @@
           "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
           "is-shuffled": false
         },
-        "run_count": 1
+        "run_count": 3
       }
     ]
   }

From 17b34e058ca55bd74e52fb8d4d00f189382a4426 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 20:07:42 +0500
Subject: [PATCH 10/20] Updated db labels in configs.

---
 .../config-custom-dataset-small-hnsw-3000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-3500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-5000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-1000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-1500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-2000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-2500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-500k.json                  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-4000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-5000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-500k.json   | 2 +-
 19 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
index 2a822902d..477f395f4 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-3000k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
index a1cc4ec50..2b3284e4c 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-3500k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
index 26615954e..25979e397 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-4000k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
index f666ed424..caf5fa5e3 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-4500k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
index 0fe1c9a9d..f897ec422 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-5000k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index f6b49680e..8d1bcf5c7 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-1000k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index af3ea6175..b0edfe6f2 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-1500k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
index e80b1b20e..eba71b9f5 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-2000k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index 0a572765d..530e11bbc 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-2500k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
index e9a61787e..b49db3f1a 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-500k",
         "drop_old": true,
         "load": true,
         "search-serial": false,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index 633aad799..f3aea5ec3 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-1000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index d84947c21..98c5c71f7 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-1500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index e04da17b4..2452055a8 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-2000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 752d71d60..100cc6cb1 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-2500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
index 2c8f015e3..c1d3829b1 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-3000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
index dfb850610..a69d3015a 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-3500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
index 68074b2f4..8096bc863 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-4000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
index 63371ff68..a00471e09 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-5000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index 81d89826d..a151768b0 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,

From 0ab30ce37893611a78a151c27689e6215be56331 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 20:09:05 +0500
Subject: [PATCH 11/20] Updated db-label in config.

---
 custom-run-configs/config-custom-dataset-small-hnsw-4500k.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
index 24d830036..2f3b3ba0d 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison",
+        "db-label": "memory-comparison-4500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,

From 2603723b980c46867ec856d0b3d0b37f172b57d8 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 20:52:10 +0500
Subject: [PATCH 12/20] Dividing configs based on dataset size for building
 indexes in parallel

---
 .../config-custom-dataset-small-hnsw-2000k.json                   | 0
 .../config-custom-dataset-small-hnsw-500k.json                    | 0
 .../config-custom-dataset-small-hnsw-3500k.json                   | 0
 .../config-custom-dataset-small-hnsw-4000k.json                   | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-large}/config-custom-dataset-small-hnsw-2000k.json (100%)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-large}/config-custom-dataset-small-hnsw-500k.json (100%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs}/config-custom-dataset-small-hnsw-3500k.json (100%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs}/config-custom-dataset-small-hnsw-4000k.json (100%)

diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
similarity index 100%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-2000k.json
rename to custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
similarity index 100%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-500k.json
rename to custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
similarity index 100%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3500k.json
rename to custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
similarity index 100%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4000k.json
rename to custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json

From 22e2fd9a9fbdc127eced22c4d41c6256d94ba048 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 21:14:25 +0500
Subject: [PATCH 13/20] Fixed create_dataset_args directory paths

---
 .../config-custom-dataset-small-hnsw-2000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-3000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-5000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-500k.json                | 4 ++--
 .../config-custom-dataset-small-hnsw-1000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-3500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-1500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-2500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-3000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-3500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4000k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-4500k.json               | 4 ++--
 .../config-custom-dataset-small-hnsw-5000k.json               | 4 ++--
 custom-run-configs/config-custom-dataset-small-hnsw-500k.json | 4 ++--
 20 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
index eba71b9f5..b17bca14b 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
index 477f395f4..eb77e65e1 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 6,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
index caf5fa5e3..3cd27b7a8 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 9,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
index f897ec422..b3292219f 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 10,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
index b49db3f1a..92dd5af3c 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index 8d1bcf5c7..4f70e0d08 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index b0edfe6f2..c65b6f3ae 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index 530e11bbc..9eb156f0e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
index 2b3284e4c..9622276c3 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 7,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
index 25979e397..61f278149 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 8,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 1
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index f3aea5ec3..3f6a20112 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 2,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index 98c5c71f7..7e7f66318 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 3,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
index 2452055a8..07ecf7844 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 4,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 100cc6cb1..d58d863a5 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 5,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
index c1d3829b1..f2b5975fa 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 6,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
index a69d3015a..29cf8bd89 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 7,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
index 8096bc863..1e48bd316 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 8,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
index 2f3b3ba0d..e2bd637f9 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 9,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
index a00471e09..a15a1ba0a 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 10,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
index a151768b0..99ec24468 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-500k.json
@@ -32,8 +32,8 @@
         "custom-dataset-file-count": 1,
         "custom-dataset-use-shuffled": false,
         "create-dataset-args": {
-          "directory": "/home/ubuntu/vectordb_bench/datasets/openai/openai_large_5m",
-          "save-dir-path": "/home/ubuntu/vectordb_bench/datasets/custom-openai/",
+          "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+          "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
           "is-shuffled": false
         },
         "run_count": 3

From 34e6b4514d47c10ece8996d9cd3e56e85e8fbe56 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Tue, 15 Oct 2024 21:34:22 +0500
Subject: [PATCH 14/20] copy neighbors.parquet into created_dataset directory

---
 create_dataset_subsets.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/create_dataset_subsets.py b/create_dataset_subsets.py
index 21b131a84..55c147374 100644
--- a/create_dataset_subsets.py
+++ b/create_dataset_subsets.py
@@ -44,6 +44,11 @@ def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_cou
     shutil.copy(src_test_file, dst_test_file)
     logger.info(f"Copied test.parquet to {subset_dir}")
 
+    src_test_file = os.path.join(base_dir, "neighbors.parquet")
+    dst_test_file = os.path.join(subset_dir, "neighbors.parquet")
+    shutil.copy(src_test_file, dst_test_file)
+    logger.info(f"Copied neighbors.parquet to {subset_dir}")
+
     logger.info(f"Dataset creation completed. {file_count} files have been copied to {subset_dir}.")
 
 if __name__ == "__main__":

From 130425e01f6eeab9eac6409afecb0e3f51463371 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Wed, 16 Oct 2024 11:26:03 +0500
Subject: [PATCH 15/20] Divided run configs into 2 folders to run in parallel
 on 2 instances.

---
 .../config-custom-dataset-small-hnsw-2000k.json                   | 0
 .../config-custom-dataset-small-hnsw-3000k.json                   | 0
 .../config-custom-dataset-small-hnsw-4500k.json                   | 0
 .../config-custom-dataset-small-hnsw-5000k.json                   | 0
 .../config-custom-dataset-small-hnsw-500k.json                    | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename {custom-run-configs => custom-run-configs-large}/config-custom-dataset-small-hnsw-2000k.json (100%)
 rename {custom-run-configs => custom-run-configs-large}/config-custom-dataset-small-hnsw-3000k.json (100%)
 rename {custom-run-configs => custom-run-configs-large}/config-custom-dataset-small-hnsw-4500k.json (100%)
 rename {custom-run-configs => custom-run-configs-large}/config-custom-dataset-small-hnsw-5000k.json (100%)
 rename {custom-run-configs => custom-run-configs-large}/config-custom-dataset-small-hnsw-500k.json (100%)

diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
similarity index 100%
rename from custom-run-configs/config-custom-dataset-small-hnsw-2000k.json
rename to custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
similarity index 100%
rename from custom-run-configs/config-custom-dataset-small-hnsw-3000k.json
rename to custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
similarity index 100%
rename from custom-run-configs/config-custom-dataset-small-hnsw-4500k.json
rename to custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
similarity index 100%
rename from custom-run-configs/config-custom-dataset-small-hnsw-5000k.json
rename to custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
similarity index 100%
rename from custom-run-configs/config-custom-dataset-small-hnsw-500k.json
rename to custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json

From 43c18e651cf071d0a5b6d076074059d6c6023b51 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Wed, 16 Oct 2024 11:43:17 +0500
Subject: [PATCH 16/20] Updated instance type in all configs.

---
 .../config-custom-dataset-small-hnsw-2000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-3000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-5000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-500k.json                  | 2 +-
 .../config-custom-dataset-small-hnsw-1000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-1500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-2500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-3500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-2000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-3000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-5000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-500k.json                  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-4000k.json  | 2 +-
 sample-configs/config-custom-dataset-small-hnsw.json            | 2 +-
 21 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
index b17bca14b..1a1649783 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
index eb77e65e1..ebad41a1b 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
index 3cd27b7a8..348ecc763 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
index b3292219f..a9ce08aef 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-5000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
index 92dd5af3c..edcbd1fd9 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
index 4f70e0d08..d0fe32403 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
index c65b6f3ae..4b4cb164e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
index 9eb156f0e..89b84446c 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
index 9622276c3..9550bbdf1 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
index 61f278149..0ef34ddfe 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
index 07ecf7844..deddc393a 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
index f2b5975fa..6dc1c0cca 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
index e2bd637f9..f41ab4999 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
index a15a1ba0a..398891f9f 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-5000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
index 99ec24468..4dba59c2e 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index 3f6a20112..91bc15ec3 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index 7e7f66318..1cc8990d1 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index d58d863a5..0fde71065 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
index 29cf8bd89..c9097398c 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3500k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
index 1e48bd316..c52ac849a 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4000k",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },
diff --git a/sample-configs/config-custom-dataset-small-hnsw.json b/sample-configs/config-custom-dataset-small-hnsw.json
index 8eb2b865b..707ec41fa 100644
--- a/sample-configs/config-custom-dataset-small-hnsw.json
+++ b/sample-configs/config-custom-dataset-small-hnsw.json
@@ -4,7 +4,7 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann",
-      "instance_type": "db.m6i.large",
+      "instance_type": "db.m6i.xlarge",
       "provider": "aws",
       "enable_seqscan": "on"
     },

From 2efaf440b50f40e0c68ffb82f89237a7c9ea4318 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Thu, 17 Oct 2024 13:54:46 +0500
Subject: [PATCH 17/20] Added prewarm query result to logs.

---
 run-custom-dataset.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/run-custom-dataset.py b/run-custom-dataset.py
index bde14a6be..01979ff46 100644
--- a/run-custom-dataset.py
+++ b/run-custom-dataset.py
@@ -186,7 +186,7 @@ def query_configurations(config):
         return {}
 
 def pre_warm(config):
-    print("Running pre warm")
+    print(f"Running pre warm for database:{config['db_name']}")
     try:
         conn = psycopg.connect(
                 dbname=config['db_name'],
@@ -195,11 +195,14 @@ def pre_warm(config):
                 host=config['host'],
         )
         cursor = conn.cursor()
-        cursor.execute("SELECT pg_prewarm('public.pgvector_index')");
+        cursor.execute("SELECT pg_prewarm('public.pgvector_index') as block_loaded")
+        conn.commit()
+
+        result = cursor.fetchone()
+        print(f"Pre-warm blocks loaded: {result[0]}")
         conn.close()
-        print("Pre-warm completed")
-    except Exception:
-        pass
+    except Exception as e:
+        print(f"Failed to pre-warm the database: {e}")
 
 def run_benchmark(case, db_config):
     base_command = [
@@ -294,7 +297,6 @@ def run_benchmark(case, db_config):
                         get_stats(db_config)
                         f.flush()
                         pre_warm(db_config)
-                        print(f"Running with prewarm")
                         print(f"Running command: {' '.join(command)}")
                         f.flush()
 

From 7a695e5da3280eac47bceebb45ee2c952f22af30 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Thu, 17 Oct 2024 14:13:07 +0500
Subject: [PATCH 18/20] Updated db label in all configs

---
 .../config-custom-dataset-small-hnsw-2000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-3000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-4500k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-5000k.json                 | 2 +-
 .../config-custom-dataset-small-hnsw-500k.json                  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1000k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-1500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-2500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-3500k.json  | 2 +-
 custom-run-configs/config-custom-dataset-small-hnsw-4000k.json  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
index deddc393a..5fb71007e 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-2000k",
+        "db-label": "memory-comparison-2000k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
index 6dc1c0cca..1ddec8fcf 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-3000k",
+        "db-label": "memory-comparison-3000k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
index f41ab4999..3405f3d4c 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-4500k",
+        "db-label": "memory-comparison-4500k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
index 398891f9f..101d94b4e 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-5000k",
+        "db-label": "memory-comparison-5000k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
index 4dba59c2e..8af70db82 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-500k",
+        "db-label": "memory-comparison-500k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
index 91bc15ec3..eb2f6fd68 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-1000k",
+        "db-label": "memory-comparison-1000k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
index 1cc8990d1..af7588328 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-1500k",
+        "db-label": "memory-comparison-1500k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
index 0fde71065..692afe519 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-2500k",
+        "db-label": "memory-comparison-2500k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
index c9097398c..8be316aa3 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-3500k",
+        "db-label": "memory-comparison-3500k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
index c52ac849a..4aa10662b 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
@@ -10,7 +10,7 @@
     },
     "cases": [
       {
-        "db-label": "memory-comparison-4000k",
+        "db-label": "memory-comparison-4000k-20k-test-dataset",
         "drop_old": false,
         "load": false,
         "search-serial": true,

From aaff72d1a4559d318d6051cd56c1b19cb6cd9adc Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Mon, 4 Nov 2024 17:47:12 +0500
Subject: [PATCH 19/20] Updated custom dataset configs

---
 .../config-custom-dataset-small-hnsw-1000k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-2000k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-3500k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-4000k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-1500k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-2500k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-4500k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-3000k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-5000k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-500k.json         |  8 ++++----
 .../config-custom-dataset-small-hnsw-1000k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-2000k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-3500k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-4000k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-1500k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-2500k.json        |  8 ++++----
 .../config-custom-dataset-small-hnsw-4500k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-3000k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-5000k.json        | 10 +++++-----
 .../config-custom-dataset-small-hnsw-500k.json         | 10 +++++-----
 20 files changed, 89 insertions(+), 89 deletions(-)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-1}/config-custom-dataset-small-hnsw-1000k.json (89%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs-1}/config-custom-dataset-small-hnsw-2000k.json (89%)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-1}/config-custom-dataset-small-hnsw-3500k.json (89%)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-1}/config-custom-dataset-small-hnsw-4000k.json (89%)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-2}/config-custom-dataset-small-hnsw-1500k.json (89%)
 rename {custom-run-build-index-configs => custom-run-build-index-configs-2}/config-custom-dataset-small-hnsw-2500k.json (89%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs-2}/config-custom-dataset-small-hnsw-4500k.json (89%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs-3}/config-custom-dataset-small-hnsw-3000k.json (89%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs-3}/config-custom-dataset-small-hnsw-5000k.json (89%)
 rename {custom-run-build-index-configs-large => custom-run-build-index-configs-3}/config-custom-dataset-small-hnsw-500k.json (89%)
 rename {custom-run-configs => custom-run-configs-1}/config-custom-dataset-small-hnsw-1000k.json (84%)
 rename {custom-run-configs-large => custom-run-configs-1}/config-custom-dataset-small-hnsw-2000k.json (84%)
 rename {custom-run-configs => custom-run-configs-1}/config-custom-dataset-small-hnsw-3500k.json (84%)
 rename {custom-run-configs => custom-run-configs-1}/config-custom-dataset-small-hnsw-4000k.json (84%)
 rename {custom-run-configs => custom-run-configs-2}/config-custom-dataset-small-hnsw-1500k.json (84%)
 rename {custom-run-configs => custom-run-configs-2}/config-custom-dataset-small-hnsw-2500k.json (86%)
 rename {custom-run-configs-large => custom-run-configs-2}/config-custom-dataset-small-hnsw-4500k.json (84%)
 rename {custom-run-configs-large => custom-run-configs-3}/config-custom-dataset-small-hnsw-3000k.json (84%)
 rename {custom-run-configs-large => custom-run-configs-3}/config-custom-dataset-small-hnsw-5000k.json (84%)
 rename {custom-run-configs-large => custom-run-configs-3}/config-custom-dataset-small-hnsw-500k.json (84%)

diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json
similarity index 89%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
rename to custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json
index d0fe32403..c3674f572 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json
similarity index 89%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
rename to custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json
index 1a1649783..0d95f1197 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json
similarity index 89%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
rename to custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json
index 9550bbdf1..1cd60da9e 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json
similarity index 89%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
rename to custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json
index 0ef34ddfe..017c2bdcf 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json
similarity index 89%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
rename to custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json
index 4b4cb164e..d726ea1fd 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json
similarity index 89%
rename from custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
rename to custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json
index 89b84446c..801005c0c 100644
--- a/custom-run-build-index-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json
similarity index 89%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
rename to custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json
index 348ecc763..e31cf3bfa 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json
similarity index 89%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
rename to custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json
index ebad41a1b..496d868db 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json
similarity index 89%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
rename to custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json
index a9ce08aef..0656dd083 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-5000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json
similarity index 89%
rename from custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
rename to custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json
index edcbd1fd9..61c3cd037 100644
--- a/custom-run-build-index-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json
@@ -4,8 +4,8 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
@@ -16,8 +16,8 @@
         "search-serial": false,
         "search-concurrent": false,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "42GB",
-        "max-parallel-workers": 15,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-1000k.json
similarity index 84%
rename from custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
rename to custom-run-configs-1/config-custom-dataset-small-hnsw-1000k.json
index eb2f6fd68..cba9c28c2 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1000k.json
+++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-1000k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-1000k-20k-test-dataset",
+        "db-label": "memory-comparison-1000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json
similarity index 84%
rename from custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
rename to custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json
index 5fb71007e..fc56c9280 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-2000k.json
+++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-2000k-20k-test-dataset",
+        "db-label": "memory-comparison-2000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json
similarity index 84%
rename from custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
rename to custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json
index 8be316aa3..3f1145cf5 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-3500k.json
+++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-3500k-20k-test-dataset",
+        "db-label": "memory-comparison-3500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json
similarity index 84%
rename from custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
rename to custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json
index 4aa10662b..a74a21264 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-4000k.json
+++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-4000k-20k-test-dataset",
+        "db-label": "memory-comparison-4000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json
similarity index 84%
rename from custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
rename to custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json
index af7588328..7e3c29493 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-1500k.json
+++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-1500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-1500k-20k-test-dataset",
+        "db-label": "memory-comparison-1500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json
similarity index 86%
rename from custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
rename to custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json
index 692afe519..4aed76165 100644
--- a/custom-run-configs/config-custom-dataset-small-hnsw-2500k.json
+++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-2500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-2500k-20k-test-dataset",
+        "db-label": "memory-comparison-2500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json
similarity index 84%
rename from custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
rename to custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json
index 3405f3d4c..be671b538 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-4500k.json
+++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-4500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-4500k-20k-test-dataset",
+        "db-label": "memory-comparison-4500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json
similarity index 84%
rename from custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
rename to custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json
index 1ddec8fcf..1cdfb7beb 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-3000k.json
+++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-3000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-3000k-20k-test-dataset",
+        "db-label": "memory-comparison-3000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json
similarity index 84%
rename from custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
rename to custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json
index 101d94b4e..b1244c0cb 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-5000k.json
+++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-5000k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-5000k-20k-test-dataset",
+        "db-label": "memory-comparison-5000k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,
diff --git a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json
similarity index 84%
rename from custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
rename to custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json
index 8af70db82..f3ef4ba40 100644
--- a/custom-run-configs-large/config-custom-dataset-small-hnsw-500k.json
+++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json
@@ -4,20 +4,20 @@
       "username": "postgres",
       "password": "postgres",
       "db_name": "ann-500k",
-      "instance_type": "db.m6i.xlarge",
-      "provider": "aws",
+      "instance_type": "Standard_D8ds_v5",
+      "provider": "azure",
       "enable_seqscan": "on"
     },
     "cases": [
       {
-        "db-label": "memory-comparison-500k-20k-test-dataset",
+        "db-label": "memory-comparison-500k",
         "drop_old": false,
         "load": false,
         "search-serial": true,
         "search-concurrent": true,
         "case-type": "PerformanceCustomDataset",
-        "maintenance-work-mem": "8GB",
-        "max-parallel-workers": 3,
+        "maintenance-work-mem": "16GB",
+        "max-parallel-workers": 7,
         "ef-search": [40],
         "ef-construction": 128,
         "m": 32,

From c675feb86014c8fd0764ead4775bd14a6ebe8548 Mon Sep 17 00:00:00 2001
From: Sheharyar Ahmad <sheharyar.572@gmail.com>
Date: Mon, 4 Nov 2024 18:03:55 +0500
Subject: [PATCH 20/20] set shuffled_data to false

---
 vectordb_bench/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vectordb_bench/__init__.py b/vectordb_bench/__init__.py
index 3d8419a4f..3795535ed 100644
--- a/vectordb_bench/__init__.py
+++ b/vectordb_bench/__init__.py
@@ -21,7 +21,7 @@ class config:
     NUM_PER_BATCH = env.int("NUM_PER_BATCH", 5000)
 
     DROP_OLD = env.bool("DROP_OLD", True)
-    USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
+    USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", False)
 
     NUM_CONCURRENCY = env.list("NUM_CONCURRENCY",  [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )