diff --git a/create_dataset_subsets.py b/create_dataset_subsets.py
index efb1d3c82..55c147374 100644
--- a/create_dataset_subsets.py
+++ b/create_dataset_subsets.py
@@ -1,7 +1,11 @@
 import os
 import shutil
 import argparse
+import logging
 
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 
 def get_file_name(file_name: str, file_prefix: str, files_count: int) -> str:
     if files_count == 1:
@@ -9,28 +13,51 @@ def get_file_name(file_name: str, file_prefix: str, files_count: int) -> str:
     file_name = file_name.split("of-")[0]
     return file_name + "of-" + str(files_count).zfill(2) + ".parquet"
 
-def create_subsets(base_dir, save_dir_path, subset_prefix, file_prefix, step_size):
-    files = sorted([f for f in os.listdir(base_dir) if f.startswith(file_prefix)])
+def create_dataset(base_dir, save_dir_path, subset_prefix, file_prefix, file_count, row_count=500_000):
+    logger.info(f"Starting dataset creation with {file_count} files.")
+
+    # Sort the files and pick only the first 'file_count' files
+    files = sorted([f for f in os.listdir(base_dir) if f.startswith(file_prefix)])[:file_count]
     num_files = len(files)
+
+    if num_files == 0:
+        logger.warning("No files found with the specified prefix.")
+        return
+
+    logger.info(f"Found {num_files} files. Creating dataset...")
+
+    # Create the directory for the dataset
+    subset_dir = os.path.join(save_dir_path, f"{subset_prefix}_{file_count * row_count // 1000}k")
+    os.makedirs(subset_dir, exist_ok=True)
+    logger.info(f"Created directory for the dataset: {subset_dir}")
+
+    # Copy the first 'file_count' files into the subset directory
+    for file in files:
+        src_file = os.path.join(base_dir, file)
+        dst_file = os.path.join(subset_dir, get_file_name(file, file_prefix, file_count))
+        shutil.copy(src_file, dst_file)
+        logger.info(f"Copied {file} to {dst_file}")
 
-    for i in range(1, num_files + 1):
-        subset_dir = os.path.join(save_dir_path, f"{subset_prefix}_{i * step_size // 1000}k")
-        os.makedirs(subset_dir, exist_ok=True)
-
-        for j in range(i):
-            src_file = os.path.join(base_dir, files[j])
-            dst_file = os.path.join(subset_dir, get_file_name(files[j], file_prefix, i))
-            shutil.copy(src_file, dst_file)
-        src_test_file = os.path.join(base_dir, "test.parquet")
-        dst_test_file = os.path.join(subset_dir, "test.parquet")
-        shutil.copy(src_test_file, dst_test_file)
+    # Also copy the test.parquet file
+    src_test_file = os.path.join(base_dir, "test.parquet")
+    dst_test_file = os.path.join(subset_dir, "test.parquet")
+    shutil.copy(src_test_file, dst_test_file)
+    logger.info(f"Copied test.parquet to {subset_dir}")
+
+    src_neighbors_file = os.path.join(base_dir, "neighbors.parquet")
+    dst_neighbors_file = os.path.join(subset_dir, "neighbors.parquet")
+    shutil.copy(src_neighbors_file, dst_neighbors_file)
+    logger.info(f"Copied neighbors.parquet to {subset_dir}")
+
+    logger.info(f"Dataset creation completed. {file_count} files have been copied to {subset_dir}.")
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Create subsets of Parquet files using Dask.")
-    parser.add_argument("--directory", type=str, help="Path to the directory containing Parquet files.")
-    parser.add_argument("--save-dir-path", type=str, help="Directory path where data will be saved")
-    parser.add_argument("--dataset-name-prefix", type=str, help="Name prefix of the folder where each subset will be saved.")
+    parser = argparse.ArgumentParser(description="Create a dataset with a specified number of Parquet files.")
+    parser.add_argument("--directory", type=str, required=True, help="Path to the directory containing Parquet files.")
+    parser.add_argument("--save-dir-path", type=str, required=True, help="Directory path where the dataset will be saved.")
+    parser.add_argument("--dataset-name-prefix", type=str, help="Name prefix for the dataset folder.")
     parser.add_argument("--is-shuffled", type=bool, help="Whether the files are shuffled or not.")
+    parser.add_argument("--file-count", type=int, required=True, help="Number of Parquet files to include in the dataset.")
     args = parser.parse_args()
 
     file_prefix = (
@@ -48,7 +75,14 @@ def create_subsets(base_dir, save_dir_path, subset_prefix, file_prefix, step_siz
         if args.save_dir_path
         else args.directory
     )
 
-    step_size = 500_000 # 500k
-    create_subsets(args.directory, save_dir_path, subset_prefix, file_prefix, step_size)
-    print(f'Finished creating subsets of Parquet files in {args.directory}.')
\ No newline at end of file
+    if os.path.exists(save_dir_path) and os.listdir(save_dir_path):
+        shutil.rmtree(save_dir_path)
+        logger.info(f"Deleted existing directory: {save_dir_path}")
+
+    # Log the input parameters
+    logger.info(f"Parameters received: directory={args.directory}, save_dir_path={args.save_dir_path}, file_count={args.file_count}, dataset_name_prefix={subset_prefix}, is_shuffled={args.is_shuffled}")
+
+    # Create the dataset with the specified file_count
+    create_dataset(args.directory, save_dir_path, subset_prefix, file_prefix, args.file_count)
+    logger.info(f'Finished creating a dataset with {args.file_count} Parquet files.')
\ No newline at end of file
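
To make the renaming rule above concrete: `get_file_name` keeps the file name up to the `of-` marker and rewrites the trailing count, so a subset of a larger dataset is renumbered as if it were a complete dataset of `file_count` files. A minimal sketch, assuming source files follow the `train-XX-of-YY.parquet` naming convention (the file names below are hypothetical examples):

```python
# Standalone sketch of the files_count > 1 branch of get_file_name above
# (the files_count == 1 branch is elided in the diff and not shown here).
def rename_subset_file(file_name: str, files_count: int) -> str:
    stem = file_name.split("of-")[0]  # "train-00-of-10.parquet" -> "train-00-"
    return stem + "of-" + str(files_count).zfill(2) + ".parquet"

# A 4-file (2M-row) subset cut from a 10-file source dataset is renumbered:
assert rename_subset_file("train-00-of-10.parquet", 4) == "train-00-of-04.parquet"
assert rename_subset_file("train-03-of-10.parquet", 4) == "train-03-of-04.parquet"
```
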
diff --git a/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json
new file mode 100644
index 000000000..c3674f572
--- /dev/null
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-1000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-1000k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-1000k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-1m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_1000k",
+            "custom-dataset-size": 1000000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 2,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json
new file mode 100644
index 000000000..0d95f1197
--- /dev/null
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-2000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-2000k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-2000k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-2m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_2000k",
+            "custom-dataset-size": 2000000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 4,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json
new file mode 100644
index 000000000..1cd60da9e
--- /dev/null
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-3500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost2",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-3500k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-3500k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-3_5m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_3500k",
+            "custom-dataset-size": 3500000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 7,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json
new file mode 100644
index 000000000..017c2bdcf
--- /dev/null
+++ b/custom-run-build-index-configs-1/config-custom-dataset-small-hnsw-4000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-4000k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-4000k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-4m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_4000k",
+            "custom-dataset-size": 4000000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 8,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json
new file mode 100644
index 000000000..d726ea1fd
--- /dev/null
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-1500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost2",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-1500k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-1500k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-1_5m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_1500k",
+            "custom-dataset-size": 1500000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 3,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json
new file mode 100644
index 000000000..801005c0c
--- /dev/null
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-2500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost2",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-2500k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-2500k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-2_5m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_2500k",
+            "custom-dataset-size": 2500000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 5,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json
new file mode 100644
index 000000000..e31cf3bfa
--- /dev/null
+++ b/custom-run-build-index-configs-2/config-custom-dataset-small-hnsw-4500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost2",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-4500k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-4500k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-4_5m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_4500k",
+            "custom-dataset-size": 4500000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 9,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json
new file mode 100644
index 000000000..496d868db
--- /dev/null
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-3000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-3000k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-3000k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-3m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_3000k",
+            "custom-dataset-size": 3000000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 6,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json
new file mode 100644
index 000000000..0656dd083
--- /dev/null
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-5000k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-5000k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-5000k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-5m",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_5000k",
+            "custom-dataset-size": 5000000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 10,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
diff --git a/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json
new file mode 100644
index 000000000..61c3cd037
--- /dev/null
+++ b/custom-run-build-index-configs-3/config-custom-dataset-small-hnsw-500k.json
@@ -0,0 +1,43 @@
+{
+    "database": {
+        "host": "localhost1",
+        "username": "postgres",
+        "password": "postgres",
+        "db_name": "ann-500k",
+        "instance_type": "Standard_D8ds_v5",
+        "provider": "azure",
+        "enable_seqscan": "on"
+    },
+    "cases": [
+        {
+            "db-label": "memory-comparison-500k",
+            "drop_old": true,
+            "load": true,
+            "search-serial": false,
+            "search-concurrent": false,
+            "case-type": "PerformanceCustomDataset",
+            "maintenance-work-mem": "16GB",
+            "max-parallel-workers": 7,
+            "ef-search": [40],
+            "ef-construction": 128,
+            "m": 32,
+            "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100",
+            "concurrency-duration": 30,
+            "k": 10,
+            "custom-case-name": "hnsw-1536D-500K",
+            "custom-dataset-name": "custom-openai",
+            "custom-dataset-dir": "openai_500k",
+            "custom-dataset-size": 500000,
+            "custom-dataset-dim": 1536,
+            "custom-dataset-file-count": 1,
+            "custom-dataset-use-shuffled": false,
+            "create-dataset-args": {
+                "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m",
+                "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/",
+                "is-shuffled": false
+            },
+            "run_count": 1
+        }
+    ]
+}
+
\ No newline at end of file
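
The twenty new configs come in pairs per dataset size: the custom-run-build-index-configs-* files above load the data and build the HNSW index (`drop_old`/`load` true, search flags false, `run_count` 1), while the matching custom-run-configs-* files below reuse that index for search only (`drop_old`/`load` false, search flags true, `run_count` 3). Every size-dependent field is derived from the row count: each source parquet file holds 500k rows, so `custom-dataset-file-count` is `size / 500_000` and the directory and database names carry the size in thousands. Since the files are otherwise identical, they could be generated rather than hand-maintained; a minimal sketch under those assumptions (the `make_config` helper is hypothetical, and the fields that never vary are abbreviated):

```python
import json

ROWS_PER_FILE = 500_000  # one source parquet file per 500k rows

def make_config(size: int, host: str, build: bool) -> dict:
    k = size // 1000  # e.g. 2_500_000 -> 2500, matching the "2500k" names above
    return {
        "database": {
            "host": host, "username": "postgres", "password": "postgres",
            "db_name": f"ann-{k}k", "instance_type": "Standard_D8ds_v5",
            "provider": "azure", "enable_seqscan": "on",
        },
        "cases": [{
            "db-label": f"memory-comparison-{k}k",
            "drop_old": build, "load": build,            # build phase writes data
            "search-serial": not build,                  # run phase only searches
            "search-concurrent": not build,
            "custom-dataset-dir": f"openai_{k}k",
            "custom-dataset-size": size,
            "custom-dataset-file-count": size // ROWS_PER_FILE,
            "run_count": 1 if build else 3,
            # ...remaining fields are identical across all sizes...
        }],
    }

print(json.dumps(make_config(2_500_000, "localhost2", build=True), indent=4))
```
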
"PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-1m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_1000k", + "custom-dataset-size": 1000000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 2, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json new file mode 100644 index 000000000..fc56c9280 --- /dev/null +++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-2000k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost1", + "username": "postgres", + "password": "postgres", + "db_name": "ann-2000k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-2000k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-2m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_2000k", + "custom-dataset-size": 2000000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 4, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json new file mode 100644 index 000000000..3f1145cf5 --- /dev/null +++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-3500k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost2", + "username": "postgres", + "password": "postgres", + "db_name": "ann-3500k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-3500k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-3_5m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_3500k", + "custom-dataset-size": 3500000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 7, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": 
"/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json b/custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json new file mode 100644 index 000000000..a74a21264 --- /dev/null +++ b/custom-run-configs-1/config-custom-dataset-small-hnsw-4000k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost1", + "username": "postgres", + "password": "postgres", + "db_name": "ann-4000k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-4000k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-4m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_4000k", + "custom-dataset-size": 4000000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 8, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json new file mode 100644 index 000000000..7e3c29493 --- /dev/null +++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-1500k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost2", + "username": "postgres", + "password": "postgres", + "db_name": "ann-1500k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-1500k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-1_5m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_1500k", + "custom-dataset-size": 1500000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 3, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json new file mode 100644 index 000000000..4aed76165 --- /dev/null +++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-2500k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost2", + "username": "postgres", + "password": "postgres", + "db_name": "ann-2500k", + "instance_type": "db.m6i.large", + "provider": "aws", + "enable_seqscan": "on" + }, + "cases": [ + { + 
"db-label": "memory-comparison-2500k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-2_5m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_2500k", + "custom-dataset-size": 2500000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 5, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json b/custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json new file mode 100644 index 000000000..be671b538 --- /dev/null +++ b/custom-run-configs-2/config-custom-dataset-small-hnsw-4500k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost2", + "username": "postgres", + "password": "postgres", + "db_name": "ann-4500k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-4500k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-4_5m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_4500k", + "custom-dataset-size": 4500000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 9, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json new file mode 100644 index 000000000..1cdfb7beb --- /dev/null +++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-3000k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost1", + "username": "postgres", + "password": "postgres", + "db_name": "ann-3000k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-3000k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-3m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_3000k", + "custom-dataset-size": 3000000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 6, + "custom-dataset-use-shuffled": false, + 
"create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json new file mode 100644 index 000000000..b1244c0cb --- /dev/null +++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-5000k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost1", + "username": "postgres", + "password": "postgres", + "db_name": "ann-5000k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-5000k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-5m", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_5000k", + "custom-dataset-size": 5000000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 10, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json b/custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json new file mode 100644 index 000000000..f3ef4ba40 --- /dev/null +++ b/custom-run-configs-3/config-custom-dataset-small-hnsw-500k.json @@ -0,0 +1,43 @@ +{ + "database": { + "host": "localhost1", + "username": "postgres", + "password": "postgres", + "db_name": "ann-500k", + "instance_type": "Standard_D8ds_v5", + "provider": "azure", + "enable_seqscan": "on" + }, + "cases": [ + { + "db-label": "memory-comparison-500k", + "drop_old": false, + "load": false, + "search-serial": true, + "search-concurrent": true, + "case-type": "PerformanceCustomDataset", + "maintenance-work-mem": "16GB", + "max-parallel-workers": 7, + "ef-search": [40], + "ef-construction": 128, + "m": 32, + "num-concurrency": "1,10,20,30,40,50,60,70,80,90,100", + "concurrency-duration": 30, + "k": 10, + "custom-case-name": "hnsw-1536D-500K", + "custom-dataset-name": "custom-openai", + "custom-dataset-dir": "openai_500k", + "custom-dataset-size": 500000, + "custom-dataset-dim": 1536, + "custom-dataset-file-count": 1, + "custom-dataset-use-shuffled": false, + "create-dataset-args": { + "directory": "/home/ubuntu/vectordb_bench/dataset/openai/openai_large_5m", + "save-dir-path": "/home/ubuntu/vectordb_bench/dataset/custom-openai/", + "is-shuffled": false + }, + "run_count": 3 + } + ] + } + \ No newline at end of file diff --git a/run-custom-dataset.py b/run-custom-dataset.py index a5d1213d5..01979ff46 100644 --- a/run-custom-dataset.py +++ b/run-custom-dataset.py @@ -1,3 +1,4 @@ +import argparse import json import time from contextlib import redirect_stdout @@ -6,6 +7,7 @@ import psycopg from psycopg import sql import os +import shutil os.environ["LOG_LEVEL"] = "DEBUG" @@ -40,11 +42,62 @@ def setup_database(config): 
diff --git a/run-custom-dataset.py b/run-custom-dataset.py
index a5d1213d5..01979ff46 100644
--- a/run-custom-dataset.py
+++ b/run-custom-dataset.py
@@ -1,3 +1,4 @@
+import argparse
 import json
 import time
 from contextlib import redirect_stdout
@@ -6,6 +7,7 @@
 import psycopg
 from psycopg import sql
 import os
+import shutil
 
 os.environ["LOG_LEVEL"] = "DEBUG"
 
@@ -40,11 +42,62 @@ def setup_database(config):
         cursor = conn.cursor()
         cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;")
         cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_buffercache;")
+        cursor.execute("CREATE EXTENSION IF NOT EXISTS pg_prewarm;")
         conn.commit()
         conn.close()
     except Exception as e:
        print(f"Setup failed: {e}")
 
+
+def create_dataset(args: dict) -> bool:
+    """
+    Create a dataset from the original dataset by invoking the
+    create_dataset_subsets.py script with the required arguments.
+    """
+    file_count = args.get("file-count")
+    is_shuffled = args.get("is-shuffled")
+    directory = args.get("directory")
+    output_dir = args.get("save-dir-path")
+
+    try:
+        # Define the command to run the create_dataset_subsets.py script
+        command = [
+            "python3", "create_dataset_subsets.py",
+            "--directory", directory,
+            "--save-dir-path", output_dir,
+            "--file-count", str(file_count),
+        ]
+        print(f"Running command: {' '.join(command)}")
+
+        file_prefix = "train"
+        if is_shuffled:
+            file_prefix = "shuffle_train"
+            command += ["--is-shuffled", "True"]
+        subprocess.run(command, check=True)
+        print("Checking if the dataset was created successfully.")
+
+        created_files_count = sum([1 for _, _, files in os.walk(output_dir) for f in files if f.startswith(file_prefix)])
+        print(f"Number of files in the output dataset directory: {created_files_count}")
+
+        if created_files_count != file_count:
+            raise Exception("Incorrect number of files.")
+        print("Dataset creation successful.")
+    except Exception as e:
+        print(f"Dataset creation failed: {e}")
+        return False
+
+    return True
+
+def delete_dataset(dataset_dir: str):
+    try:
+        if os.path.exists(dataset_dir):
+            shutil.rmtree(dataset_dir)
+            print(f"Deleted directory: {dataset_dir}")
+        else:
+            print(f"Directory does not exist: {dataset_dir}")
+    except Exception as e:
+        print(f"Failed to delete directory: {e}")
+
 def teardown_database(config):
     # Optionally drop the database after the test
     pass
@@ -132,6 +185,24 @@
         print(f"Failed to query configurations: {e}")
         return {}
 
+def pre_warm(config):
+    print(f"Running pre-warm for database: {config['db_name']}")
+    try:
+        conn = psycopg.connect(
+            dbname=config['db_name'],
+            user=config['username'],
+            password=config['password'],
+            host=config['host'],
+        )
+        cursor = conn.cursor()
+        cursor.execute("SELECT pg_prewarm('public.pgvector_index') as block_loaded")
+        conn.commit()
+
+        result = cursor.fetchone()
+        print(f"Pre-warm blocks loaded: {result[0]}")
+        conn.close()
+    except Exception as e:
+        print(f"Failed to pre-warm the database: {e}")
 
 def run_benchmark(case, db_config):
     base_command = [
@@ -225,6 +296,7 @@
             print(f"{key}: {value}")
         get_stats(db_config)
         f.flush()
+        pre_warm(db_config)
 
         print(f"Running command: {' '.join(command)}")
         f.flush()
@@ -246,16 +318,31 @@
         time.sleep(60)
 
 def main():
-    config = load_config("config.json")
-    start_time = time.time()
-    for case in config['cases']:
-        print(f"Running case: {case['db-label']}")
-        setup_database(config)
-
-        run_benchmark(case, config['database'])
-    end_time = time.time()
-    execution_time = end_time - start_time
-    print(f"COMPLETED ALL EXECUTIONS. total_duration={execution_time}")
+    parser = argparse.ArgumentParser(description="Run benchmarks on a custom dataset.")
+    parser.add_argument("--config-dir-path", type=str, help="Path to the config files directory.")
+    args = parser.parse_args()
+
+    for dir_path, _, file_names in os.walk(args.config_dir_path):
+        for file_name in file_names:
+            config = load_config(os.path.join(dir_path, file_name))
+            start_time = time.time()
+            for case in config['cases']:
+                print(f"Running case: {case['db-label']}")
+                setup_database(config)
+
+                create_dataset_args = case['create-dataset-args']
+                create_dataset_args["file-count"] = case["custom-dataset-file-count"]
+                dataset_created = create_dataset(create_dataset_args)
+                if not dataset_created:
+                    print(f"Failed to create dataset for case: {case['custom-case-name']} -- Skipping execution.")
+                    continue
+
+                run_benchmark(case, config['database'])
+                teardown_database(config)
+                delete_dataset(create_dataset_args["save-dir-path"])
+            end_time = time.time()
+            execution_time = end_time - start_time
+            print(f"COMPLETED ALL EXECUTIONS of config {file_name}. total_duration={execution_time}")
 
 if __name__ == "__main__":
     main()
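
A note on the pre_warm step added above: `pg_prewarm('public.pgvector_index')` reads the index into shared buffers and returns the number of blocks loaded, so the search phase does not pay cold-cache I/O on its first queries. Because `setup_database` also installs `pg_buffercache`, the warm-up can be verified by counting the index's cached blocks; a minimal sketch, assuming the index is named `pgvector_index` as `pre_warm` does (the connection values below are hypothetical and mirror the config files above):

```python
import psycopg

# Count shared-buffer blocks belonging to the (assumed) pgvector_index relation.
QUERY = """
SELECT count(*) AS cached_blocks
FROM pg_buffercache b
JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
WHERE c.relname = 'pgvector_index';
"""

with psycopg.connect(dbname="ann-1000k", user="postgres",
                     password="postgres", host="localhost1") as conn:
    cached_blocks = conn.execute(QUERY).fetchone()[0]
    print(f"pgvector_index blocks in shared_buffers: {cached_blocks}")
```
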
total_duration={execution_time}") + parser = argparse.ArgumentParser(description="Run benchmarks on a custom dataset.") + parser.add_argument("--config-dir-path", type=str, help="Path to the config files directory.") + args = parser.parse_args() + + for dir_path, _, file_names in os.walk(args.config_dir_path): + for file_name in file_names: + config = load_config(os.path.join(dir_path, file_name)) + start_time = time.time() + for case in config['cases']: + print(f"Running case: {case['db-label']}") + setup_database(config) + + create_dataset_args = case['create-dataset-args'] + create_dataset_args["file-count"] = case["custom-dataset-file-count"] + dataset_created = create_dataset(create_dataset_args) + if not dataset_created: + print(f"Failed to create dataset for case: {case['custom-case-name']} -- Skipping execution.") + continue + + run_benchmark(case, config['database']) + teardown_database(config) + delete_dataset(create_dataset_args["save-dir-path"]) + end_time = time.time() + execution_time = end_time - start_time + print(f"COMPLETED ALL EXECUTIONS of config {file_name}. total_duration={execution_time}") if __name__ == "__main__": main() diff --git a/sample-configs/config-custom-dataset-small-hnsw.json b/sample-configs/config-custom-dataset-small-hnsw.json index 8eb2b865b..707ec41fa 100644 --- a/sample-configs/config-custom-dataset-small-hnsw.json +++ b/sample-configs/config-custom-dataset-small-hnsw.json @@ -4,7 +4,7 @@ "username": "postgres", "password": "postgres", "db_name": "ann", - "instance_type": "db.m6i.large", + "instance_type": "db.m6i.xlarge", "provider": "aws", "enable_seqscan": "on" }, diff --git a/vectordb_bench/__init__.py b/vectordb_bench/__init__.py index 3d8419a4f..3795535ed 100644 --- a/vectordb_bench/__init__.py +++ b/vectordb_bench/__init__.py @@ -21,7 +21,7 @@ class config: NUM_PER_BATCH = env.int("NUM_PER_BATCH", 5000) DROP_OLD = env.bool("DROP_OLD", True) - USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True) + USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", False) NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )