diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
new file mode 100644
index 000000000..7c133cd5f
--- /dev/null
+++ b/.github/workflows/pull_request.yml
@@ -0,0 +1,36 @@
+name: Test on pull request
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    name: Run Python Tests
+    strategy:
+      matrix:
+        python-version: ["3.11", "3.12"]
+        os: [ubuntu-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Fetch tags
+        run: |
+          git fetch --prune --unshallow --tags
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[test]"
+
+      - name: Test with pytest
+        run: |
+          make unittest
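
The matrix above fans out to the cross product of its two axes, so every pull request runs four jobs: {3.11, 3.12} x {ubuntu-latest, windows-latest}. The tag fetch before the editable install suggests the package resolves its version from git metadata at build time. A minimal sketch of the fan-out, assuming nothing beyond the two axes declared above:

    import itertools

    python_versions = ["3.11", "3.12"]
    runners = ["ubuntu-latest", "windows-latest"]

    # GitHub Actions creates one job per element of the cross product.
    for version, runner in itertools.product(python_versions, runners):
        print(f"build ({version}, {runner})")

Note that the final step shells out to make, which assumes GNU make is available on the Windows runner as well as on Ubuntu.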
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..562615f6d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,2 @@
+unittest:
+	PYTHONPATH=`pwd` python3 -m pytest tests/test_dataset.py::TestDataSet::test_download_small -svv
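
The new Makefile target pins CI to a single fast test and sets PYTHONPATH to the repo root; the VAR=value command prefix relies on the recipe running under a POSIX shell. The same run can be reproduced without make through pytest's programmatic entry point; a sketch, with the test id copied from the target above:

    import sys

    import pytest

    # Mirrors the Makefile target: run one test, verbose, output capture disabled.
    sys.exit(pytest.main(["-svv", "tests/test_dataset.py::TestDataSet::test_download_small"]))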
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 60a219e25..c7678c206 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -36,7 +36,7 @@ def test_iter_cohere(self):
         dur_iter = time.time() - before
         log.warning(f"iter through cohere_10m cost={dur_iter/60}min")
 
-    # pytest -sv tests/test_dataset.py::TestDataSet::test_iter_laion 
+    # pytest -sv tests/test_dataset.py::TestDataSet::test_iter_laion
     def test_iter_laion(self):
         laion_100m = Dataset.LAION.manager(100_000_000)
         from vectordb_bench.backend.data_source import DatasetSource
@@ -50,17 +50,32 @@ def test_iter_laion(self):
         dur_iter = time.time() - before
         log.warning(f"iter through laion_100m cost={dur_iter/60}min")
 
-    # https://github.com/zilliztech/VectorDBBench/issues/285
-    # TODO: ok
-    def test_iter_openai(self):
-        
-        openai_500k = Dataset.OPENAI.manager(500_000)
-        openai_500k.prepare(source=DatasetSource.AliyunOSS, check=False)
+    def test_download_small(self):
+        openai_50k = Dataset.OPENAI.manager(50_000)
+        files = [
+            "test.parquet",
+            "neighbors.parquet",
+            "neighbors_head_1p.parquet",
+            "neighbors_tail_1p.parquet",
+        ]
 
-        import time
-        before = time.time()
-        for i in openai_500k:
-            log.debug(i.head(1))
+        import os
+
+        file_path = openai_50k.data_dir.joinpath("test.parquet")
+
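+        # Fetch all four files from S3 first.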
+        DatasetSource.S3.reader().read(
+            openai_50k.data.dir_name.lower(),
+            files=files,
+            local_ds_root=openai_50k.data_dir,
+            check_etag=False,
+        )
+
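+        # Remove one file, then pull from Aliyun OSS so both readers are exercised.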
+        os.remove(file_path)
+        DatasetSource.AliyunOSS.reader().read(
+            openai_50k.data.dir_name.lower(),
+            files=files,
+            local_ds_root=openai_50k.data_dir,
+            check_etag=False,
+        )
 
-        dur_iter = time.time() - before
-        log.warning(f"iter through openai 500K cost={dur_iter/60}min, source=AliyunOSS")
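
The replacement test is far cheaper than the removed test_iter_openai: it downloads only four small parquet files for the 50K OpenAI dataset, deletes test.parquet, then fetches again from Aliyun OSS, so both remote readers run in a single test. A hypothetical hardening step, assuming the manager API used above (data_dir holding the downloaded files); this helper is not part of the change itself:

    import pathlib

    # Hypothetical helper: assert every expected file landed on disk and is
    # non-empty after each reader runs.
    def assert_files_present(data_dir: pathlib.Path, files: list[str]) -> None:
        for name in files:
            local = data_dir.joinpath(name)
            assert local.exists(), f"missing downloaded file: {local}"
            assert local.stat().st_size > 0, f"empty downloaded file: {local}"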
diff --git a/vectordb_bench/backend/data_source.py b/vectordb_bench/backend/data_source.py
index 65926ff6b..28e3c3636 100644
--- a/vectordb_bench/backend/data_source.py
+++ b/vectordb_bench/backend/data_source.py
@@ -76,11 +76,11 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path, chec
         if not local_ds_root.exists():
             log.info(f"local dataset root path not exist, creating it: {local_ds_root}")
             local_ds_root.mkdir(parents=True)
-            downloads = [(pathlib.Path("benchmark", dataset, f), local_ds_root.joinpath(f)) for f in files]
+            downloads = [(pathlib.PurePosixPath("benchmark", dataset, f), local_ds_root.joinpath(f)) for f in files]
 
         else:
             for file in files:
-                remote_file = pathlib.Path("benchmark", dataset, file)
+                remote_file = pathlib.PurePosixPath("benchmark", dataset, file)
                 local_file = local_ds_root.joinpath(file)
 
                 # Don't check etags for Dataset from Aliyun OSS
@@ -93,8 +93,8 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path, chec
 
         log.info(f"Start to downloading files, total count: {len(downloads)}")
         for remote_file, local_file in tqdm(downloads):
-            log.debug(f"downloading file {remote_file} to {local_ds_root}")
-            self.bucket.get_object_to_file(remote_file.as_posix(), local_file.as_posix())
+            log.debug(f"downloading file {remote_file} to {local_file}")
+            self.bucket.get_object_to_file(remote_file.as_posix(), str(local_file.absolute()))
 
         log.info(f"Succeed to download all files, downloaded file count = {len(downloads)}")
 
@@ -125,11 +125,11 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path, chec
         if not local_ds_root.exists():
             log.info(f"local dataset root path not exist, creating it: {local_ds_root}")
             local_ds_root.mkdir(parents=True)
-            downloads = [pathlib.Path(self.remote_root, dataset, f) for f in files]
+            downloads = [pathlib.PurePosixPath(self.remote_root, dataset, f) for f in files]
 
         else:
             for file in files:
-                remote_file = pathlib.Path(self.remote_root, dataset, file)
+                remote_file = pathlib.PurePosixPath(self.remote_root, dataset, file)
                 local_file = local_ds_root.joinpath(file)
 
                 if (not local_file.exists()) or (not self.validate_file(remote_file, local_file, check_etag)):
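
The pathlib.Path to pathlib.PurePosixPath switch is what makes the new windows-latest CI leg viable: remote object keys must always be joined with forward slashes, while a native Path follows the host OS, so any place the key is used without an explicit as_posix() conversion would produce a backslashed key on Windows. PureWindowsPath is importable on any platform, so the difference can be demonstrated anywhere; the dataset directory name below is illustrative:

    import pathlib

    # A native Windows path stringifies with backslashes, which S3/OSS would
    # treat as literal key characters rather than separators.
    win_key = pathlib.PureWindowsPath("benchmark", "openai_small_50k", "test.parquet")
    posix_key = pathlib.PurePosixPath("benchmark", "openai_small_50k", "test.parquet")

    print(win_key)    # benchmark\openai_small_50k\test.parquet
    print(posix_key)  # benchmark/openai_small_50k/test.parquet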