From a68712aeb5256a9935e5bb2156d9a4c3f958407b Mon Sep 17 00:00:00 2001
From: estelle
Date: Thu, 19 Dec 2024 15:50:03 +0100
Subject: [PATCH] WIP: e2e tests

---
 poetry.lock                            |  13 +-
 pyproject.toml                         |   1 +
 tests/e2e/conftest.py                  |   6 +
 .../config/test_pipeline_runner_e2e.py |   6 -
 tests/e2e/test_simplekgpipeline_e2e.py | 147 +++++++++++++++++-
 5 files changed, 164 insertions(+), 9 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index eae77d89..ddbea133 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -5079,6 +5079,17 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
+[[package]]
+name = "weaviate"
+version = "0.1.2"
+description = "A placeholder package for the Weaviate name"
+optional = false
+python-versions = "*"
+files = [
+    {file = "weaviate-0.1.2-py3-none-any.whl", hash = "sha256:40f1c1cf0b769036315d2b6026c8cd823a3a6e951c90d4e70a001a770ba8a444"},
+    {file = "weaviate-0.1.2.tar.gz", hash = "sha256:a381b8bb0eb236bd10256def8612953ed9024e6738b8a259e7ec11e626ae0665"},
+]
+
 [[package]]
 name = "weaviate-client"
 version = "4.10.2"
@@ -5306,4 +5317,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9.0"
-content-hash = "e7c42a84eefc50ce165d6603f7b90788e33108651ca01351a4ed33cfb29ab20d"
+content-hash = "b05fbd4ce521d080d3dc2b7d2fa6a8e66cdc88b60f635b627aa7e7cd81ea65d4"
diff --git a/pyproject.toml b/pyproject.toml
index e65006bb..9b9c6415 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ json-repair = "^0.30.2"
 types-pyyaml = "^6.0.12.20240917"
 ollama = {version = "^0.4.4", optional = true}
 uuid = "^1.30"
+weaviate = "^0.1.2"
 
 [tool.poetry.group.dev.dependencies]
 urllib3 = "<2"
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 42c21cf8..a48a70ea 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -46,6 +46,12 @@ def driver() -> Generator[Any, Any, Any]:
     driver.close()
 
 
+@pytest.fixture(scope="function", autouse=True)
+def clear_db(driver: Driver) -> Any:
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    yield
+
+
 @pytest.fixture(scope="function")
 def llm() -> MagicMock:
     return MagicMock(spec=LLMInterface)
diff --git a/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py b/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
index 9122a935..a0ba328a 100644
--- a/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
+++ b/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
@@ -9,12 +9,6 @@
 from neo4j_graphrag.llm import LLMResponse
 
 
-@pytest.fixture(scope="function", autouse=True)
-def clear_db(driver: neo4j.Driver) -> Any:
-    driver.execute_query("MATCH (n) DETACH DELETE n")
-    yield
-
-
 @pytest.mark.asyncio
 async def test_pipeline_from_json_config(harry_potter_text: str, driver: Mock) -> None:
     os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
diff --git a/tests/e2e/test_simplekgpipeline_e2e.py b/tests/e2e/test_simplekgpipeline_e2e.py
index 4d3059c7..6b5ec04f 100644
--- a/tests/e2e/test_simplekgpipeline_e2e.py
+++ b/tests/e2e/test_simplekgpipeline_e2e.py
@@ -108,5 +108,148 @@ async def test_pipeline_builder_happy_path(
     )
 
     # Run the knowledge graph building process with text input
-    text_input = "John Doe lives in New York City."
-    await kg_builder_text.run_async(text=text_input)
+    await kg_builder_text.run_async(text=harry_potter_text)
+
+
+
+@pytest.mark.asyncio
+@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
+async def test_pipeline_builder_two_documents(
+    harry_potter_text_part1: str,
+    harry_potter_text_part2: str,
+    llm: MagicMock,
+    embedder: MagicMock,
+    driver: neo4j.Driver,
+) -> None:
+    """When everything works as expected, extracted entities, relations and text
+    chunks must be in the DB
+    """
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    embedder.embed_query.return_value = [1, 2, 3]
+    llm.ainvoke.side_effect = [
+        # first document
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+        # second document
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Hermione Granger"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+    ]
+
+    # Create an instance of the SimpleKGPipeline
+    kg_builder_text = SimpleKGPipeline(
+        llm=llm,
+        driver=driver,
+        embedder=embedder,
+        from_pdf=False,
+    )
+
+    # Run the knowledge graph building process with text input
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+    await kg_builder_text.run_async(text=harry_potter_text_part2)
+
+    # check graph content
+    records, _, _ = driver.execute_query("MATCH (n) RETURN n")
+    print(records)
+
+    assert False
+
+
+@pytest.mark.asyncio
+@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
+async def test_pipeline_builder_same_document_two_runs(
+    harry_potter_text_part1: str,
+    llm: MagicMock,
+    embedder: MagicMock,
+    driver: neo4j.Driver,
+) -> None:
+    """When everything works as expected, extracted entities, relations and text
+    chunks must be in the DB
+    """
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    embedder.embed_query.return_value = [1, 2, 3]
+    llm.ainvoke.side_effect = [
+        # first run
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+        # second run
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+    ]
+
+    # Create an instance of the SimpleKGPipeline
+    kg_builder_text = SimpleKGPipeline(
+        llm=llm,
+        driver=driver,
+        embedder=embedder,
+        from_pdf=False,
+    )
+
+    # Run the knowledge graph building process with text input
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+
+    # check graph content
+    records, _, _ = driver.execute_query("MATCH (n) RETURN n")
+    print(records)
+
+    assert False
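
Note: the two new tests still end in print(records) / assert False placeholders. A minimal sketch of the assertions they could converge on, assuming the writer stores each extracted entity as a node carrying the extracted label and a "name" property (the Person label and the names come from the mocked LLM responses above; the exact node shape produced by the pipeline's writer is an assumption, not something this patch pins down):

    # Hypothetical replacement for the print()/assert False placeholder in
    # test_pipeline_builder_two_documents; assumes extracted entities end up
    # as (:Person {name: ...}) nodes, mirroring the mocked LLM output.
    records, _, _ = driver.execute_query(
        "MATCH (p:Person) RETURN p.name AS name ORDER BY name"
    )
    names = [record["name"] for record in records]
    assert "Harry Potter" in names
    assert "Hermione Granger" in names

For test_pipeline_builder_same_document_two_runs, the analogous check would count the Person nodes named "Harry Potter" remaining after the second run, which also makes explicit whether entity resolution is expected to merge the two extractions into a single node.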