
Commit

WIP: e2e tests
stellasia committed Dec 19, 2024
1 parent 1b724c5 commit a68712a
Showing 5 changed files with 164 additions and 9 deletions.
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -52,6 +52,7 @@ json-repair = "^0.30.2"
types-pyyaml = "^6.0.12.20240917"
ollama = {version = "^0.4.4", optional = true}
uuid = "^1.30"
weaviate = "^0.1.2"

[tool.poetry.group.dev.dependencies]
urllib3 = "<2"
6 changes: 6 additions & 0 deletions tests/e2e/conftest.py
@@ -46,6 +46,12 @@ def driver() -> Generator[Any, Any, Any]:
driver.close()


@pytest.fixture(scope="function", autouse=True)
def clear_db(driver: Driver) -> Any:
driver.execute_query("MATCH (n) DETACH DELETE n")
yield


@pytest.fixture(scope="function")
def llm() -> MagicMock:
return MagicMock(spec=LLMInterface)
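
The cleanup fixture is moved into conftest.py and marked autouse, so every e2e test in this directory now starts from an empty database without requesting the fixture explicitly (the new WIP tests below still clear the database by hand, which is now redundant). A minimal sketch of a test relying on it — a hypothetical example, not part of this commit:

# Hypothetical test: no explicit cleanup is needed because the autouse
# clear_db fixture in conftest.py wipes the database before each test runs.
import neo4j


def test_database_is_empty_at_start(driver: neo4j.Driver) -> None:
    records, _, _ = driver.execute_query("MATCH (n) RETURN count(n) AS c")
    assert records[0]["c"] == 0
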
@@ -9,12 +9,6 @@
from neo4j_graphrag.llm import LLMResponse


@pytest.fixture(scope="function", autouse=True)
def clear_db(driver: neo4j.Driver) -> Any:
driver.execute_query("MATCH (n) DETACH DELETE n")
yield


@pytest.mark.asyncio
async def test_pipeline_from_json_config(harry_potter_text: str, driver: Mock) -> None:
os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
147 changes: 145 additions & 2 deletions tests/e2e/test_simplekgpipeline_e2e.py
@@ -108,5 +108,148 @@ async def test_pipeline_builder_happy_path(
)

# Run the knowledge graph building process with text input
text_input = "John Doe lives in New York City."
await kg_builder_text.run_async(text=text_input)
await kg_builder_text.run_async(text=harry_potter_text)



@pytest.mark.asyncio
@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
async def test_pipeline_builder_two_documents(
harry_potter_text_part1: str,
harry_potter_text_part2: str,
llm: MagicMock,
embedder: MagicMock,
driver: neo4j.Driver,
) -> None:
"""When everything works as expected, extracted entities, relations and text
chunks must be in the DB
"""
driver.execute_query("MATCH (n) DETACH DELETE n")
embedder.embed_query.return_value = [1, 2, 3]
llm.ainvoke.side_effect = [
# first document
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
# second document
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Hermione Granger"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
]

# Create an instance of the SimpleKGPipeline
kg_builder_text = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
from_pdf=False,
)

# Run the knowledge graph building process with text input
await kg_builder_text.run_async(text=harry_potter_text_part1)
await kg_builder_text.run_async(text=harry_potter_text_part2)

# check graph content
records, _, _ = driver.execute_query("MATCH (n) RETURN n")
print(records)

assert False
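
The check above is still a WIP placeholder (print plus assert False). Under the mocked LLM responses, a completed assertion might verify that both extracted persons made it into the graph. A sketch only, assuming the extracted entities are stored as Person nodes with a name property, as the mocked output suggests:

# Hypothetical completion of the WIP check, not part of this commit.
records, _, _ = driver.execute_query(
    "MATCH (p:Person) RETURN p.name AS name ORDER BY name"
)
assert [r["name"] for r in records] == ["Harry Potter", "Hermione Granger"]
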


@pytest.mark.asyncio
@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
async def test_pipeline_builder_same_document_two_runs(
harry_potter_text_part1: str,
llm: MagicMock,
embedder: MagicMock,
driver: neo4j.Driver,
) -> None:
"""When everything works as expected, extracted entities, relations and text
chunks must be in the DB
"""
driver.execute_query("MATCH (n) DETACH DELETE n")
embedder.embed_query.return_value = [1, 2, 3]
llm.ainvoke.side_effect = [
# first run
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
# second run
# first chunk
LLMResponse(
content="""{
"nodes": [
{
"id": "0",
"label": "Person",
"properties": {
"name": "Harry Potter"
}
},
],
"relationships": []
}"""
),
# second chunk
LLMResponse(content='{"nodes": [], "relationships": []}'),
]

# Create an instance of the SimpleKGPipeline
kg_builder_text = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
from_pdf=False,
)

# Run the knowledge graph building process with text input
await kg_builder_text.run_async(text=harry_potter_text_part1)
await kg_builder_text.run_async(text=harry_potter_text_part1)

# check graph content
records, _, _ = driver.execute_query("MATCH (n) RETURN n")
print(records)

assert False
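
As above, assert False is a placeholder. Since both runs feed identical mocked output, a completed check could assert that the writer and resolver leave a single Harry Potter node while each run still stores its own text chunks. A sketch under two assumptions: the default resolver merges exact-name duplicates, and chunks are written with the Chunk label:

# Hypothetical completion, not part of this commit.
records, _, _ = driver.execute_query(
    "MATCH (p:Person {name: 'Harry Potter'}) RETURN count(p) AS c"
)
assert records[0]["c"] == 1  # assumes exact-match entity resolution merges the duplicate

records, _, _ = driver.execute_query("MATCH (c:Chunk) RETURN count(c) AS c")
assert records[0]["c"] >= 2  # two chunks were split per run, so at least two in total
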
