From a68712aeb5256a9935e5bb2156d9a4c3f958407b Mon Sep 17 00:00:00 2001
From: estelle
Date: Thu, 19 Dec 2024 15:50:03 +0100
Subject: [PATCH] WIP: e2e tests

---
 poetry.lock                            |  13 +-
 pyproject.toml                         |   1 +
 tests/e2e/conftest.py                  |   6 +
 .../config/test_pipeline_runner_e2e.py |   6 -
 tests/e2e/test_simplekgpipeline_e2e.py | 147 +++++++++++++++++-
 5 files changed, 164 insertions(+), 9 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index eae77d89..ddbea133 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -5079,6 +5079,17 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
+[[package]]
+name = "weaviate"
+version = "0.1.2"
+description = "A placeholder package for the Weaviate name"
+optional = false
+python-versions = "*"
+files = [
+    {file = "weaviate-0.1.2-py3-none-any.whl", hash = "sha256:40f1c1cf0b769036315d2b6026c8cd823a3a6e951c90d4e70a001a770ba8a444"},
+    {file = "weaviate-0.1.2.tar.gz", hash = "sha256:a381b8bb0eb236bd10256def8612953ed9024e6738b8a259e7ec11e626ae0665"},
+]
+
 [[package]]
 name = "weaviate-client"
 version = "4.10.2"
@@ -5306,4 +5317,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9.0"
-content-hash = "e7c42a84eefc50ce165d6603f7b90788e33108651ca01351a4ed33cfb29ab20d"
+content-hash = "b05fbd4ce521d080d3dc2b7d2fa6a8e66cdc88b60f635b627aa7e7cd81ea65d4"
diff --git a/pyproject.toml b/pyproject.toml
index e65006bb..9b9c6415 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ json-repair = "^0.30.2"
 types-pyyaml = "^6.0.12.20240917"
 ollama = {version = "^0.4.4", optional = true}
 uuid = "^1.30"
+weaviate = "^0.1.2"
 
 [tool.poetry.group.dev.dependencies]
 urllib3 = "<2"
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 42c21cf8..a48a70ea 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -46,6 +46,12 @@ def driver() -> Generator[Any, Any, Any]:
     driver.close()
 
 
+@pytest.fixture(scope="function", autouse=True)
+def clear_db(driver: Driver) -> Any:
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    yield
+
+
 @pytest.fixture(scope="function")
 def llm() -> MagicMock:
     return MagicMock(spec=LLMInterface)
diff --git a/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py b/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
index 9122a935..a0ba328a 100644
--- a/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
+++ b/tests/e2e/experimental/pipeline/config/test_pipeline_runner_e2e.py
@@ -9,12 +9,6 @@
 from neo4j_graphrag.llm import LLMResponse
 
 
-@pytest.fixture(scope="function", autouse=True)
-def clear_db(driver: neo4j.Driver) -> Any:
-    driver.execute_query("MATCH (n) DETACH DELETE n")
-    yield
-
-
 @pytest.mark.asyncio
 async def test_pipeline_from_json_config(harry_potter_text: str, driver: Mock) -> None:
     os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
diff --git a/tests/e2e/test_simplekgpipeline_e2e.py b/tests/e2e/test_simplekgpipeline_e2e.py
index 4d3059c7..6b5ec04f 100644
--- a/tests/e2e/test_simplekgpipeline_e2e.py
+++ b/tests/e2e/test_simplekgpipeline_e2e.py
@@ -108,5 +108,148 @@ async def test_pipeline_builder_happy_path(
     )
 
     # Run the knowledge graph building process with text input
-    text_input = "John Doe lives in New York City."
-    await kg_builder_text.run_async(text=text_input)
+    await kg_builder_text.run_async(text=harry_potter_text)
+
+
+
+@pytest.mark.asyncio
+@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
+async def test_pipeline_builder_two_documents(
+    harry_potter_text_part1: str,
+    harry_potter_text_part2: str,
+    llm: MagicMock,
+    embedder: MagicMock,
+    driver: neo4j.Driver,
+) -> None:
+    """When everything works as expected, extracted entities, relations and text
+    chunks must be in the DB
+    """
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    embedder.embed_query.return_value = [1, 2, 3]
+    llm.ainvoke.side_effect = [
+        # first document
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+        # second document
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Hermione Granger"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+    ]
+
+    # Create an instance of the SimpleKGPipeline
+    kg_builder_text = SimpleKGPipeline(
+        llm=llm,
+        driver=driver,
+        embedder=embedder,
+        from_pdf=False,
+    )
+
+    # Run the knowledge graph building process with text input
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+    await kg_builder_text.run_async(text=harry_potter_text_part2)
+
+    # check graph content
+    records, _, _ = driver.execute_query("MATCH (n) RETURN n")
+    print(records)
+
+    assert False
+
+
+@pytest.mark.asyncio
+@pytest.mark.usefixtures("setup_neo4j_for_kg_construction")
+async def test_pipeline_builder_same_document_two_runs(
+    harry_potter_text_part1: str,
+    llm: MagicMock,
+    embedder: MagicMock,
+    driver: neo4j.Driver,
+) -> None:
+    """When everything works as expected, extracted entities, relations and text
+    chunks must be in the DB
+    """
+    driver.execute_query("MATCH (n) DETACH DELETE n")
+    embedder.embed_query.return_value = [1, 2, 3]
+    llm.ainvoke.side_effect = [
+        # first run
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+        # second run
+        # first chunk
+        LLMResponse(
+            content="""{
+                "nodes": [
+                    {
+                        "id": "0",
+                        "label": "Person",
+                        "properties": {
+                            "name": "Harry Potter"
+                        }
+                    },
+                ],
+                "relationships": []
+            }"""
+        ),
+        # second chunk
+        LLMResponse(content='{"nodes": [], "relationships": []}'),
+    ]
+
+    # Create an instance of the SimpleKGPipeline
+    kg_builder_text = SimpleKGPipeline(
+        llm=llm,
+        driver=driver,
+        embedder=embedder,
+        from_pdf=False,
+    )
+
+    # Run the knowledge graph building process with text input
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+    await kg_builder_text.run_async(text=harry_potter_text_part1)
+
+    # check graph content
+    records, _, _ = driver.execute_query("MATCH (n) RETURN n")
+    print(records)
+
+    assert False
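
Note: the two new tests still end in print(records) / assert False placeholders. A minimal sketch of the assertions they could converge on, assuming the writer stores each extracted entity as a node carrying the extracted label and a "name" property (the Person label and the names come from the mocked LLM responses above; the exact node shape produced by the pipeline's writer is an assumption, not something this patch pins down):

    # Hypothetical replacement for the print()/assert False placeholder in
    # test_pipeline_builder_two_documents; assumes extracted entities end up
    # as (:Person {name: ...}) nodes, mirroring the mocked LLM output.
    records, _, _ = driver.execute_query(
        "MATCH (p:Person) RETURN p.name AS name ORDER BY name"
    )
    names = [record["name"] for record in records]
    assert "Harry Potter" in names
    assert "Hermione Granger" in names

For test_pipeline_builder_same_document_two_runs, the analogous check would count the Person nodes named "Harry Potter" remaining after the second run, which also makes explicit whether entity resolution is expected to merge the two extractions into a single node.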