Skip to content

Commit

Permalink
Document schema functions (#228)
Browse files Browse the repository at this point in the history
* Document schema functions

* Ruff

* Ruff again and fix tests
  • Loading branch information
stellasia authored Dec 9, 2024
1 parent 6ac97b7 commit c166afc
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Next

### Fixed
- Added schema functions to the documentation.

## 1.2.1

### Added
Expand Down
12 changes: 12 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,11 @@ Database Interaction

.. autofunction:: neo4j_graphrag.indexes.async_upsert_vector_on_relationship

.. autofunction:: neo4j_graphrag.schema.get_structured_schema

.. autofunction:: neo4j_graphrag.schema.get_schema


******
Errors
******
Expand Down Expand Up @@ -408,6 +413,13 @@ FilterValidationError
:show-inheritance:


EmbeddingsGenerationError
=========================

.. autoclass:: neo4j_graphrag.exceptions.EmbeddingsGenerationError
:show-inheritance:


EmbeddingRequiredError
======================

Expand Down
57 changes: 54 additions & 3 deletions src/neo4j_graphrag/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import neo4j
from neo4j.exceptions import ClientError

BASE_KG_BUILDER_LABEL = "__KGBuilder__"
BASE_ENTITY_LABEL = "__Entity__"
EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"]
EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"]
Expand Down Expand Up @@ -82,13 +83,23 @@ def get_schema(
driver: neo4j.Driver,
) -> str:
"""
Returns the schema of the graph.
Returns the schema of the graph as a string with the following format:
.. code-block:: text
Node properties:
Person {id: INTEGER, name: STRING}
Relationship properties:
KNOWS {fromDate: DATE}
The relationships:
(:Person)-[:KNOWS]->(:Person)
Args:
driver (neo4j.Driver): Neo4j Python driver instance.
Returns:
str: the graph schema information in a serialized format.
"""
structured_schema = get_structured_schema(driver)

Expand Down Expand Up @@ -129,6 +140,40 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
"""
Returns the structured schema of the graph.
Returns a dict with the following format:
.. code:: python
{
'node_props': {
'Person': [{'property': 'id', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}]
},
'rel_props': {
'KNOWS': [{'property': 'fromDate', 'type': 'DATE'}]
},
'relationships': [
{'start': 'Person', 'type': 'KNOWS', 'end': 'Person'}
],
'metadata': {
'constraint': [
{'id': 7, 'name': 'person_id', 'type': 'UNIQUENESS', 'entityType': 'NODE', 'labelsOrTypes': ['Person'], 'properties': ['id'], 'ownedIndex': 'person_id', 'propertyType': None},
],
'index': [
{'label': 'Person', 'properties': ['name'], 'size': 2, 'type': 'RANGE', 'valuesSelectivity': 1.0, 'distinctValues': 2.0},
]
}
}
Note:
The internal structure of the returned dict depends on the apoc.meta.data
and apoc.schema.nodes procedures.
Warning:
Some labels are excluded from the output schema:
- The `__Entity__` and `__KGBuilder__` node labels which are created by the KG Builder pipeline within this package
- Some labels related to Bloom internals.
Args:
driver (neo4j.Driver): Neo4j Python driver instance.
Expand All @@ -140,7 +185,10 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
for data in query_database(
driver,
NODE_PROPERTIES_QUERY,
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
params={
"EXCLUDED_LABELS": EXCLUDED_LABELS
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
},
)
]

Expand All @@ -156,7 +204,10 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
for data in query_database(
driver,
REL_QUERY,
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
params={
"EXCLUDED_LABELS": EXCLUDED_LABELS
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
},
)
]

Expand Down
11 changes: 9 additions & 2 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from neo4j import Driver
from neo4j_graphrag.schema import (
BASE_ENTITY_LABEL,
BASE_KG_BUILDER_LABEL,
EXCLUDED_LABELS,
EXCLUDED_RELS,
INDEX_QUERY,
Expand Down Expand Up @@ -84,15 +85,21 @@ def test_get_structured_schema_happy_path(driver: MagicMock) -> None:
assert 5 == driver.execute_query.call_count
driver.execute_query.assert_any_call(
NODE_PROPERTIES_QUERY,
{"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
{
"EXCLUDED_LABELS": EXCLUDED_LABELS
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
},
)
driver.execute_query.assert_any_call(
REL_PROPERTIES_QUERY,
{"EXCLUDED_LABELS": EXCLUDED_RELS},
)
driver.execute_query.assert_any_call(
REL_QUERY,
{"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
{
"EXCLUDED_LABELS": EXCLUDED_LABELS
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
},
)
driver.execute_query.assert_any_call("SHOW CONSTRAINTS", {})
driver.execute_query.assert_any_call(INDEX_QUERY, {})
Expand Down

0 comments on commit c166afc

Please sign in to comment.