diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eb1b98b..1c0227b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## Next +### Added +- Introduced a fail_if_exists option to index creation functions to control behavior when an index already exists. + +### Changed +- Comprehensive rewrite of the README to improve clarity and provide detailed usage examples. + ## 1.0.0 ### Fixed diff --git a/src/neo4j_graphrag/indexes.py b/src/neo4j_graphrag/indexes.py index cdbc38d7..cf8cdadc 100644 --- a/src/neo4j_graphrag/indexes.py +++ b/src/neo4j_graphrag/indexes.py @@ -38,6 +38,7 @@ def create_vector_index( embedding_property: str, dimensions: int, similarity_fn: Literal["euclidean", "cosine"], + fail_if_exists: bool = False, neo4j_database: Optional[str] = None, ) -> None: """ @@ -46,7 +47,6 @@ def create_vector_index( See Cypher manual on `creating vector indexes `_. - Important: This operation will fail if an index with the same name already exists. Ensure that the index name provided is unique within the database context. Example: @@ -72,6 +72,7 @@ def create_vector_index( embedding_property="vectorProperty", dimensions=1536, similarity_fn="euclidean", + fail_if_exists=False, ) @@ -83,6 +84,7 @@ def create_vector_index( dimensions (int): Vector embedding dimension similarity_fn (str): case-insensitive values for the vector similarity function: ``euclidean`` or ``cosine``. + fail_if_exists (bool): If True raise an error if the index already exists. Defaults to False. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation `_). 
Raises: @@ -105,7 +107,7 @@ def create_vector_index( try: query = ( - f"CREATE VECTOR INDEX $name FOR (n:{label}) ON n.{embedding_property} OPTIONS " + f"CREATE VECTOR INDEX $name {'' if fail_if_exists else 'IF NOT EXISTS'} FOR (n:{label}) ON n.{embedding_property} OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) logger.info(f"Creating vector index named '{name}'") @@ -123,6 +125,7 @@ def create_fulltext_index( name: str, label: str, node_properties: list[str], + fail_if_exists: bool = False, neo4j_database: Optional[str] = None, ) -> None: """ @@ -131,7 +134,6 @@ def create_fulltext_index( See Cypher manual on `creating fulltext indexes `_. - Important: This operation will fail if an index with the same name already exists. Ensure that the index name provided is unique within the database context. Example: @@ -155,6 +157,7 @@ def create_fulltext_index( INDEX_NAME, label="Document", node_properties=["vectorProperty"], + fail_if_exists=False, ) @@ -163,6 +166,7 @@ def create_fulltext_index( name (str): The unique name of the index. label (str): The node label to be indexed. node_properties (list[str]): The node properties to create the fulltext index on. + fail_if_exists (bool): If True raise an error if the index already exists. Defaults to False. neo4j_database (Optional[str]): The name of the Neo4j database. If not provided, this defaults to "neo4j" in the database (`see reference to documentation `_). 
Raises: @@ -180,7 +184,7 @@ def create_fulltext_index( try: query = ( - "CREATE FULLTEXT INDEX $name " + f"CREATE FULLTEXT INDEX $name {'' if fail_if_exists else 'IF NOT EXISTS'} " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + prop + '`' for prop in node_properties])}]" ) diff --git a/tests/unit/test_indexes.py b/tests/unit/test_indexes.py index a9f4cd1c..aa6d5c30 100644 --- a/tests/unit/test_indexes.py +++ b/tests/unit/test_indexes.py @@ -30,7 +30,7 @@ def test_create_vector_index_happy_path(driver: MagicMock) -> None: create_query = ( - "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "CREATE VECTOR INDEX $name IF NOT EXISTS FOR (n:People) ON n.name OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) @@ -43,9 +43,26 @@ def test_create_vector_index_happy_path(driver: MagicMock) -> None: ) +def test_create_vector_index_fail_if_exists(driver: MagicMock) -> None: + create_query = ( + "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" + ) + + create_vector_index( + driver, "my-index", "People", "name", 2048, "cosine", fail_if_exists=True + ) + + driver.execute_query.assert_called_once_with( + create_query, + {"name": "my-index", "dimensions": 2048, "similarity_fn": "cosine"}, + database_=None, + ) + + def test_create_vector_index_ensure_escaping(driver: MagicMock) -> None: create_query = ( - "CREATE VECTOR INDEX $name FOR (n:People) ON n.name OPTIONS " + "CREATE VECTOR INDEX $name IF NOT EXISTS FOR (n:People) ON n.name OPTIONS " "{ indexConfig: { `vector.dimensions`: toInteger($dimensions), `vector.similarity_function`: $similarity_fn } }" ) @@ -120,7 +137,7 @@ def test_create_fulltext_index_happy_path(driver: MagicMock) -> None: label = "node-label" text_node_properties = ["property-1", "property-2"] create_query = ( - "CREATE FULLTEXT INDEX $name " 
+ "CREATE FULLTEXT INDEX $name IF NOT EXISTS " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" ) @@ -134,6 +151,26 @@ def test_create_fulltext_index_happy_path(driver: MagicMock) -> None: ) +def test_create_fulltext_index_fail_if_exists(driver: MagicMock) -> None: + label = "node-label" + text_node_properties = ["property-1", "property-2"] + create_query = ( + "CREATE FULLTEXT INDEX $name " + f"FOR (n:`{label}`) ON EACH " + f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" + ) + + create_fulltext_index( + driver, "my-index", label, text_node_properties, fail_if_exists=True + ) + + driver.execute_query.assert_called_once_with( + create_query, + {"name": "my-index"}, + database_=None, + ) + + def test_create_fulltext_index_raises_error_with_neo4j_client_error( driver: MagicMock, ) -> None: @@ -159,7 +196,7 @@ def test_create_fulltext_index_ensure_escaping(driver: MagicMock) -> None: label = "node-label" text_node_properties = ["property-1", "property-2"] create_query = ( - "CREATE FULLTEXT INDEX $name " + "CREATE FULLTEXT INDEX $name IF NOT EXISTS " f"FOR (n:`{label}`) ON EACH " f"[{', '.join(['n.`' + property + '`' for property in text_node_properties])}]" )