Update llamaindex property graph integration docs (#87)

neo4j-documentation · Jun 7, 2024 · 0cf123c · 0cf123c
1 parent 1f6739c
commit 0cf123c
Showing 1 changed file with 184 additions and 65 deletions.
diff --git a/modules/genai-ecosystem/pages/llamaindex.adoc b/modules/genai-ecosystem/pages/llamaindex.adoc
@@ -107,105 +107,224 @@ retriever = index.as_retriever(filters=filters)
 retriever.retrieve("What is inception about?")
 ----
 
-=== Neo4jGraphStore
+=== Neo4jPropertyGraphStore
 
-The Neo4j Graph Store integration is a wrapper for the Neo4j Python driver. 
+The Neo4j Property Graph Store integration is a wrapper for the Neo4j Python driver. 
 It allows querying and updating the Neo4j database in a simplified manner from LlamaIndex.
-Many integrations allow you to use the Neo4j Graph Store as a source of data for LlamaIndex.
+Many integrations allow you to use the Neo4j Property Graph Store as a source of data for LlamaIndex.
 
-==== Knowledge graph index
+==== Property graph index
 
 Knowledge graph index can be used to extract graph representation of information from text and use it to construct a knowledge graph.
 The graph information can then be retrieved in a RAG application for more accurate responses.
 
 [source,python]
 ----
-%pip install llama-index-llms-openai
-%pip install llama-index-graph-stores-neo4j
-%pip install llama-index-embeddings-openai
-%pip install neo4j
+%pip install llama-index llama-index-graph-stores-neo4j
 
-from llama_index.llms.openai import OpenAI
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core import PropertyGraphIndex
 from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.core import (
-    VectorStoreIndex,
-    SimpleDirectoryReader,
-    KnowledgeGraphIndex,
-    Settings
+from llama_index.llms.openai import OpenAI
+from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
+
+from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
+
+documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
+graph_store = Neo4jPropertyGraphStore(
+    username="neo4j",
+    password="password",
+    url="bolt://localhost:7687",
+)
+# Extract graph from documents
+index = PropertyGraphIndex.from_documents(
+    documents,
+    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
+    kg_extractors=[
+        SchemaLLMPathExtractor(
+            llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0)
+        )
+    ],
+    property_graph_store=graph_store,
+    show_progress=True,
 )
-from llama_index.graph_stores.neo4j import Neo4jGraphStore
 
-llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
-embedding_llm = OpenAIEmbedding(model="text-embedding-ada-002")
+# Define retriever
+retriever = index.as_retriever(
+    include_text=False,  # include source text in returned nodes, default True
+)
+results = retriever.retrieve("What happened at Interleaf and Viaweb?")
+for record in results:
+    print(record.text)
+
+# Question answering
+query_engine = index.as_query_engine(include_text=True)
+response = query_engine.query("What happened at Interleaf and Viaweb?")
+print(str(response))
+----
 
-Settings.llm = llm
-Settings.embed_model = embedding_llm
-Settings.chunk_size = 512
+==== Property graph construction modules
 
-documents = SimpleDirectoryReader(
-    "../../../../examples/paul_graham_essay/data"
-).load_data()
+LlamaIndex features multiple graph construction modules.
+Property graph construction in LlamaIndex works by running a series of `kg_extractors` on each text chunk and attaching the extracted entities and relations as metadata to each LlamaIndex node.
+You can use as many extractors as you like; all of them are applied to every chunk.
+Learn more about them in the https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/#construction[documentation^].
 
-graph_store = Neo4jGraphStore(username=username,password=password,
-    url=url,database=database)
+Here is an example of graph construction using a predefined schema.
 
-storage_context = StorageContext.from_defaults(graph_store=graph_store)
+[source,python]
+----
+%pip install llama-index llama-index-graph-stores-neo4j
 
-index = KnowledgeGraphIndex.from_documents(documents,
-    storage_context=storage_context, max_triplets_per_chunk=2,
-    include_embeddings=True
+from typing import Literal
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core import PropertyGraphIndex
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
+
+from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
+
+# best practice to use upper-case
+entities = Literal["PERSON", "PLACE", "ORGANIZATION"]
+relations = Literal["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"]
+
+# define which entities can have which relations
+validation_schema = {
+    "PERSON": ["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"],
+    "PLACE": ["HAS", "PART_OF", "WORKED_AT"],
+    "ORGANIZATION": ["HAS", "PART_OF", "WORKED_WITH"],
+}
+
+kg_extractor = SchemaLLMPathExtractor(
+    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0),
+    possible_entities=entities,
+    possible_relations=relations,
+    kg_validation_schema=validation_schema,
+    # if false, allows for values outside of the schema
+    # useful for using the schema as a suggestion
+    strict=True,
 )
-
-query_engine = index.as_query_engine(
-    include_text=True,
-    response_mode="tree_summarize",
-    embedding_mode="hybrid",
-    similarity_top_k=5,
+graph_store = Neo4jPropertyGraphStore(
+    username="neo4j",
+    password="password",
+    url="bolt://localhost:7687",
 )
-
-response = query_engine.query(
-    "Tell me more about what the author worked on at Interleaf"
+documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
+index = PropertyGraphIndex.from_documents(
+    documents,
+    kg_extractors=[kg_extractor],
+    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
+    property_graph_store=graph_store,
+    show_progress=True,
 )
 ----
 
-==== Knowledge graph query engine
+==== Property graph querying modules
+
+Labeled property graphs can be queried in several ways to retrieve nodes and paths.
+In LlamaIndex, you can also combine several node retrieval methods in a single retriever.
+Learn more about which ones are available in the https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/#retrieval-and-querying[documentation^].
 
-The Knowledge Graph Query Engine generated Cypher statements based on natural language input to retrieve information from the knowledge graph.
+You can also define a custom graph retriever as shown below.
 
 [source,python]
 ----
-%pip install llama-index-llms-openai
-%pip install llama-index-graph-stores-neo4j
-%pip install llama-index-embeddings-openai
-%pip install neo4j
-
-from llama_index.llms.openai import OpenAI
-from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.query_engine import KnowledgeGraphQueryEngine
-from llama_index.graph_stores.neo4j import Neo4jGraphStore
-
-llm=OpenAI(model_name="gpt-3.5-turbo")
-service_context = ServiceContext.from_defaults(llm=llm, chunk_size=256)
-graph_store = Neo4jGraphStore(username=username,password=password,
-    url=url,database=database)
-
-storage_context = StorageContext.from_defaults(graph_store=graph_store)
-query_engine = KnowledgeGraphQueryEngine(
-    storage_context=storage_context,
-    service_context=service_context,
-    llm=llm,
-    verbose=True,
-    refresh_schema=True
+from llama_index.core.retrievers import (
+    CustomPGRetriever,
+    VectorContextRetriever,
+    TextToCypherRetriever,
+)
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.core.graph_stores import PropertyGraphStore
+from llama_index.core.vector_stores.types import VectorStore
+from llama_index.core.embeddings import BaseEmbedding
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.llms import LLM
+from llama_index.postprocessor.cohere_rerank import CohereRerank
+
+
+from typing import Optional, Any, Union
+
+
+class MyCustomRetriever(CustomPGRetriever):
+    """Custom retriever with cohere reranking."""
+
+    def init(
+        self,
+        ## vector context retriever params
+        embed_model: Optional[BaseEmbedding] = None,
+        vector_store: Optional[VectorStore] = None,
+        similarity_top_k: int = 4,
+        path_depth: int = 1,
+        ## text-to-cypher params
+        llm: Optional[LLM] = None,
+        text_to_cypher_template: Optional[Union[PromptTemplate, str]] = None,
+        ## cohere reranker params
+        cohere_api_key: Optional[str] = None,
+        cohere_top_n: int = 2,
+        **kwargs: Any,
+    ) -> None:
+        """Uses any kwargs passed in from class constructor."""
+
+        self.vector_retriever = VectorContextRetriever(
+            self.graph_store,
+            include_text=self.include_text,
+            embed_model=embed_model,
+            vector_store=vector_store,
+            similarity_top_k=similarity_top_k,
+            path_depth=path_depth,
+        )
+
+        self.cypher_retriever = TextToCypherRetriever(
+            self.graph_store,
+            llm=llm,
+            text_to_cypher_template=text_to_cypher_template
+            ## NOTE: you can attach other parameters here if you'd like
+        )
+
+        self.reranker = CohereRerank(
+            api_key=cohere_api_key, top_n=cohere_top_n
+        )
+
+    def custom_retrieve(self, query_str: str) -> str:
+        """Define custom retriever with reranking.
+
+        Could return `str`, `TextNode`, `NodeWithScore`, or a list of those.
+        """
+        nodes_1 = self.vector_retriever.retrieve(query_str)
+        nodes_2 = self.cypher_retriever.retrieve(query_str)
+        reranked_nodes = self.reranker.postprocess_nodes(
+            nodes_1 + nodes_2, query_str=query_str
+        )
+
+        ## TMP: please change
+        final_text = "\n\n".join(
+            [n.get_content(metadata_mode="llm") for n in reranked_nodes]
+        )
+
+        return final_text
+
+custom_sub_retriever = MyCustomRetriever(
+    index.property_graph_store,
+    include_text=True,
+    vector_store=index.vector_store,
+    cohere_api_key="...",
 )
 
-response = query_engine.query(
-    "Tell me more about what the author worked on at Interleaf",
+query_engine = RetrieverQueryEngine.from_args(
+    index.as_retriever(sub_retrievers=[custom_sub_retriever]), llm=llm
 )
+
+response = query_engine.query("Did the author like programming?")
+print(str(response))
 ----
 
 == Documentation
 
-* https://docs.llamaindex.ai/en/latest/examples/index_structs/knowledge_graph/Neo4jKGIndexDemo.html[Neo4jKGIndexDemo^]
+* https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/[Property Graph Index^]
+* https://docs.llamaindex.ai/en/stable/examples/property_graph/property_graph_custom_retriever/[Custom retriever example^]
+* https://github.com/run-llama/llama_parse/blob/main/examples/knowledge_graphs/kg_agent.ipynb[Knowledge graph agent with LlamaParse^]
 * https://docs.llamaindex.ai/en/stable/examples/vector_stores/Neo4jVectorDemo.html[Neo4jVectorDemo^]
 
 * https://llamahub.ai/l/readers/llama-index-readers-graphdb-cypher[Cypher Loader^]