From d5e090a1165923da5dc22f0a04f6f760a68c5cd5 Mon Sep 17 00:00:00 2001 From: Michael Hunger Date: Thu, 28 Nov 2024 17:00:51 +0100 Subject: [PATCH] Added graphrag-python page --- modules/genai-ecosystem/nav.adoc | 1 + .../pages/_developer_survey.adoc | 7 + .../genai-ecosystem/pages/genai-stack.adoc | 8 +- .../pages/graphrag-python.adoc | 239 ++++++++++++++++++ modules/genai-ecosystem/pages/index.adoc | 9 +- modules/genai-ecosystem/pages/langchain.adoc | 8 +- modules/genai-ecosystem/pages/llamaindex.adoc | 9 +- .../pages/llm-graph-builder-deployment.adoc | 2 +- .../pages/llm-graph-builder.adoc | 8 +- modules/genai-ecosystem/pages/rag-demo.adoc | 9 +- .../genai-ecosystem/pages/vector-search.adoc | 8 +- modules/neodash/pages/index.adoc | 2 +- 12 files changed, 258 insertions(+), 52 deletions(-) create mode 100644 modules/genai-ecosystem/pages/_developer_survey.adoc create mode 100644 modules/genai-ecosystem/pages/graphrag-python.adoc diff --git a/modules/genai-ecosystem/nav.adoc b/modules/genai-ecosystem/nav.adoc index cf61ed1..356f614 100644 --- a/modules/genai-ecosystem/nav.adoc +++ b/modules/genai-ecosystem/nav.adoc @@ -7,6 +7,7 @@ **** xref:neoconverse.adoc[NeoConverse] **** xref:genai-stack.adoc[GenAI Stack] *** Neo4j GenAI Features +**** xref:graphrag-python.adoc[GraphRAG Python Package] **** xref:vector-search.adoc[Vector Index and Search] **** xref:apoc-genai.adoc[APOC GenAI] *** Cloud Examples diff --git a/modules/genai-ecosystem/pages/_developer_survey.adoc b/modules/genai-ecosystem/pages/_developer_survey.adoc new file mode 100644 index 0000000..0cc31dd --- /dev/null +++ b/modules/genai-ecosystem/pages/_developer_survey.adoc @@ -0,0 +1,7 @@ +// Developer survey +:page-ad-icon: ~ +:page-ad-title: Neo4j Developer Survey +:page-ad-description: Your input matters! 
Share your Feedback +:page-ad-underline-role: button +:page-ad-underline: Start Here +:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey diff --git a/modules/genai-ecosystem/pages/genai-stack.adoc b/modules/genai-ecosystem/pages/genai-stack.adoc index e925d2f..992b6a6 100644 --- a/modules/genai-ecosystem/pages/genai-stack.adoc +++ b/modules/genai-ecosystem/pages/genai-stack.adoc @@ -7,13 +7,7 @@ include::_graphacademy_llm.adoc[] :neo4j-versions: 5.x :page-pagination: :page-product: genai-stack -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey +include::_developer_survey.adoc[] image::https://dist.neo4j.com/wp-content/uploads/20231005063102/import-embed-data-stack-overflow.png[width=800] diff --git a/modules/genai-ecosystem/pages/graphrag-python.adoc b/modules/genai-ecosystem/pages/graphrag-python.adoc new file mode 100644 index 0000000..5dbd922 --- /dev/null +++ b/modules/genai-ecosystem/pages/graphrag-python.adoc @@ -0,0 +1,239 @@ += Neo4j GraphRAG Python Package +include::_graphacademy_llm.adoc[] +:slug: graphrag-python +:author: +:category: genai-ecosystem +:tags: graphrag, knowledgegraph, embedding, vectorsearch, neo4j, python +:neo4j-versions: 5.23+ +:page-pagination: +:page-product: neo4j + + +The Neo4j GraphRAG package is a comprehensive Python library that allows building GenAI applications. +It supports knowledge graph creation through a pipeline that extracts entities from unstructured text, generates embeddings, and creates a graph in Neo4j. +The package also provides a number of retrievers, for graph search, vector search and integration with vector databases. 
+ +== Functionality Includes + +* Knowledge Graph Construction Pipeline +* Neo4j Vector Retriever +* Vector Cypher Retriever +* Vector Database Retriever + +image::https://cdn.graphacademy.neo4j.com/assets/img/courses/banners/genai-workshop-graphrag.png[width=800,link="https://graphacademy.neo4j.com/courses/genai-workshop-graphrag/"] + +== Usage - Examples for a BioMedical Knowledge Graph + +First Knowledge Graph Construction using the SimpleKGPipeline + +image::https://dist.neo4j.com/wp-content/uploads/20241015075828/simplekgpipeline-1.png[] + +Setup of Neo4j connection, schema and foundation models (LLM, Embeddings) and extraction prompt template. + +[source,python] +---- +# Neo4j Driver +import neo4j + +neo4j_driver = neo4j.GraphDatabase.driver(NEO4J_URI, + auth=(NEO4J_USERNAME, NEO4J_PASSWORD)) + +# LLM and Embedding Model +from neo4j_graphrag.llm import OpenAILLM +from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings + +llm=OpenAILLM( + model_name="gpt-4o-mini", + model_params={ + "response_format": {"type": "json_object"}, # use json_object formatting for best results + "temperature": 0 # turning temperature down for more deterministic results + } +) + +# Graph Schema Setup +basic_node_labels = ["Object", "Entity", "Group", "Person", "Organization", "Place"] + +academic_node_labels = ["ArticleOrPaper", "PublicationOrJournal"] + +medical_node_labels = ["Anatomy", "BiologicalProcess", "Cell", "CellularComponent", + "CellType", "Condition", "Disease", "Drug", + "EffectOrPhenotype", "Exposure", "GeneOrProtein", "Molecule", + "MolecularFunction", "Pathway"] + +node_labels = basic_node_labels + academic_node_labels + medical_node_labels + +# define relationship types +rel_types = ["ACTIVATES", "AFFECTS", "ASSESSES", "ASSOCIATED_WITH", "AUTHORED", + "BIOMARKER_FOR", …] + +#create text embedder +embedder = OpenAIEmbeddings() + +# define prompt template +prompt_template = ''' +You are a medical researcher tasked with extracting information from papers +and 
structuring it in a property graph to inform further medical and research Q&A. + +Extract the entities (nodes) and specify their type from the following Input text. +Also extract the relationships between these nodes. the relationship direction goes from the start node to the end node. + + +Return result as JSON using the following format: +{{"nodes": [ {{"id": "0", "label": "the type of entity", "properties": {{"name": "name of entity" }} }}], + "relationships": [{{"type": "TYPE_OF_RELATIONSHIP", "start_node_id": "0", "end_node_id": "1", "properties": {{"details": "Description of the relationship"}} }}] }} + +... + +Use only the following nodes and relationships: +{schema} + +Assign a unique ID (string) to each node, and reuse it to define relationships. +Do respect the source and target node types for relationship and the relationship direction. + +Do not return any additional information other than the JSON in it. + +Examples: +{examples} + +Input text: + +{text} +''' +---- + +Knowledge Graph Pipeline Setup and Execution with example PDFs + +[source,python] +---- +# Knowledge Graph Builder +from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter +from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline + +kg_builder_pdf = SimpleKGPipeline( + llm=ex_llm, + driver=driver, + text_splitter=FixedSizeSplitter(chunk_size=500, chunk_overlap=100), + embedder=embedder, + entities=node_labels, + relations=rel_types, + prompt_template=prompt_template, + from_pdf=True +) + +pdf_file_paths = ['truncated-pdfs/biomolecules-11-00928-v2-trunc.pdf', + 'truncated-pdfs/GAP-between-patients-and-clinicians_2023_Best-Practice-trunc.pdf', + 'truncated-pdfs/pgpm-13-39-trunc.pdf'] + +for path in pdf_file_paths: + print(f"Processing : {path}") + pdf_result = await kg_builder_pdf.run_async(file_path=path) + print(f"Result: {pdf_result}") +---- + 
+image::https://dist.neo4j.com/wp-content/uploads/20241015075652/document-chunk-entity.png[width=800] + +Then running the GraphRAG Search with the VectorCypher Retriever. + +[source,python] +---- +from neo4j_graphrag.indexes import create_vector_index + +create_vector_index(driver, name="text_embeddings", label="Chunk", + embedding_property="embedding", dimensions=1536, similarity_fn="cosine") + +# Vector Retriever +from neo4j_graphrag.retrievers import VectorRetriever + +vector_retriever = VectorRetriever( + driver, + index_name="text_embeddings", + embedder=embedder, + return_properties=["text"], +) + +# GraphRAG Vector Cypher Retriever +from neo4j_graphrag.retrievers import VectorCypherRetriever + +graph_retriever = VectorCypherRetriever( + driver, + index_name="text_embeddings", + embedder=embedder, + retrieval_query=""" +//1) Go out 2-3 hops in the entity graph and get relationships +WITH node AS chunk +MATCH (chunk)<-[:FROM_CHUNK]-(entity)-[relList:!FROM_CHUNK]-{1,2}(nb) +UNWIND relList AS rel + +//2) collect relationships and text chunks +WITH collect(DISTINCT chunk) AS chunks, collect(DISTINCT rel) AS rels + +//3) format and return context +RETURN apoc.text.join([c in chunks | c.text], '\n') + + apoc.text.join([r in rels | + startNode(r).name+' - '+type(r)+' '+r.details+' -> '+endNode(r).name], + '\n') AS info +""" +) + +llm = LLM(model_name="gpt-4o", model_params={"temperature": 0.0}) + +rag_template = RagTemplate(template='''Answer the Question using the following Context. Only respond with information mentioned in the Context. Do not inject any speculative information not mentioned. + +# Question: +{query_text} + +# Context: +{context} + +# Answer: +''', expected_inputs=['query_text', 'context']) + +vector_rag = GraphRAG(llm=llm, retriever=vector_retriever, prompt_template=rag_template) + +graph_rag = GraphRAG(llm=llm, retriever=graph_retriever, prompt_template=rag_template) + +q = "Can you summarize systemic lupus erythematosus (SLE)? 
including common effects, biomarkers, and treatments? Provide in detailed list format." + +vector_rag.search(q, retriever_config={'top_k':5}).answer +graph_rag.search(q, retriever_config={'top_k':5}).answer +---- + +image::https://dist.neo4j.com/wp-content/uploads/20241128072906/Bildschirmfoto-2024-11-19-um-17.31.45.png[] + +== Documentation + +[cols="1,4"] +|=== +| icon:book[] Documentation | https://neo4j.com/docs/neo4j-graphrag-python/current/ +| icon:book[] Guides | https://neo4j.com/docs/neo4j-graphrag-python/current/user_guide_rag.html[RAG & GraphRAG^] +| icon:book[] Guides | https://neo4j.com/docs/neo4j-graphrag-python/current/user_guide_kg_builder.html[Guide Knowledge Graph Builder^] + +|=== + +== Relevant Links + +[cols="1,4"] +|=== +| icon:user[] Authors | Neo4j Engineering +| icon:github[] Repository | https://github.com/neo4j/neo4j-graphrag-python[GitHub] +| icon:github[] Issues | https://github.com/neo4j/neo4j-graphrag-python/issues +|=== + + +== Videos & Tutorials + +++++ + +++++ + +++++ + +++++ + +== Highlighted Articles + +* https://neo4j.com/blog/graphrag-python-package/[GraphRAG Python Package: Accelerating GenAI With Knowledge Graphs^] +* https://neo4j.com/developer-blog/get-started-graphrag-python-package/[Getting Started With the Neo4j GraphRAG Python Package^] +* https://neo4j.com/developer-blog/graph-traversal-graphrag-python-package/[Vector Search With Graph Traversal Using the Neo4j GraphRAG Package^] +* https://neo4j.com/developer-blog/hybrid-retrieval-graphrag-python-package/[Hybrid Retrieval Using the Neo4j GraphRAG Package for Python^] +* https://neo4j.com/developer-blog/enhancing-hybrid-retrieval-graphrag-python-package/[Enhancing Hybrid Retrieval With Graph Traversal: Neo4j GraphRAG Python^] \ No newline at end of file diff --git a/modules/genai-ecosystem/pages/index.adoc b/modules/genai-ecosystem/pages/index.adoc index c06ed04..21fbc5d 100644 --- a/modules/genai-ecosystem/pages/index.adoc +++ 
b/modules/genai-ecosystem/pages/index.adoc @@ -8,13 +8,7 @@ include::_graphacademy_llm.adoc[] :neo4j-versions: 5.X :page-pagination: :page-product: GenAI Ecosystem -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey +include::_developer_survey.adoc[] image::https://dist.neo4j.com/wp-content/uploads/20231030151119/genai-art-diagram-1.svg[width=800] @@ -79,6 +73,7 @@ Neo4j has worked with the main cloud providers to create GenAI integrations and Neo4j added a number of features to make it easier to build GenAI applications and integrate LLMs with knowledge graphs. +* xref:graphrag-python.adoc[GraphRAG Package (Python)] * xref:vector-search.adoc[Vector Index & Search] * xref:apoc-genai.adoc[APOC GenAI Procedures] diff --git a/modules/genai-ecosystem/pages/langchain.adoc b/modules/genai-ecosystem/pages/langchain.adoc index 480a83d..d1ffb02 100644 --- a/modules/genai-ecosystem/pages/langchain.adoc +++ b/modules/genai-ecosystem/pages/langchain.adoc @@ -7,13 +7,7 @@ include::_graphacademy_llm.adoc[] :neo4j-versions: 5.x :page-pagination: :page-product: langchain -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! 
Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey +include::_developer_survey.adoc[] image::https://dist.neo4j.com/wp-content/uploads/20230615211357/1AH05dvGA_7db_EMySc9AAw.png[width=800] diff --git a/modules/genai-ecosystem/pages/llamaindex.adoc b/modules/genai-ecosystem/pages/llamaindex.adoc index abb7cfd..189fdd7 100644 --- a/modules/genai-ecosystem/pages/llamaindex.adoc +++ b/modules/genai-ecosystem/pages/llamaindex.adoc @@ -6,14 +6,7 @@ :neo4j-versions: 5.x :page-pagination: :page-product: llamaindex -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey - +include::_developer_survey.adoc[] // image::todo.png[width=800] diff --git a/modules/genai-ecosystem/pages/llm-graph-builder-deployment.adoc b/modules/genai-ecosystem/pages/llm-graph-builder-deployment.adoc index a0479bd..3f75c01 100644 --- a/modules/genai-ecosystem/pages/llm-graph-builder-deployment.adoc +++ b/modules/genai-ecosystem/pages/llm-graph-builder-deployment.adoc @@ -171,7 +171,7 @@ uvicorn score:app --reload | Env Variable Name | Mandatory/Optional | Default Value | Description | VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | VITE_SOURCES | Optional | local,youtube,wiki,s3 | List of input sources that will be available -| VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph | URL for Bloom visualization +| VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL=\{CONNECT_URL\}&search=Show+me+a+graph | URL for Bloom visualization | VITE_LLM_MODELS | Mandatory | 
diffbot,gpt-4o,gpt-4o-mini | Comma separated list of LLM Model names to show in the selector |=== diff --git a/modules/genai-ecosystem/pages/llm-graph-builder.adoc b/modules/genai-ecosystem/pages/llm-graph-builder.adoc index 90354b8..b3a5b8e 100644 --- a/modules/genai-ecosystem/pages/llm-graph-builder.adoc +++ b/modules/genai-ecosystem/pages/llm-graph-builder.adoc @@ -9,13 +9,7 @@ include::_graphacademy_llm.adoc[] :page-pagination: :page-product: llm-graph-builder :imagesdir: https://dev.assets.neo4j.com/wp-content/uploads/2024/ -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey +include::_developer_survey.adoc[] // image::llm-graph-builder.png[width=600, align=center] diff --git a/modules/genai-ecosystem/pages/rag-demo.adoc b/modules/genai-ecosystem/pages/rag-demo.adoc index de6812a..e225e4c 100644 --- a/modules/genai-ecosystem/pages/rag-demo.adoc +++ b/modules/genai-ecosystem/pages/rag-demo.adoc @@ -7,13 +7,8 @@ include::_graphacademy_llm.adoc[] :neo4j-versions: 5.x :page-pagination: :page-product: rag-demo -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! 
Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey + +include::_developer_survey.adoc[] image::https://res.cloudinary.com/dk0tizgdn/image/upload/v1707842287/rag-demo-short_vwezew.gif[width=600, align=center] diff --git a/modules/genai-ecosystem/pages/vector-search.adoc b/modules/genai-ecosystem/pages/vector-search.adoc index 40e5380..bdd2eb6 100644 --- a/modules/genai-ecosystem/pages/vector-search.adoc +++ b/modules/genai-ecosystem/pages/vector-search.adoc @@ -1,5 +1,6 @@ = Neo4j Vector Index and Search include::_graphacademy_llm.adoc[] +include::_developer_survey.adoc[] :slug: vector-search :author: :category: genai-ecosystem @@ -7,13 +8,6 @@ include::_graphacademy_llm.adoc[] :neo4j-versions: 5.11+ :page-pagination: :page-product: neo4j -// Developer survey -:page-ad-icon: ~ -:page-ad-title: Neo4j Developer Survey -:page-ad-description: Your input matters! Share your Feedback -:page-ad-underline-role: button -:page-ad-underline: Start Here -:page-ad-link: https://neo4j.typeform.com/to/E6yOZ2Py?utm_source=GA&utm_medium=blurb&utm_campaign=survey image::https://dist.neo4j.com/wp-content/uploads/20230821135317/Grounding-LLM-Responses-with-Implicit-and-Explicit-Search-Through-Neo4js-Knowledge-Graph-2048x1152.png[width=800] diff --git a/modules/neodash/pages/index.adoc b/modules/neodash/pages/index.adoc index e35aaf8..32ea2f4 100644 --- a/modules/neodash/pages/index.adoc +++ b/modules/neodash/pages/index.adoc @@ -5,7 +5,7 @@ For users of the supported NeoDash offering, refer to https://neo4j.com/docs/neo ==== -= NeoDash - Dashboard Builder for Neo4j +== NeoDash - Dashboard Builder for Neo4j :imagesdir: https://s3.amazonaws.com/dev.assets.neo4j.com/wp-content/uploads :slug: neodash :author: Niels de Jong