diff --git a/POC_Documents/V1/From Local to Global.docx b/POC_Documents/V1/From Local to Global.docx deleted file mode 100644 index 267ad2478..000000000 Binary files a/POC_Documents/V1/From Local to Global.docx and /dev/null differ diff --git a/POC_Documents/V1/propsed RAG genAI Architecture.docx b/POC_Documents/V1/propsed RAG genAI Architecture.docx deleted file mode 100644 index d149b5195..000000000 Binary files a/POC_Documents/V1/propsed RAG genAI Architecture.docx and /dev/null differ diff --git a/POC_Documents/V1/propsed chatbot architecture.jpg b/POC_Documents/V1/propsed chatbot architecture.jpg deleted file mode 100644 index 5cb87df27..000000000 Binary files a/POC_Documents/V1/propsed chatbot architecture.jpg and /dev/null differ diff --git a/README.md b/README.md index a048e6d6d..6f4b324d6 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,34 @@ - # Knowledge Graph Builder App -This application is designed to convert PDF documents into a knowledge graph stored in Neo4j. It utilizes the power of OpenAI's GPT/Diffbot LLM(Large language model) to extract nodes, relationships and properties from the text content of the PDF and then organizes them into a structured knowledge graph using Langchain framework. -Files can be uploaded from local machine or S3 bucket and then LLM model can be chosen to create the knowledge graph. -### Getting started +Creating knowledge graphs from unstructured data + + +# LLM Graph Builder + +![Python](https://img.shields.io/badge/Python-yellow) +![FastAPI](https://img.shields.io/badge/FastAPI-green) +![React](https://img.shields.io/badge/React-blue) + +## Overview +This application is designed to turn Unstructured data (pdfs,docs,txt,youtube video,web pages,etc.) into a knowledge graph stored in Neo4j. It utilizes the power of Large language models (OpenAI,Gemini,etc.) to extract nodes, relationships and their properties from the text and create a structured knowledge graph using Langchain framework. 
+ +Upload your files from local machine, GCS or S3 bucket or from web sources, choose your LLM model and generate knowledge graph. + +## Key Features +- **Knowledge Graph Creation**: Transform unstructured data into structured knowledge graphs using LLMs. +- **Providing Schema**: Provide your own custom schema or use existing schema in settings to generate graph. +- **View Graph**: View graph for a particular source or multiple sources at a time in Bloom. +- **Chat with Data**: Interact with your data in a Neo4j database through conversational queries, also retrieve metadata about the source of response to your queries. + +## Getting started :warning: You will need to have a Neo4j Database V5.15 or later with [APOC installed](https://neo4j.com/docs/apoc/current/installation/) to use this Knowledge Graph Builder. You can use any [Neo4j Aura database](https://neo4j.com/aura/) (including the free database) If you are using Neo4j Desktop, you will not be able to use the docker-compose but will have to follow the [separate deployment of backend and frontend section](#running-backend-and-frontend-separately-dev-environment). :warning: -### Deploy locally + +## Deployment +### Local deployment #### Running through docker-compose By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations. @@ -21,13 +40,13 @@ DIFFBOT_API_KEY="your-diffbot-key" if you only want OpenAI: ```env -LLM_MODELS="OpenAI GPT 3.5,OpenAI GPT 4o" +LLM_MODELS="gpt-3.5,gpt-4o" OPENAI_API_KEY="your-openai-key" ``` if you only want Diffbot: ```env -LLM_MODELS="Diffbot" +LLM_MODELS="diffbot" DIFFBOT_API_KEY="your-diffbot-key" ``` @@ -36,16 +55,16 @@ You can then run Docker Compose to build and start all components: docker-compose up --build ``` -##### Additional configs +#### Additional configs -By default, the input sources will be: Local files, Youtube, Wikipedia and AWS S3. 
As this default config is applied: +By default, the input sources will be: Local files, Youtube, Wikipedia ,AWS S3 and Webpages. As this default config is applied: ```env -REACT_APP_SOURCES="local,youtube,wiki,s3" +REACT_APP_SOURCES="local,youtube,wiki,s3,web" ``` If however you want the Google GCS integration, add `gcs` and your Google client ID: ```env -REACT_APP_SOURCES="local,youtube,wiki,s3,gcs" +REACT_APP_SOURCES="local,youtube,wiki,s3,gcs,web" GOOGLE_CLIENT_ID="xxxx" ``` @@ -76,7 +95,24 @@ Alternatively, you can run the backend and frontend separately: pip install -r requirements.txt uvicorn score:app --reload ``` -### ENV +### Deploy in Cloud +To deploy the app and packages on Google Cloud Platform, run the following command on google cloud run: +```bash +# Frontend deploy +gcloud run deploy +source location current directory > Frontend +region : 32 [us-central 1] +Allow unauthenticated request : Yes +``` +```bash +# Backend deploy +gcloud run deploy --set-env-vars "OPENAI_API_KEY = " --set-env-vars "DIFFBOT_API_KEY = " --set-env-vars "NEO4J_URI = " --set-env-vars "NEO4J_PASSWORD = " --set-env-vars "NEO4J_USERNAME = " +source location current directory > Backend +region : 32 [us-central 1] +Allow unauthenticated request : Yes +``` + +## ENV | Env Variable Name | Mandatory/Optional | Default Value | Description | |-------------------------|--------------------|---------------|--------------------------------------------------------------------------------------------------| | OPENAI_API_KEY | Mandatory | | API key for OpenAI | @@ -86,7 +122,7 @@ Alternatively, you can run the backend and frontend separately: | KNN_MIN_SCORE | Optional | 0.94 | Minimum score for KNN algorithm | | GEMINI_ENABLED | Optional | False | Flag to enable Gemini | | GCP_LOG_METRICS_ENABLED | Optional | False | Flag to enable Google Cloud logs | -| NUMBER_OF_CHUNKS_TO_COMBINE | Optional | 6 | Number of chunks to combine when processing embeddings | +| NUMBER_OF_CHUNKS_TO_COMBINE | 
Optional | 5 | Number of chunks to combine when processing embeddings | | UPDATE_GRAPH_CHUNKS_PROCESSED | Optional | 20 | Number of chunks processed before updating progress | | NEO4J_URI | Optional | neo4j://database:7687 | URI for Neo4j database | | NEO4J_USERNAME | Optional | neo4j | Username for Neo4j database | @@ -98,86 +134,36 @@ Alternatively, you can run the backend and frontend separately: | BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | | BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | | REACT_APP_SOURCES | Optional | local,youtube,wiki,s3 | List of input sources that will be available | -| LLM_MODELS | Optional | Diffbot,OpenAI GPT 3.5,OpenAI GPT 4o | Models available for selection on the frontend, used for entities extraction and Q&A Chatbot | +| LLM_MODELS | Optional | diffbot,gpt-3.5,gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A Chatbot | | ENV | Optional | DEV | Environment variable for the app | | TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing | -| CHUNK_SIZE | Optional | 5242880 | Size of each chunk for processing | +| CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload | | GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication | +| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. 
If set to False, will save the files locally | -### -To deploy the app and packages on Google Cloud Platform, run the following command on google cloud run: -```bash -# Frontend deploy -gcloud run deploy -source location current directory > Frontend -region : 32 [us-central 1] -Allow unauthenticated request : Yes -``` -```bash -# Backend deploy -gcloud run deploy --set-env-vars "OPENAI_API_KEY = " --set-env-vars "DIFFBOT_API_KEY = " --set-env-vars "NEO4J_URI = " --set-env-vars "NEO4J_PASSWORD = " --set-env-vars "NEO4J_USERNAME = " -source location current directory > Backend -region : 32 [us-central 1] -Allow unauthenticated request : Yes -``` -### Features -- **PDF Upload**: Users can upload PDF documents using the Drop Zone. -- **S3 Bucket Integration**: Users can also specify PDF documents stored in an S3 bucket for processing. -- **Knowledge Graph Generation**: The application employs OpenAI/Diffbot's LLM to extract relevant information from the PDFs and construct a knowledge graph. -- **Neo4j Integration**: The extracted nodes and relationships are stored in a Neo4j database for easy visualization and querying. -- **Grid View of source node files with** : Name,Type,Size,Nodes,Relations,Duration,Status,Source,Model - -## Functions/Modules - -#### extract_graph_from_file(uri, userName, password, file_path, model): - Extracts nodes , relationships and properties from a PDF file leveraging LLM models. - - Args: - uri: URI of the graph to extract - userName: Username to use for graph creation ( if None will use username from config file ) - password: Password to use for graph creation ( if None will use password from config file ) - file: File object containing the PDF file path to be used - model: Type of model to use ('Gemini Pro' or 'Diffbot') - - Returns: - Json response to API with fileName, nodeCount, relationshipCount, processingTime, - status and model as attributes. 
- -neoooo - -#### create_source_node_graph(uri, userName, password, file): - - Creates a source node in Neo4jGraph and sets properties. - - Args: - uri: URI of Graph Service to connect to - userName: Username to connect to Graph Service with ( default : None ) - password: Password to connect to Graph Service with ( default : None ) - file: File object with information about file to be added - - Returns: - Success or Failure message of node creation - -neo_workspace - - -#### get_source_list_from_graph(): - - Returns a list of file sources in the database by querying the graph and - sorting the list by the last updated date. - -get_source - -#### Chunk nodes and embeddings creation in Neo4j - -chunking - - -## Application Walkthrough -https://github.com/neo4j-labs/llm-graph-builder/assets/121786590/b725a503-6ade-46d2-9e70-61d57443c311 + +## Usage +1. Connect to Neo4j Aura Instance by passing URI and password or using Neo4j credentials file. +2. Choose your source from a list of Unstructured sources to create graph. +3. Change the LLM (if required) from drop down, which will be used to generate graph. +4. Optionally, define schema(nodes and relationship labels) in entity graph extraction settings. +5. Either select multiple files to 'Generate Graph' or all the files in 'New' status will be processed for graph creation. +6. Have a look at the graph for individual files using 'View' in grid or select one or more files and 'Preview Graph' +7. Ask questions related to the processed/completed sources to chat-bot, also get detailed information about your answers generated by LLM. ## Links - The Public [ Google cloud Run URL](https://devfrontend-dcavk67s4a-uc.a.run.app). 
- [Workspace URL](https://workspace-preview.neo4j.io/workspace) +[LLM Knowledge Graph Builder Application](https://llm-graph-builder.neo4jlabs.com/) + +[Neo4j Workspace](https://workspace-preview.neo4j.io/workspace/query) + +## Reference + +[Demo of application](https://www.youtube.com/watch?v=LlNy5VmV290) + +## Contact +For any inquiries or support, feel free to raise [Github Issue](https://github.com/neo4j-labs/llm-graph-builder/issues) + + +## Happy Graph Building! \ No newline at end of file diff --git a/backend/example.env b/backend/example.env index 20574cc68..fe9124bc8 100644 --- a/backend/example.env +++ b/backend/example.env @@ -20,4 +20,6 @@ LANGCHAIN_API_KEY = "" LANGCHAIN_PROJECT = "" LANGCHAIN_TRACING_V2 = "" LANGCHAIN_ENDPOINT = "" -GCS_FILE_CACHE = "" #save the file into GCS or local, SHould be True or False \ No newline at end of file +GCS_FILE_CACHE = "" #save the file into GCS or local, SHould be True or False +NEO4J_USER_AGENT = "" +ENABLE_USER_AGENT = "" \ No newline at end of file diff --git a/backend/score.py b/backend/score.py index 70cb10480..abfa5007d 100644 --- a/backend/score.py +++ b/backend/score.py @@ -18,6 +18,7 @@ from src.graphDB_dataAccess import graphDBdataAccess from src.graph_query import get_graph_results from src.chunkid_entities import get_entities_from_chunkids +from src.post_processing import create_fulltext from sse_starlette.sse import EventSourceResponse import json from typing import List, Mapping @@ -30,7 +31,9 @@ from google.cloud import logging as gclogger from src.logger import CustomLogger from datetime import datetime +from fastapi.middleware.gzip import GZipMiddleware import time +import gc logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -55,6 +58,7 @@ def sick(): allow_methods=["*"], allow_headers=["*"], ) +app.add_middleware(GZipMiddleware, minimum_size=1000) is_gemini_enabled = os.environ.get("GEMINI_ENABLED", "False").lower() in ("true", "1", "yes") if 
is_gemini_enabled: @@ -97,6 +101,9 @@ async def create_source_knowledge_graph_url( elif source_type == 'gcs bucket': lst_file_name,success_count,failed_count = create_source_node_graph_url_gcs(graph, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, source_type,Credentials(access_token) ) + elif source_type == 'web-url': + lst_file_name,success_count,failed_count = await asyncio.to_thread(create_source_node_graph_web_url,graph, model, source_url, source_type + ) elif source_type == 'youtube': lst_file_name,success_count,failed_count = await asyncio.to_thread(create_source_node_graph_url_youtube,graph, model, source_url, source_type ) @@ -107,7 +114,7 @@ async def create_source_knowledge_graph_url( return create_api_response('Failed',message='source_type is other than accepted source') message = f"Source Node created successfully for source type: {source_type} and source: {source}" - josn_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name} + josn_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query} logger.log_struct(josn_obj) return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name) except Exception as e: @@ -116,6 +123,8 @@ async def create_source_knowledge_graph_url( logging.exception(f'Exception Stack trace:') return create_api_response('Failed',message=message + error_message[:80],error=error_message,file_source=source_type) finally: + gc.collect() + if graph is not None: close_db_connection(graph, 'url/scan') @app.post("/extract") @@ -167,6 +176,10 @@ async def extract_knowledge_graph_from_file( elif source_type == 's3 bucket' and source_url: result = await asyncio.to_thread( extract_graph_from_file_s3, graph, model, source_url, aws_access_key_id, aws_secret_access_key, allowedNodes, allowedRelationship) + + elif source_type == 'web-url': + result = await asyncio.to_thread( + 
extract_graph_from_web_page, graph, model, source_url, allowedNodes, allowedRelationship) elif source_type == 'youtube' and source_url: result = await asyncio.to_thread( @@ -184,6 +197,9 @@ async def extract_knowledge_graph_from_file( if result is not None: result['db_url'] = uri result['api_name'] = 'extract' + result['source_url'] = source_url + result['wiki_query'] = wiki_query + result['source_type'] = source_type logger.log_struct(result) return create_api_response('Success', data=result, file_source= source_type) except Exception as e: @@ -194,15 +210,19 @@ async def extract_knowledge_graph_from_file( if source_type == 'local file': if gcs_file_cache == 'True': folder_name = create_gcs_bucket_folder_name_hashed(uri,file_name) + copy_failed_file(BUCKET_UPLOAD, BUCKET_FAILED_FILE, folder_name, file_name) + time.sleep(5) delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) else: logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') delete_uploaded_local_file(merged_file_path,file_name) - josn_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type} + josn_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query} logger.log_struct(josn_obj) logging.exception(f'File Failed in extraction: {josn_obj}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name) finally: + gc.collect() + if graph is not None: close_db_connection(graph, 'extract') @app.get("/sources_list") @@ -225,41 +245,51 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None logging.exception(f'Exception:{error_message}') return create_api_response(job_status, message=message, error=error_message) 
-@app.post("/update_similarity_graph") -async def update_similarity_graph(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None)): - """ - Calls 'update_graph' which post the query to update the similiar nodes in the graph - """ +@app.post("/post_processing") +async def post_processing(uri=Form(None), userName=Form(None), password=Form(None), database=Form(None), tasks=Form(None)): try: graph = create_graph_database_connection(uri, userName, password, database) - await asyncio.to_thread(update_graph, graph) - - josn_obj = {'api_name':'update_similarity_graph','db_url':uri} - logger.log_struct(josn_obj) - return create_api_response('Success',message='Updated KNN Graph') + tasks = set(map(str.strip, json.loads(tasks))) + + if "update_similarity_graph" in tasks: + await asyncio.to_thread(update_graph, graph) + josn_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri} + logger.log_struct(josn_obj) + logging.info(f'Updated KNN Graph') + if "create_fulltext_index" in tasks: + await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database) + josn_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri} + logger.log_struct(josn_obj) + logging.info(f'Full Text index created') + + return create_api_response('Success', message='All tasks completed successfully') + except Exception as e: job_status = "Failed" - message="Unable to update KNN Graph" error_message = str(e) - logging.exception(f'Exception in update KNN graph:{error_message}') + message = f"Unable to complete tasks" + logging.exception(f'Exception in post_processing tasks: {error_message}') return create_api_response(job_status, message=message, error=error_message) + finally: - close_db_connection(graph, 'update_similarity_graph') + gc.collect() + if graph is not None: + close_db_connection(graph, 'post_processing') @app.post("/chat_bot") -async def 
chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), session_id=Form(None)): +async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), session_id=Form(None),mode=Form(None)): logging.info(f"QA_RAG called at {datetime.now()}") qa_rag_start_time = time.time() try: # database = "neo4j" graph = create_graph_database_connection(uri, userName, password, database) - result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,session_id=session_id) + result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,session_id=session_id,mode=mode) total_call_time = time.time() - qa_rag_start_time logging.info(f"Total Response time is {total_call_time:.2f} seconds") result["info"]["response_time"] = round(total_call_time, 2) - josn_obj = {'api_name':'chat_bot','db_url':uri} + josn_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id} logger.log_struct(josn_obj) return create_api_response('Success',data=result) except Exception as e: @@ -268,11 +298,13 @@ async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password error_message = str(e) logging.exception(f'Exception in chat bot:{error_message}') return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() @app.post("/chunk_entities") async def chunk_entities(uri=Form(None),userName=Form(None), password=Form(None), chunk_ids=Form(None)): try: - logging.info(f"URI: {uri}, Username: {userName},password:{password}, chunk_ids: {chunk_ids}") + logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}") result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids) josn_obj = {'api_name':'chunk_entities','db_url':uri} logger.log_struct(josn_obj) @@ -283,6 +315,8 @@ async def 
chunk_entities(uri=Form(None),userName=Form(None), password=Form(None) error_message = str(e) logging.exception(f'Exception in chat bot:{error_message}') return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() @app.post("/graph_query") async def graph_query( @@ -302,7 +336,7 @@ async def graph_query( query_type=query_type, document_names=document_names ) - josn_obj = {'api_name':'graph_query','db_url':uri} + josn_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names} logger.log_struct(josn_obj) return create_api_response('Success', data=result) except Exception as e: @@ -311,6 +345,9 @@ async def graph_query( error_message = str(e) logging.exception(f'Exception in graph query: {error_message}') return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + @app.post("/clear_chat_bot") async def clear_chat_bot(uri=Form(None),userName=Form(None), password=Form(None), database=Form(None), session_id=Form(None)): @@ -325,6 +362,8 @@ async def clear_chat_bot(uri=Form(None),userName=Form(None), password=Form(None) logging.exception(f'Exception in chat bot:{error_message}') return create_api_response(job_status, message=message, error=error_message) finally: + gc.collect() + if graph is not None: close_db_connection(graph, 'clear_chat_bot') @app.post("/connect") @@ -363,6 +402,8 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber logging.exception(f'Exception:{error_message}') return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = originalname) finally: + gc.collect() + if graph is not None: close_db_connection(graph, 'upload') @app.post("/schema") @@ -381,6 +422,8 @@ async def get_structured_schema(uri=Form(None), userName=Form(None), password=Fo logging.exception(f'Exception:{error_message}') return create_api_response("Failed", message=message, error=error_message) finally: 
+ gc.collect() + if graph is not None: close_db_connection(graph, 'schema') def decode_password(pwd): @@ -424,13 +467,13 @@ async def generate(): return EventSourceResponse(generate(),ping=60) @app.post("/delete_document_and_entities") -async def delete_document_and_entities(uri=Form(None), - userName=Form(None), - password=Form(None), - database=Form(None), - filenames=Form(None), - source_types=Form(None), - deleteEntities=Form(None)): +async def delete_document_and_entities(uri=Form(), + userName=Form(), + password=Form(), + database=Form(), + filenames=Form(), + source_types=Form(), + deleteEntities=Form()): try: graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) @@ -447,6 +490,8 @@ async def delete_document_and_entities(uri=Form(None), logging.exception(f'{message}:{error_message}') return create_api_response(job_status, message=message, error=error_message) finally: + gc.collect() + if graph is not None: close_db_connection(graph, 'delete_document_and_entities') @app.get('/document_status/{file_name}') @@ -497,7 +542,9 @@ async def cancelled_job(uri=Form(None), userName=Form(None), password=Form(None) logging.exception(f'Exception in cancelling the running job:{error_message}') return create_api_response(job_status, message=message, error=error_message) finally: - close_db_connection(graph, 'cancelled_job') + gc.collect() + if graph is not None: + close_db_connection(graph, 'cancelled_job') @app.post("/populate_graph_schema") async def populate_graph_schema(input_text=Form(None), model=Form(None), is_schema_description_checked=Form(None)): @@ -510,6 +557,44 @@ async def populate_graph_schema(input_text=Form(None), model=Form(None), is_sche error_message = str(e) logging.exception(f'Exception in getting the schema from text:{error_message}') return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + +@app.post("/get_unconnected_nodes_list") 
+async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form()): + try: + graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.list_unconnected_nodes() + return create_api_response('Success',data=result) + except Exception as e: + job_status = "Failed" + message="Unable to get the list of unconnected nodes" + error_message = str(e) + logging.exception(f'Exception in getting list of unconnected nodes:{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + if graph is not None: + close_db_connection(graph,"get_unconnected_nodes_list") + gc.collect() + +@app.post("/delete_unconnected_nodes") +async def get_unconnected_nodes_list(uri=Form(), userName=Form(), password=Form(), database=Form(),unconnected_entities_list=Form()): + try: + graph = create_graph_database_connection(uri, userName, password, database) + graphDb_data_Access = graphDBdataAccess(graph) + result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list) + return create_api_response('Success',data=result,message="Unconnected entities delete successfully") + except Exception as e: + job_status = "Failed" + message="Unable to delete the unconnected nodes" + error_message = str(e) + logging.exception(f'Exception in delete the unconnected nodes:{error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + if graph is not None: + close_db_connection(graph,"delete_unconnected_nodes") + gc.collect() if __name__ == "__main__": uvicorn.run(app) \ No newline at end of file diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py index 8a7063691..20347d076 100644 --- a/backend/src/QA_integration_new.py +++ b/backend/src/QA_integration_new.py @@ -2,9 +2,6 @@ from langchain.graphs import Neo4jGraph import os from dotenv import 
load_dotenv -from langchain_openai import ChatOpenAI -from langchain_google_vertexai import ChatVertexAI -from langchain_google_vertexai import HarmBlockThreshold, HarmCategory import logging from langchain_community.chat_message_histories import Neo4jChatMessageHistory from src.shared.common_fn import load_embedding_model, get_llm @@ -28,74 +25,13 @@ EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL') EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL) -RETRIEVAL_QUERY = """ -WITH node as chunk, score -MATCH (chunk)-[:PART_OF]->(d:Document) -CALL { WITH chunk -MATCH (chunk)-[:HAS_ENTITY]->(e) -MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) -UNWIND rels as r -RETURN collect(distinct r) as rels -} -WITH d, collect(distinct chunk) as chunks, avg(score) as score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels -WITH d, score, -[c in chunks | c.text] as texts, [c in chunks | c.id] as chunkIds, [c in chunks | c.start_time] as start_time, [c in chunks | c.page_number] as page_numbers, [c in chunks | c.start_time] as start_times, -[r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities -WITH d, score, -apoc.text.join(texts,"\n----\n") + -apoc.text.join(entities,"\n") -as text, entities, chunkIds, page_numbers ,start_times -RETURN text, score, {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkIds:chunkIds, page_numbers:page_numbers,start_times:start_times,entities:entities} as metadata -""" - -SYSTEM_TEMPLATE = """ -You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources. - -### Response Guidelines: -1. 
**Direct Answers**: Provide clear and thorough answers to the user's queries without headers unless requested. Avoid speculative responses. -2. **Utilize History and Context**: Leverage relevant information from previous interactions, the current user input, and the context provided below. -3. **No Greetings in Follow-ups**: Start with a greeting in initial interactions. Avoid greetings in subsequent responses unless there's a significant break or the chat restarts. -4. **Admit Unknowns**: Clearly state if an answer is unknown. Avoid making unsupported statements. -5. **Avoid Hallucination**: Only provide information based on the context provided. Do not invent information. -6. **Response Length**: Keep responses concise and relevant. Aim for clarity and completeness within 4-5 sentences unless more detail is requested. -7. **Tone and Style**: Maintain a professional and informative tone. Be friendly and approachable. -8. **Error Handling**: If a query is ambiguous or unclear, ask for clarification rather than providing a potentially incorrect answer. -9. **Fallback Options**: If the required information is not available in the provided context, provide a polite and helpful response. Example: "I don't have that information right now." or "I'm sorry, but I don't have that information. Is there something else I can help with?" -10. **Context Availability**: If the context is empty, do not provide answers based solely on internal knowledge. Instead, respond appropriately by indicating the lack of information. - - -**IMPORTANT** : DO NOT ANSWER FROM YOUR KNOWLEDGE BASE USE THE BELOW CONTEXT - -### Context: - -{context} - - -### Example Responses: -User: Hi -AI Response: 'Hello there! How can I assist you today?' - -User: "What is Langchain?" -AI Response: "Langchain is a framework that enables the development of applications powered by large language models, such as chatbots. 
It simplifies the integration of language models into various applications by providing useful tools and components." - -User: "Can you explain how to use memory management in Langchain?" -AI Response: "Langchain's memory management involves utilizing built-in mechanisms to manage conversational context effectively. It ensures that the conversation remains coherent and relevant by maintaining the history of interactions and using it to inform responses." - -User: "I need help with PyCaret's classification model." -AI Response: "PyCaret simplifies the process of building and deploying machine learning models. For classification tasks, you can use PyCaret's setup function to prepare your data. After setup, you can compare multiple models to find the best one, and then fine-tune it for better performance." - -User: "What can you tell me about the latest realtime trends in AI?" -AI Response: "I don't have that information right now. Is there something else I can help with?" - -Note: This system does not generate answers based solely on internal knowledge. It answers from the information provided in the user's current and previous inputs, and from the context. 
-""" - -def get_neo4j_retriever(graph, index_name="vector", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): + +def get_neo4j_retriever(graph, retrieval_query,index_name="vector", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD): try: neo_db = Neo4jVector.from_existing_index( embedding=EMBEDDING_FUNCTION, index_name=index_name, - retrieval_query=RETRIEVAL_QUERY, + retrieval_query=retrieval_query, graph=graph ) logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'") @@ -107,20 +43,16 @@ def get_neo4j_retriever(graph, index_name="vector", search_k=CHAT_SEARCH_KWARG_K return None def create_document_retriever_chain(llm,retriever): - question_template= "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." - query_transform_prompt = ChatPromptTemplate.from_messages( [ - ("system", question_template), + ("system", QUESTION_TRANSFORM_TEMPLATE), MessagesPlaceholder(variable_name="messages") ] ) output_parser = StrOutputParser() - splitter = TokenTextSplitter(chunk_size=2000, chunk_overlap=0) - # extractor = LLMChainExtractor.from_llm(llm) - # redundant_filter = EmbeddingsRedundantFilter(embeddings=EMBEDDING_FUNCTION) - embeddings_filter = EmbeddingsFilter(embeddings=EMBEDDING_FUNCTION, similarity_threshold=0.25) + splitter = TokenTextSplitter(chunk_size=CHAT_DOC_SPLIT_SIZE, chunk_overlap=0) + embeddings_filter = EmbeddingsFilter(embeddings=EMBEDDING_FUNCTION, similarity_threshold=CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD) pipeline_compressor = DocumentCompressorPipeline( transformers=[splitter, embeddings_filter] @@ -157,9 +89,14 @@ def create_neo4j_chat_message_history(graph, session_id): logging.error(f"Error creating Neo4jChatMessageHistory: {e}") return None -def format_documents(documents): +def format_documents(documents,model): + prompt_token_cutoff = 4 + for models,value in 
CHAT_TOKEN_CUT_OFF.items(): + if model in models: + prompt_token_cutoff = value + sorted_documents = sorted(documents, key=lambda doc: doc.state["query_similarity_score"], reverse=True) - sorted_documents = sorted_documents[:7] + sorted_documents = sorted_documents[:prompt_token_cutoff] formatted_docs = [] sources = set() @@ -178,7 +115,7 @@ def format_documents(documents): return "\n\n".join(formatted_docs), sources -def get_rag_chain(llm,system_template=SYSTEM_TEMPLATE): +def get_rag_chain(llm,system_template=CHAT_SYSTEM_TEMPLATE): question_answering_prompt = ChatPromptTemplate.from_messages( [ ("system", system_template), @@ -193,54 +130,26 @@ def get_rag_chain(llm,system_template=SYSTEM_TEMPLATE): return question_answering_chain -def update_timestamps_with_min_seconds(result_dict): - def time_to_seconds(time_str): - h, m, s = map(int, time_str.split(':')) - return h * 3600 + m * 60 + s - - for source in result_dict.get('sources', []): - time_stamps = source.get('start_time', []) - if time_stamps: - seconds_list = [time_to_seconds(ts) for ts in time_stamps] - min_seconds = min(seconds_list) - source['start_time'] = min_seconds - - return result_dict - def get_sources_and_chunks(sources_used, docs): - docs_metadata = dict() + chunkdetails_list = [] + sources_used_set = set(sources_used) + for doc in docs: source = doc.metadata["source"] - chunkids = doc.metadata["chunkIds"] - page_numbers = doc.metadata["page_numbers"] - start_times = doc.metadata["start_times"] - docs_metadata[source] = [chunkids,page_numbers,start_times] - chunkids = list() - output_sources = list() - for source in sources_used: - if source in set(docs_metadata.keys()): - chunkids.extend(docs_metadata[source][0]) - page_numbers = docs_metadata[source][1] - start_times = docs_metadata[source][2] - current_source = { - "source_name":source, - "page_numbers":page_numbers if len(page_numbers) > 1 and page_numbers[0] is not None else [], - "start_time": start_times if len(start_times) > 1 and 
start_times[0] is not None else [], - } - output_sources.append(current_source) + chunkdetails = doc.metadata["chunkdetails"] + if source in sources_used_set: + chunkdetails = [{**chunkdetail, "score": round(chunkdetail["score"], 4)} for chunkdetail in chunkdetails] + chunkdetails_list.extend(chunkdetails) result = { - 'sources': output_sources, - 'chunkIds': chunkids + 'sources': sources_used, + 'chunkdetails': chunkdetails_list } - - result = update_timestamps_with_min_seconds(result) return result def summarize_messages(llm,history,stored_messages): if len(stored_messages) == 0: return False - # summarization_template = "Distill the below chat messages into a single summary message. Include as many specific details as you can." summarization_prompt = ChatPromptTemplate.from_messages( [ MessagesPlaceholder(variable_name="chat_history"), @@ -273,75 +182,98 @@ def clear_chat_history(graph,session_id): "user": "chatbot" } -def QA_RAG(graph,model,question,session_id): +def setup_chat(model, graph, session_id, retrieval_query): + start_time = time.time() + model_version = MODEL_VERSIONS[model] + llm = get_llm(model_version) + retriever = get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query) + doc_retriever = create_document_retriever_chain(llm, retriever) + history = create_neo4j_chat_message_history(graph, session_id) + chat_setup_time = time.time() - start_time + logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") + + return llm, doc_retriever, history, model_version + +def retrieve_documents(doc_retriever, messages): + start_time = time.time() + docs = doc_retriever.invoke({"messages": messages}) + doc_retrieval_time = time.time() - start_time + logging.info(f"Documents retrieved in {doc_retrieval_time:.2f} seconds") + return docs + +def process_documents(docs, question, messages, llm,model): + start_time = time.time() + formatted_docs, sources = format_documents(docs,model) + rag_chain = get_rag_chain(llm=llm) + ai_response = 
rag_chain.invoke({ + "messages": messages[:-1], + "context": formatted_docs, + "input": question + }) + result = get_sources_and_chunks(sources, docs) + content = ai_response.content + + if "gemini" in model: + total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count'] + else: + total_tokens = ai_response.response_metadata['token_usage']['total_tokens'] + + predict_time = time.time() - start_time + logging.info(f"Final Response predicted in {predict_time:.2f} seconds") + + return content, result, total_tokens + +def summarize_and_log(history, messages, llm): + start_time = time.time() + summarize_messages(llm, history, messages) + history_summarized_time = time.time() - start_time + logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") + +def QA_RAG(graph, model, question, session_id, mode): try: - start_time = time.time() - print(model) - model_version = MODEL_VERSIONS[model] - llm = get_llm(model_version) - retriever = get_neo4j_retriever(graph=graph) - doc_retriever = create_document_retriever_chain(llm,retriever) - history = create_neo4j_chat_message_history(graph,session_id ) - chat_setup_time = time.time() - start_time - logging.info(f"Chat setup completed in {chat_setup_time:.2f} seconds") - - start_time = time.time() + logging.info(f"Chat Mode : {mode}") + if mode == "vector": + retrieval_query = VECTOR_SEARCH_QUERY + elif mode == "graph": + #WIP + result = { + "session_id": session_id, + "user": "chatbot" + } + return result + else: + retrieval_query = VECTOR_GRAPH_SEARCH_QUERY + + llm, doc_retriever, history, model_version = setup_chat(model, graph, session_id, retrieval_query) messages = history.messages user_question = HumanMessage(content=question) messages.append(user_question) - docs = doc_retriever.invoke( - { - "messages":messages - } - ) + + docs = retrieve_documents(doc_retriever, messages) + if docs: - # print(docs) - formatted_docs,sources = format_documents(docs) - - doc_retrieval_time = 
time.time() - start_time - logging.info(f"Modified question and Documents retrieved in {doc_retrieval_time:.2f} seconds") - - start_time = time.time() - rag_chain = get_rag_chain(llm=llm) - ai_response = rag_chain.invoke( - { - "messages" : messages[:-1], - "context" : formatted_docs, - "input" : question - } - ) - result = get_sources_and_chunks(sources,docs) - content = ai_response.content - if "Gemini" in model: - total_tokens = ai_response.response_metadata['usage_metadata']['prompt_token_count'] - else: - total_tokens = ai_response.response_metadata['token_usage']['total_tokens'] - predict_time = time.time() - start_time - logging.info(f"Final Response predicted in {predict_time:.2f} seconds") + content, result, total_tokens = process_documents(docs, question, messages, llm,model) else: - ai_response = AIMessage(content="I couldn't find any relevant documents to answer your question.") - result = {"sources": [], "chunkIds": []} + content = "I couldn't find any relevant documents to answer your question." 
+ result = {"sources": [], "chunkdetails": []} total_tokens = 0 - content = ai_response.content - - start_time = time.time() + + ai_response = AIMessage(content=content) messages.append(ai_response) - summarize_messages(llm,history,messages) - history_summarized_time = time.time() - start_time - logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds") - + summarize_and_log(history, messages, llm) + return { "session_id": session_id, "message": content, "info": { "sources": result["sources"], "model": model_version, - "chunkids":result["chunkIds"], + "chunkdetails": result["chunkdetails"], "total_tokens": total_tokens, "response_time": 0 }, "user": "chatbot" - } + } except Exception as e: logging.exception(f"Exception in QA component at {datetime.now()}: {str(e)}") @@ -354,4 +286,5 @@ def QA_RAG(graph,model,question,session_id): "chunkids": [], "error": f"{error_name} :- {str(e)}" }, - "user": "chatbot"} + "user": "chatbot" + } diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py index aeaf66590..9785403a3 100644 --- a/backend/src/chunkid_entities.py +++ b/backend/src/chunkid_entities.py @@ -102,14 +102,23 @@ def get_entities_from_chunkids(uri, username, password, chunk_ids): """ try: logging.info(f"Starting graph query process for chunk ids") - chunk_ids_list = chunk_ids.split(",") - driver = get_graphDB_driver(uri, username, password) - records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) - result = process_records(records) - logging.info(f"Nodes and relationships are processed") - result["chunk_data"] = process_chunk_data(records) - logging.info(f"Query process completed successfully for chunk ids") - return result + if chunk_ids: + chunk_ids_list = chunk_ids.split(",") + driver = get_graphDB_driver(uri, username, password) + records, summary, keys = driver.execute_query(CHUNK_QUERY, chunksIds=chunk_ids_list) + result = process_records(records) + logging.info(f"Nodes and 
relationships are processed") + result["chunk_data"] = process_chunk_data(records) + logging.info(f"Query process completed successfully for chunk ids") + return result + else: + logging.info(f"chunkid_entities module: No chunk ids are passed") + result = { + "nodes": [], + "relationships": [], + "chunk_data":[] + } + return result except Exception as e: logging.error(f"chunkid_entities module: An error occurred in get_entities_from_chunkids. Error: {str(e)}") diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py index 7c5dc4a57..5d8ba90c3 100644 --- a/backend/src/document_sources/gcs_bucket.py +++ b/backend/src/document_sources/gcs_bucket.py @@ -139,3 +139,14 @@ def delete_file_from_gcs(bucket_name,folder_name, file_name): logging.info('File deleted from GCS successfully') except Exception as e: raise Exception(e) + +def copy_failed_file(source_bucket_name,dest_bucket_name,folder_name, file_name): + try: + storage_client = storage.Client() + bucket = storage_client.bucket(source_bucket_name) + folder_file_name = folder_name +'/'+file_name + source_blob = bucket.blob(folder_file_name) + bucket.copy_blob(source_blob,dest_bucket_name,file_name) + logging.info(f'Failed file {file_name} copied to {dest_bucket_name} from {source_bucket_name} in GCS successfully') + except Exception as e: + raise Exception(e) diff --git a/backend/src/document_sources/web_pages.py b/backend/src/document_sources/web_pages.py new file mode 100644 index 000000000..39f2fb855 --- /dev/null +++ b/backend/src/document_sources/web_pages.py @@ -0,0 +1,16 @@ +import logging +from langchain_community.document_loaders import WebBaseLoader +from src.api_response import create_api_response + +def get_documents_from_web_page(source_url:str): + try: + pages = WebBaseLoader(source_url).load() + file_name = pages[0].metadata['title'] + return file_name, pages + except Exception as e: + job_status = "Failed" + message="Failed To Process Web URL" + error_message 
= str(e) + logging.error(f"Failed To Process Web URL: {file_name}") + logging.exception(f'Exception Stack trace: {error_message}') + return create_api_response(job_status,message=message,error=error_message,file_name=file_name) \ No newline at end of file diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 3c0ec191b..367de329b 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -213,4 +213,25 @@ def delete_file_from_graph(self, filenames, source_types, deleteEntities:str, me result = self.execute_query(query_to_delete_document, param) logging.info(f"Deleting {len(filename_list)} documents = '{filename_list}' from '{source_types_list}' with their entities from database") - return result, len(filename_list) \ No newline at end of file + return result, len(filename_list) + + def list_unconnected_nodes(self): + query = """ + MATCH (e:!Chunk&!Document) + WHERE NOT exists { (e)--(:!Chunk&!Document) } + OPTIONAL MATCH (doc:Document)<-[:PART_OF]-(c:Chunk)-[:HAS_ENTITY]->(e) + RETURN e {.*, embedding:null, elementId:elementId(e), labels:labels(e)} as e, + collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections + ORDER BY e.id ASC + LIMIT 100 + """ + return self.execute_query(query) + + def delete_unconnected_nodes(self,unconnected_entities_list): + entities_list = list(map(str.strip, json.loads(unconnected_entities_list))) + query = """ + MATCH (e) WHERE elementId(e) IN $elementIds + DETACH DELETE e + """ + param = {"elementIds":entities_list} + return self.execute_query(query,param) \ No newline at end of file diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index 4a321db1a..dcce6fadb 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -55,7 +55,11 @@ def get_graphDB_driver(uri, username, password): """ try: logging.info(f"Attempting to connect to the Neo4j database at {uri}") - driver = GraphDatabase.driver(uri, auth=(username, 
password), user_agent=os.environ.get('NEO4J_USER_AGENT')) + enable_user_agent = os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") + if enable_user_agent: + driver = GraphDatabase.driver(uri, auth=(username, password), user_agent=os.environ.get('NEO4J_USER_AGENT')) + else: + driver = GraphDatabase.driver(uri, auth=(username, password)) logging.info("Connection successful") return driver except Exception as e: diff --git a/backend/src/main.py b/backend/src/main.py index f51a334ab..34adacb06 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -15,8 +15,9 @@ from src.document_sources.youtube import * from src.shared.common_fn import * from src.make_relationships import * +from src.document_sources.web_pages import * import re -from langchain_community.document_loaders import WikipediaLoader +from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader import warnings from pytube import YouTube import sys @@ -96,6 +97,32 @@ def create_source_node_graph_url_gcs(graph, model, gcs_project_id, gcs_bucket_na 'gcsBucketName': gcs_bucket_name, 'gcsBucketFolder':obj_source_node.gcsBucketFolder, 'gcsProjectId':obj_source_node.gcsProjectId}) return lst_file_name,success_count,failed_count +def create_source_node_graph_web_url(graph, model, source_url, source_type): + success_count=0 + failed_count=0 + lst_file_name = [] + pages = WebBaseLoader(source_url, verify_ssl=False).load() + if pages==None or len(pages)==0: + failed_count+=1 + message = f"Unable to read data for given url : {source_url}" + raise Exception(message) + obj_source_node = sourceNode() + obj_source_node.file_type = 'text' + obj_source_node.file_source = source_type + obj_source_node.model = model + obj_source_node.total_pages = 1 + obj_source_node.url = urllib.parse.unquote(source_url) + obj_source_node.created_at = datetime.now() + obj_source_node.file_name = pages[0].metadata['title'] + obj_source_node.language = pages[0].metadata['language'] + 
obj_source_node.file_size = sys.getsizeof(pages[0].page_content) + + graphDb_data_Access = graphDBdataAccess(graph) + graphDb_data_Access.create_source_node(obj_source_node) + lst_file_name.append({'fileName':obj_source_node.file_name,'fileSize':obj_source_node.file_size,'url':obj_source_node.url,'status':'Success'}) + success_count+=1 + return lst_file_name,success_count,failed_count + def create_source_node_graph_url_youtube(graph, model, source_url, source_type): youtube_url, language = check_url_source(source_type=source_type, yt_url=source_url) @@ -110,7 +137,7 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): obj_source_node.url = youtube_url obj_source_node.created_at = datetime.now() match = re.search(r'(?:v=)([0-9A-Za-z_-]{11})\s*',obj_source_node.url) - logging.info(f"match value{match}") + logging.info(f"match value: {match}") obj_source_node.file_name = YouTube(obj_source_node.url).title transcript= get_youtube_combined_transcript(match.group(1)) if transcript==None or len(transcript)==0: @@ -165,7 +192,7 @@ def extract_graph_from_file_local_file(graph, model, merged_file_path, fileName, else: file_name, pages, file_extension = get_documents_from_file_by_path(merged_file_path,fileName) if pages==None or len(pages)==0: - raise Exception(f'Pdf content is not available for file : {file_name}') + raise Exception(f'File content is not available for file : {file_name}') return processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path, uri) @@ -178,7 +205,16 @@ def extract_graph_from_file_s3(graph, model, source_url, aws_access_key_id, aws_ file_name, pages = get_documents_from_s3(source_url, aws_access_key_id, aws_secret_access_key) if pages==None or len(pages)==0: - raise Exception(f'Pdf content is not available for file : {file_name}') + raise Exception(f'File content is not available for file : {file_name}') + + return processing_source(graph, model, file_name, pages, 
allowedNodes, allowedRelationship) + +def extract_graph_from_web_page(graph, model, source_url, allowedNodes, allowedRelationship): + + file_name, pages = get_documents_from_web_page(source_url) + + if pages==None or len(pages)==0: + raise Exception(f'Content is not available for given URL : {file_name}') return processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship) @@ -203,7 +239,7 @@ def extract_graph_from_file_gcs(graph, model, gcs_project_id, gcs_bucket_name, g file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token) if pages==None or len(pages)==0: - raise Exception(f'Pdf content is not available for file : {file_name}') + raise Exception(f'File content is not available for file : {file_name}') return processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship) @@ -239,7 +275,7 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat pages[i]=Document(page_content=str(text), metadata=pages[i].metadata) create_chunks_obj = CreateChunksofDocument(pages, graph) chunks = create_chunks_obj.split_file_into_chunks() - + chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) if result[0]['Status'] != 'Processing': obj_source_node = sourceNode() status = "Processing" @@ -259,12 +295,12 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat job_status = "Completed" node_count = 0 rel_count = 0 - for i in range(0, len(chunks), update_graph_chunk_processed): + for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed): select_chunks_upto = i+update_graph_chunk_processed logging.info(f'Selected Chunks upto: {select_chunks_upto}') - if len(chunks) <= select_chunks_upto: - select_chunks_upto = len(chunks) - selected_chunks = chunks[i:select_chunks_upto] + if len(chunkId_chunkDoc_list) <= select_chunks_upto: + select_chunks_upto = len(chunkId_chunkDoc_list) + 
selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto] result = graphDb_data_Access.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}") @@ -326,8 +362,7 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat else: logging.info('File does not process because it\'s already in Processing status') -def processing_chunks(chunks,graph,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): - chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks) +def processing_chunks(chunkId_chunkDoc_list,graph,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count): #create vector index and update chunk node with embedding update_embedding_create_vector_index( graph, chunkId_chunkDoc_list, file_name) logging.info("Get graph document list from models") diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py new file mode 100644 index 000000000..60c202d2d --- /dev/null +++ b/backend/src/post_processing.py @@ -0,0 +1,58 @@ +from neo4j import GraphDatabase +import logging +import time + + +DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;" +LABELS_QUERY = "CALL db.labels()" +FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];" +FILTER_LABELS = ["Chunk","Document"] + +def create_fulltext(uri, username, password, database): + start_time = time.time() + logging.info("Starting the process of creating a full-text index.") + + try: + driver = GraphDatabase.driver(uri, auth=(username, password), database=database) + driver.verify_connectivity() + logging.info("Database connectivity verified.") + except Exception as e: + logging.error(f"Failed to create a database driver or verify connectivity: {e}") + return + + try: + with driver.session() as session: + try: + start_step = time.time() + session.run(DROP_INDEX_QUERY) + 
logging.info(f"Dropped existing index (if any) in {time.time() - start_step:.2f} seconds.") + except Exception as e: + logging.error(f"Failed to drop index: {e}") + return + try: + start_step = time.time() + result = session.run(LABELS_QUERY) + labels = [record["label"] for record in result] + + for label in FILTER_LABELS: + if label in labels: + labels.remove(label) + + labels_str = ":" + "|".join([f"`{label}`" for label in labels]) + logging.info(f"Fetched labels in {time.time() - start_step:.2f} seconds.") + except Exception as e: + logging.error(f"Failed to fetch labels: {e}") + return + try: + start_step = time.time() + session.run(FULL_TEXT_QUERY.format(labels_str=labels_str)) + logging.info(f"Created full-text index in {time.time() - start_step:.2f} seconds.") + except Exception as e: + logging.error(f"Failed to create full-text index: {e}") + return + except Exception as e: + logging.error(f"An error occurred during the session: {e}") + finally: + driver.close() + logging.info("Driver closed.") + logging.info(f"Process completed in {time.time() - start_time:.2f} seconds.") \ No newline at end of file diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index ec52bfd83..67a094915 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -18,7 +18,7 @@ from langchain_experimental.graph_transformers.diffbot import DiffbotGraphTransformer # from neo4j.debug import watch -#watch("neo4j") +# watch("neo4j") def check_url_source(source_type, yt_url:str=None, wiki_query:str=None): @@ -77,8 +77,11 @@ def get_chunk_and_graphDocument(graph_document_list, chunkId_chunkDoc_list): return lst_chunk_chunkId_document def create_graph_database_connection(uri, userName, password, database): - graph = Neo4jGraph(url=uri, database=database, username=userName, password=password, refresh_schema=False, sanitize=True) - #driver_config={'user_agent':os.environ.get('NEO4J_USER_AGENT')} + enable_user_agent = 
os.environ.get("ENABLE_USER_AGENT", "False").lower() in ("true", "1", "yes") + if enable_user_agent: + graph = Neo4jGraph(url=uri, database=database, username=userName, password=password, refresh_schema=False, sanitize=True,driver_config={'user_agent':os.environ.get('NEO4J_USER_AGENT')}) + else: + graph = Neo4jGraph(url=uri, database=database, username=userName, password=password, refresh_schema=False, sanitize=True) return graph @@ -142,7 +145,7 @@ def get_llm(model_version:str) : model_name=model_version) else: - llm = DiffbotGraphTransformer(diffbot_api_key=os.environ.get('DIFFBOT_API_KEY')) + llm = DiffbotGraphTransformer(diffbot_api_key=os.environ.get('DIFFBOT_API_KEY'),extract_types=['entities','facts']) logging.info(f"Model created - Model Version: {model_version}") return llm diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index ba9a68dfc..22cd5d8f5 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -9,9 +9,168 @@ } OPENAI_MODELS = ["gpt-3.5", "gpt-4o"] GEMINI_MODELS = ["gemini-1.0-pro", "gemini-1.5-pro"] +GROQ_MODELS = ["groq-llama3"] +BUCKET_UPLOAD = 'llm-graph-builder-upload' +BUCKET_FAILED_FILE = 'llm-graph-builder-failed' +PROJECT_ID = 'llm-experiments-387609' + + +## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_K = 3 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.7 -GROQ_MODELS = ["groq-llama3"] -BUCKET_UPLOAD = 'llm-graph-builder-upload' -PROJECT_ID = 'llm-experiments-387609' +CHAT_DOC_SPLIT_SIZE = 3000 +CHAT_EMBEDDING_FILTER_SCORE_THRESHOLD = 0.10 +CHAT_TOKEN_CUT_OFF = { + ("gpt-3.5","gemini-1.0-pro","gemini-1.5-pro","groq-llama3" ) : 4, + ("gpt-4","diffbot" , "gpt-4o") : 28 +} + + +### CHAT TEMPLATES +CHAT_SYSTEM_TEMPLATE = """ +You are an AI-powered question-answering agent. Your task is to provide accurate and comprehensive responses to user queries based on the given context, chat history, and available resources. + +### Response Guidelines: +1. 
**Direct Answers**: Provide clear and thorough answers to the user's queries without headers unless requested. Avoid speculative responses. +2. **Utilize History and Context**: Leverage relevant information from previous interactions, the current user input, and the context provided below. +3. **No Greetings in Follow-ups**: Start with a greeting in initial interactions. Avoid greetings in subsequent responses unless there's a significant break or the chat restarts. +4. **Admit Unknowns**: Clearly state if an answer is unknown. Avoid making unsupported statements. +5. **Avoid Hallucination**: Only provide information based on the context provided. Do not invent information. +6. **Response Length**: Keep responses concise and relevant. Aim for clarity and completeness within 4-5 sentences unless more detail is requested. +7. **Tone and Style**: Maintain a professional and informative tone. Be friendly and approachable. +8. **Error Handling**: If a query is ambiguous or unclear, ask for clarification rather than providing a potentially incorrect answer. +9. **Fallback Options**: If the required information is not available in the provided context, provide a polite and helpful response. Example: "I don't have that information right now." or "I'm sorry, but I don't have that information. Is there something else I can help with?" +10. **Context Availability**: If the context is empty, do not provide answers based solely on internal knowledge. Instead, respond appropriately by indicating the lack of information. + + +**IMPORTANT** : DO NOT ANSWER FROM YOUR KNOWLEDGE BASE USE THE BELOW CONTEXT + +### Context: + +{context} + + +### Example Responses: +User: Hi +AI Response: 'Hello there! How can I assist you today?' + +User: "What is Langchain?" +AI Response: "Langchain is a framework that enables the development of applications powered by large language models, such as chatbots. 
It simplifies the integration of language models into various applications by providing useful tools and components." + +User: "Can you explain how to use memory management in Langchain?" +AI Response: "Langchain's memory management involves utilizing built-in mechanisms to manage conversational context effectively. It ensures that the conversation remains coherent and relevant by maintaining the history of interactions and using it to inform responses." + +User: "I need help with PyCaret's classification model." +AI Response: "PyCaret simplifies the process of building and deploying machine learning models. For classification tasks, you can use PyCaret's setup function to prepare your data. After setup, you can compare multiple models to find the best one, and then fine-tune it for better performance." + +User: "What can you tell me about the latest realtime trends in AI?" +AI Response: "I don't have that information right now. Is there something else I can help with?" + +Note: This system does not generate answers based solely on internal knowledge. It answers from the information provided in the user's current and previous inputs, and from the context. +""" + + +QUESTION_TRANSFORM_TEMPLATE = "Given the below conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else." 
+ + +## CHAT QUERIES +VECTOR_SEARCH_QUERY = """ +WITH node AS chunk, score +MATCH (chunk)-[:PART_OF]->(d:Document) +WITH d, collect(distinct {chunk: chunk, score: score}) as chunks, avg(score) as avg_score +WITH d, avg_score, + [c in chunks | c.chunk.text] as texts, + [c in chunks | {id: c.chunk.id, score: c.score}] as chunkdetails +WITH d, avg_score, chunkdetails, + apoc.text.join(texts, "\n----\n") as text +RETURN text, avg_score AS score, + {source: COALESCE(CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} as metadata +""" + +# VECTOR_GRAPH_SEARCH_QUERY=""" +# WITH node as chunk, score +# MATCH (chunk)-[:PART_OF]->(d:Document) +# CALL { WITH chunk +# MATCH (chunk)-[:HAS_ENTITY]->(e) +# MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) +# UNWIND rels as r +# RETURN collect(distinct r) as rels +# } +# WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels +# WITH d, avg_score, +# [c IN chunks | c.chunk.text] AS texts, +# [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, +# [r in rels | coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ startNode(r).id + " "+ type(r) + " " + coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + endNode(r).id] as entities +# WITH d, avg_score,chunkdetails, +# apoc.text.join(texts,"\n----\n") + +# apoc.text.join(entities,"\n") +# as text +# RETURN text, avg_score AS score, {source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata +# """ + + +VECTOR_GRAPH_SEARCH_QUERY = """ +WITH node as chunk, score +// find the document of the chunk +MATCH (chunk)-[:PART_OF]->(d:Document) +// fetch entities +CALL { WITH chunk +// entities connected to the chunk +// todo only return entities that are actually in the chunk, remember we 
connect all extracted entities to all chunks +MATCH (chunk)-[:HAS_ENTITY]->(e) + +// depending on match to query embedding either 1 or 2 step expansion +WITH CASE WHEN true // vector.similarity.cosine($embedding, e.embedding ) <= 0.95 +THEN +collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,1}(:!Chunk&!Document) RETURN path } +ELSE +collect { MATCH path=(e)(()-[rels:!HAS_ENTITY&!PART_OF]-()){0,2}(:!Chunk&!Document) RETURN path } +END as paths + +RETURN collect{ unwind paths as p unwind relationships(p) as r return distinct r} as rels, +collect{ unwind paths as p unwind nodes(p) as n return distinct n} as nodes +} +// aggregate chunk-details and de-duplicate nodes and relationships +WITH d, collect(DISTINCT {chunk: chunk, score: score}) AS chunks, avg(score) as avg_score, apoc.coll.toSet(apoc.coll.flatten(collect(rels))) as rels, + +// TODO sort by relevancy (embeddding comparision?) cut off after X (e.g. 25) nodes? +apoc.coll.toSet(apoc.coll.flatten(collect( + [r in rels |[startNode(r),endNode(r)]]),true)) as nodes + +// generate metadata and text components for chunks, nodes and relationships +WITH d, avg_score, + [c IN chunks | c.chunk.text] AS texts, + [c IN chunks | {id: c.chunk.id, score: c.score}] AS chunkdetails, + apoc.coll.sort([n in nodes | + +coalesce(apoc.coll.removeAll(labels(n),['__Entity__'])[0],"") +":"+ +n.id + (case when n.description is not null then " ("+ n.description+")" else "" end)]) as nodeTexts, + apoc.coll.sort([r in rels + // optional filter if we limit the node-set + // WHERE startNode(r) in nodes AND endNode(r) in nodes + | +coalesce(apoc.coll.removeAll(labels(startNode(r)),['__Entity__'])[0],"") +":"+ +startNode(r).id + +" " + type(r) + " " + +coalesce(apoc.coll.removeAll(labels(endNode(r)),['__Entity__'])[0],"") +":" + +endNode(r).id +]) as relTexts + +// combine texts into response-text +WITH d, avg_score,chunkdetails, +"Text Content:\n" + +apoc.text.join(texts,"\n----\n") + +"\n----\nEntities:\n"+ 
+apoc.text.join(nodeTexts,"\n") + +"\n----\nRelationships:\n"+ +apoc.text.join(relTexts,"\n") + +as text +RETURN text, avg_score as score, {length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails} AS metadata +""" + + + + + diff --git a/docker-compose.yml b/docker-compose.yml index 63ade96e6..a93be695b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,6 +24,7 @@ services: - GCP_LOG_METRICS_ENABLED=${GCP_LOG_METRICS_ENABLED-False} - UPDATE_GRAPH_CHUNKS_PROCESSED=${UPDATE_GRAPH_CHUNKS_PROCESSED-20} - NUMBER_OF_CHUNKS_TO_COMBINE=${NUMBER_OF_CHUNKS_TO_COMBINE-6} + - GCS_FILE_CACHE=${GCS_FILE_CACHE-False} container_name: backend ports: - "8000:8000" @@ -39,7 +40,7 @@ services: args: - BACKEND_API_URL=${BACKEND_API_URL-http://localhost:8000} - REACT_APP_SOURCES=${REACT_APP_SOURCES-local,youtube,wiki,s3} - - LLM_MODELS=${LLM_MODELS-Diffbot,OpenAI GPT 3.5,OpenAI GPT 4o} + - LLM_MODELS=${LLM_MODELS-diffbot,gpt-3.5,gpt-4o} - GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID-""} - BLOOM_URL=${BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - TIME_PER_CHUNK=${TIME_PER_CHUNK-4} diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc new file mode 100644 index 000000000..a2887d6f9 --- /dev/null +++ b/docs/backend/backend_docs.adoc @@ -0,0 +1,712 @@ += LLM Knowledge Graph Builder Backend + +== API Reference + + +=== Connect to Neo4j Graph Database +----- +POST /connect +----- + +Neo4j database connection on frontend is done with this API. 
+ +**API Parameters :** + +* `uri`= Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + + +**Response :** +[source,json,indent=0] +---- +{ + "status":"Success", + "message":"Connection Successful" +} +---- + + +=== Upload Files from Local +---- +POST /upload +---- + +The upload endpoint is designed to handle the uploading of large files by breaking them into smaller chunks. This method ensures that large files can be uploaded efficiently without overloading the server. + +***API Parameters*** + +* `file`=The file to be uploaded, received in chunks, +* `chunkNumber`=The current chunk number being uploaded, +* `totalChunks`=The total number of chunks the file is divided into (each chunk of 1Mb size), +* `originalname`=The original name of the file, +* `model`=The model associated with the file, +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "message": "File uploaded and chunks merged successfully." +} +.... + + +=== User defined schema +---- +POST /schema +---- + +User can set schema for graph generation (i.e. Nodes and relationship labels) in settings panel or get existing db schema through this API. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": [ + { + "labels": [ + "Access_token", + "Activity", + "Ai chatbot", + "Book", + "Metric", + "Mode", + "Mountain" + ], + "relationshipTypes": [ + "ACCELERATE", + "ACCEPTS", + "CONVERT", + "CORRELATE", + "ESTABLISHED", + "EXAMPLE_OF" + ] + } + ] +} +.... 
+ +=== Graph schema from input text +---- +POST /populate_graph_schema +---- + +The API is used to populate a graph schema based on the provided input text, model, and schema description flag. + +**API Parameters :** + +* `input_text`=The input text used to populate the graph schema. +* `model`=The model to be used for populating the graph schema. +* `is_schema_description_checked`=A flag indicating whether the schema description should be considered. + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": [ + { + "labels": [ + "Technology", + "Company", + "Person", + "Location", + "Organization", + "Concept" + ], + "relationshipTypes": [ + "LOCATED_AT", + "SUBSIDARY_OF", + "BORN_IN", + "LAST_MESSAGE", + "ATTENDED", + "PARTNERED_WITH" + ] + } + ] +} +.... + + +=== Unstructured sources scan other than local +---- +POST /url/scan +---- + +Create Document node for other sources - s3 bucket, gcs bucket, wikipedia, youtube url and web pages. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `model`= LLM model, +* `source_url`= , +* `aws_access_key_id`= AWS access key, +* `aws_secret_access_key`= AWS secret key, +* `wiki_query`= Wikipedia query sources, +* `gcs_project_id`= GCS project id, +* `gcs_bucket_name`= GCS bucket name, +* `gcs_bucket_folder`= GCS bucket folder, +* `source_type`= s3 bucket/ gcs bucket/ youtube/Wikipedia as source type +* `gcs_project_id`=Form(None), +* `access_token`=Form(None) + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "success_count": 2, + "failed_count": 0, + "message": "Source Node created successfully for source type: Wikipedia and source: Albert Einstein, neo4j", + "file_name": [ + { + "fileName": "Albert Einstein", + "fileSize": 8074, + "url": "https://en.wikipedia.org/wiki/Albert_Einstein", + "status": "Success" + } + ] +} +.... 
+ + +=== Extraction of nodes and relations from content +---- +POST /extract : +---- + +This API is responsible for - + +** Reading the content of source provided in the form of langchain Document object from respective langchain loaders + +** Dividing the document into multiple chunks, and make below relations - +*** PART_OF - relation from each chunk node to its Document node +*** FIRST_CHUNK - relation from document node to first chunk node +*** NEXT_CHUNK - relation from a chunk pointing to next chunk of the document. +*** HAS_ENTITY - relation between chunk node and entities extracted from LLM. + +** Extracting nodes and relations in the form of GraphDocument from respective LLM. + +** Update embedding of chunks and create vector index. + +** Update K-Nearest Neighbors graph for similar chunks. + + +**Implementation :** + +** For multiple sources of content - + +*** Local file - User can upload pdf file from their device. + +*** s3 bucket - User passes the bucket url and all the pdf files inside folders and subfolders will be listed. + +*** GCS bucket - User passes gcs project id, gcs bucket name and folder name, do google authentication to access all the pdf files under that folder and its subfolders and if folder name is not passed by user, all the pdf files under the bucket and its subfolders will be listed if the user has read access to the bucket. + +*** Web Sources - +**** Wikipedia - Content of the 1st Wikipedia page is rendered from the url passed by user. + +**** Youtube - Youtube video transcript is processed and if no transcript is available then respective error is thrown. + +**** Web urls - Text Content from any web url is processed for generating graph. + +** Langchain's LLMGraphTransformer library is used to get nodes and relations in the form of GraphDocument from LLMs. User and System prompts, LLM chain, graphDocument schema are defined in the library itself.
+ +** SentenceTransformer embeddings are used by default, also embeddings are made configurable to use either OpenAIEmbeddings or VertexAIEmbeddings. + +** Vector index is created in database on embeddings created for chunks. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `model`= LLM model, +* `file_name` = File uploaded from device +* `source_url`= , +* `aws_access_key_id`= AWS access key, +* `aws_secret_access_key`= AWS secret key, +* `wiki_query`= Wikipedia query sources, +* `gcs_project_id`=GCS project id, +* `gcs_bucket_name`= GCS bucket name, +* `gcs_bucket_folder`= GCS bucket folder, +* `gcs_blob_filename` = GCS file name, +* `source_type`= local file/ s3 bucket/ gcs bucket/ youtube/ Wikipedia as source, +* `allowedNodes`=Node labels passed from settings panel, +* `allowedRelationship`=Relationship labels passed from settings panel, +* `language`=Language in which wikipedia content will be extracted + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "fileName": , + "nodeCount": , + "relationshipCount": , + "processingTime": , + "status": "Completed", + "model": + } +} +.... + + +=== Get list of sources +---- +GET /sources_list +---- + +List all sources (Document nodes) present in Neo4j graph database. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + +**Response :** +[source,json,indent=0] +....
+{ + "status": "Success", + "data": [ + { + "fileName": "About Amazon.pdf", + "fileSize": 163931, + "errorMessage": "", + "fileSource": "local file", + "nodeCount": 62, + "model": "OpenAI GPT 4", + "fileType": "pdf", + "processingTime": 122.71, + "relationshipCount": 187, + "status": "Completed", + "updatedAt": { + "_DateTime__date": { + "_Date__ordinal": 738993, + "_Date__year": 2024, + "_Date__month": 4, + "_Date__day": 17 + }, + "_DateTime__time": { + "_Time__ticks": 28640715768000, + "_Time__hour": 7, + "_Time__minute": 57, + "_Time__second": 20, + "_Time__nanosecond": 715768000, + "_Time__tzinfo": null + } + } + } + ] +} +.... + + +=== Post processing after graph generation +---- +POST /post_processing : +---- + +This API is called at the end of processing of whole document to get create k-nearest neighbor relations between similar chunks of document based on KNN_MIN_SCORE which is 0.8 by default and to drop and create a full text index on db labels. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `tasks`= List of tasks to perform + + +**Response :** +[source,json,indent=0] +.... +{ + "status":"Success", + "message":"All tasks completed successfully" +} +.... + + +=== Chat with Data +---- +POST /chat_bot +---- + +The API responsible for a chatbot system designed to leverage multiple AI models and a Neo4j graph database, providing answers to user queries. It interacts with AI models from OpenAI and Google's Vertex AI and utilizes embedding models to enhance the retrieval of relevant information. + +**Components :** + +** Embedding Models - Includes OpenAI Embeddings, VertexAI Embeddings, and SentenceTransformer Embeddings to support vector-based query operations. +** AI Models - OpenAI GPT 3.5, GPT 4o, Gemini Pro, Gemini 1.5 Pro and Groq llama3 can be configured for the chatbot backend to generate responses and process natural language. 
+** Graph Database (Neo4jGraph) - Manages interactions with the Neo4j database, retrieving and storing conversation histories. +** Response Generation - Utilizes Vector Embeddings from the Neo4j database, chat history, and the knowledge base of the LLM used. + +**API Parameters :** + +* `uri`= Neo4j uri +* `userName`= Neo4j database username +* `password`= Neo4j database password +* `model`= LLM model +* `question`= User query for the chatbot +* `session_id`= Session ID used to maintain the history of chats during the user's connection + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "session_id": "0901", + "message": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing process where connective tissue replaces normal parenchymal tissue.", + "info": { + "sources": [ + { + "source_name": "https://en.wikipedia.org/wiki/Fibrosis", + "page_numbers": [], + "start_time": [] + } + ], + "model": "gpt-4o", + "chunkids": [ + "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", + "4cc02ee8419706c8decdf71ab0d3896aad5c7dca", + "266ce95311bb1921791b4f1cd29a48d433027139", + "11e19513247e1e396475728fa6a197695045b248", + "8bafa01b6d851f70822bcb86863e485e1785a64c" + ], + "total_tokens": 2213, + "response_time": 10.17 + }, + "user": "chatbot" + } +} +.... + +=== Get entities from chunks +---- +/chunk_entities +---- + +This API is used to get the entities and relations associated with a particular chunk and chunk metadata. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name +* `chunk_ids` = Chunk ids of document + + +**Response :** +[source,json,indent=0] +....
+{ + "status": "Success", + "data": { + "nodes": [ + { + "element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", + "labels": [ + "Condition" + ], + "properties": { + "id": "Fibrosis" + } + }, + + ], + "relationships": [ + { + "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1153057844048764467", + "type": "AFFECTS", + "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", + "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73282" + }, + { + "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1155309643862449715", + "type": "AFFECTS", + "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", + "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73294" + }, + ], + "chunk_data": [ + { + "id": "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", + "position": 1, + "text": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing ...", + "content_offset": 0, + "fileName": "fibrosis", + "length": 1002, + "embedding": null + } + ] + } +} +.... + +=== View graph for a file +---- +POST /graph_query +---- + +This API is used to view graph for a particular file. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `query_type`= Neo4j database name +* `document_names` = File name for which user wants to view graph + + +**Response :** +[source,json,indent=0] +.... 
+{ + "status": "Success", + "data": { + "nodes": [ + { + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:9972", + "labels": [ + "Person" + ], + "properties": { + "id": "Jeff" + } + }, + { + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:9973", + "labels": [ + "Team" + ], + "properties": { + "id": "Miami" + } + } + ], + "relationships": [ + { + "element_id": "5:98e5e9bb-8095-440d-9462-03985fed2fa2:1153200780560312052", + "type": "PLAYER", + "start_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:9972", + "end_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:9973" + } + ] + } +} +.... + +=== Clear chat history +---- +POST /clear_chat_bot +---- + +This API is used to clear the chat history which is saved in Neo4j DB. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `session_id` = User session id for QA chat + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "session_id": "99c1a808-377f-448f-9ea6-4b4a8de46b14", + "message": "The chat History is cleared", + "user": "chatbot" + } +} +.... + +=== SSE event to update processing status +---- +GET /update_extract_status +---- + +The API provides a continuous update on the extraction status of a specified file. It uses Server-Sent Events (SSE) to stream updates to the client. + +**API Parameters :** + +* `file_name`=The name of the file whose extraction status is being tracked, +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + + +**Response :** +[source,json,indent=0] +.... +{ + "fileName": "testFile.pdf", + "status": "Processing", + "processingTime": 0, + "nodeCount": 0, + "relationshipCount": 0, + "model": "OpenAI GPT 3.5", + "total_chunks": 3, + "total_pages": 1, + "fileSize": 92373, + "processed_chunk": 0 +} +.... 
+ +=== Delete selected documents +---- +POST /delete_document_and_entities +---- + +**Overview :** + +Deletion of nodes and relations for multiple files is done through this API. User can choose multiple documents to be deleted, and the user also has the option to delete only 'Document' and 'Chunk' nodes and keep the entities extracted from that document. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `filenames`= List of files to be deleted, +* `source_types`= Document sources(Wikipedia, youtube, etc.), +* `deleteEntities`= Boolean value to check entities deletion is requested or not + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "message": "Deleted 1 documents with 68 entities from database" +} +.... + +=== Cancel processing job +---- +/cancelled_job +---- + +This API is responsible for cancelling an in-progress job. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `filenames`= Name of the file whose processing needs to be stopped, +* `source_types`= Source of the file + + +**Response :** +[source,json,indent=0] +.... +{ + "message":"Cancelled the processing job successfully" +} +.... + + +=== Get the list of orphan nodes +---- +POST /get_unconnected_nodes_list +---- + +The API retrieves a list of nodes in the graph database that are not connected to any other nodes. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name + + +**Response :** +[source,json,indent=0] +.... +{ "status": "Success", + "data": [{ + "e": + { + "id": "Leela Chess Zero", + "elementId": "4:abf6f691-928d-4b1c-80fc-2914ae517b4c:336", + "labels": ["Technology"], + "embedding": null + }, + "documents": ["AlphaZero - Wikipedia.pdf"], + "chunkConnections": 7 + }] +} +....
+ + +=== Deletion of orphan nodes +---- +POST /delete_unconnected_nodes +---- + +The API is used to delete unconnected entities from database. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `unconnected_entities_list`=list of selected unconnected entities to delete. + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "message": "Unconnected entities delete successfully" +} +.... + + +== Decisions + +* Process only 1st page of Wikipedia +* Split document content into chunks of size 200 and overlap of 20 +* Configurable elements - +** Number of chunks to combine +** Generate Embedding or not +** Embedding model +** minimum score for KNN graph +** Uploaded file storage location (GCS bucket or container) diff --git a/docs/frontend/frontend_docs.adoc b/docs/frontend/frontend_docs.adoc new file mode 100644 index 000000000..e69de29bb diff --git a/docs/project architecture.png b/docs/project architecture.png new file mode 100644 index 000000000..815749308 Binary files /dev/null and b/docs/project architecture.png differ diff --git a/docs/project_docs.adoc b/docs/project_docs.adoc new file mode 100644 index 000000000..22e997217 --- /dev/null +++ b/docs/project_docs.adoc @@ -0,0 +1,126 @@ += LLM Knowledge Graph Builder + +== Introduction + +This document provides comprehensive documentation for the Neo4j llm-graph-builder Project, a Python web application built with the FastAPI framework. It covers various aspects of the project, including its features, architecture, usage, development, deployment, limitations and known issues. + + +== Features + +* Upload unstructured data from multiple sources to generate structured Neo4j knowledge graph. + +* Extraction of nodes and relations from multiple LLMs(OpenAI GPT-3.5, OpenAI GPT-4, Gemini 1.0-Pro and Diffbot).
+ +* View complete graph or only a particular element of graph(ex: Only chunks, only entities, document and entities, etc.) + +* Generate embedding of chunks created from unstructured content. + +* Generate k-nearest neighbors graph for similar chunks. + +* Chat with graph data using chat bot. + +== Local Setup and Execution + +Run Docker Compose to build and start all components: +.... +docker-compose up --build +.... + +Alternatively, run specific directories separately: + +** For frontend +.... +cd frontend +yarn +yarn run dev +.... + +** For backend +.... +cd backend +python -m venv envName +source envName/bin/activate +pip install -r requirements.txt +uvicorn score:app --reload +.... + +Set up environment variables +.... +OPENAI_API_KEY = "" +DIFFBOT_API_KEY = "" +NEO4J_URI = "" +NEO4J_USERNAME = "" +NEO4J_PASSWORD = "" +NEO4J_DATABASE = "" +AWS_ACCESS_KEY_ID = "" +AWS_SECRET_ACCESS_KEY = "" +EMBEDDING_MODEL = "" +IS_EMBEDDING = "TRUE" +KNN_MIN_SCORE = "" +LANGCHAIN_API_KEY = "" +LANGCHAIN_PROJECT = "" +LANGCHAIN_TRACING_V2 = "" +LANGCHAIN_ENDPOINT = "" +NUMBER_OF_CHUNKS_TO_COMBINE = "" +.... + +== Architecture +image::project architecture.png[Architecture diagram, 600, align='left'] + +== Development + +==== Backend +link:backend/backend_docs.adoc[backend_docs.adoc] + +==== Frontend +link:frontend/frontend_docs.adoc[frontend_docs.adoc] + +== Deployment and Monitoring +* The application is deployed on Google Cloud Platform. + + To deploy frontend +.... +gcloud run deploy +source location current directory > Frontend +region : 32 [us-central 1] +Allow unauthenticated request : Yes +.... + + To deploy backend +.... +gcloud run deploy --set-env-vars "OPENAI_API_KEY = " --set-env-vars "DIFFBOT_API_KEY = " --set-env-vars "NEO4J_URI = " --set-env-vars "NEO4J_PASSWORD = " --set-env-vars "NEO4J_USERNAME = " +source location current directory > Backend +region : 32 [us-central 1] +Allow unauthenticated request : Yes +....
+ +* Langserve is used with FastAPI to deploy Langchain runnables and chains as a REST API. + +* Langsmith is used to monitor and evaluate the application + + +Development url + +Production url + + + +== Appendix + +=== Limitations + +** Only pdf file uploaded from device or uploaded from s3 bucket or gcs bucket can be processed. + +** Only GCS buckets present under the 1051503595507@cloudbuild.gserviceaccount.com service account can be accessed. + +** Only 1st page of Wikipedia content is processed to generate graphDocument. + + +=== Known issues + +** InactiveRpcError error with Gemini 1.0 Pro - grpc_status:13, grpc_message:"Internal error encountered." + +** ResourceExhausted error with Gemini 1.5 Pro - 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-pro + +** Gemini response validation errors even after setting the safety_settings parameters to BLOCK_NONE. + diff --git a/example.env b/example.env index 3eef484d6..3aad7811f 100644 --- a/example.env +++ b/example.env @@ -1,30 +1,31 @@ # Mandatory -OPENAI_API_KEY = "" -DIFFBOT_API_KEY = "" +OPENAI_API_KEY="" +DIFFBOT_API_KEY="" # Optional Backend -EMBEDDING_MODEL = "all-MiniLM-L6-v2" -IS_EMBEDDING = "true" -KNN_MIN_SCORE = "0.94" +EMBEDDING_MODEL="all-MiniLM-L6-v2" +IS_EMBEDDING="true" +KNN_MIN_SCORE="0.94" # Enable Gemini (default is False) | Can be False or True -GEMINI_ENABLED = False +GEMINI_ENABLED=False # Enable Google Cloud logs (default is False) | Can be False or True -GCP_LOG_METRICS_ENABLED = False -NUMBER_OF_CHUNKS_TO_COMBINE = 6 -UPDATE_GRAPH_CHUNKS_PROCESSED = 20 -NEO4J_URI = "neo4j://database:7687" -NEO4J_USERNAME = "neo4j" -NEO4J_PASSWORD = "password" -LANGCHAIN_API_KEY = "" -LANGCHAIN_PROJECT = "" -LANGCHAIN_TRACING_V2 = "true" -LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com" +GCP_LOG_METRICS_ENABLED=False +NUMBER_OF_CHUNKS_TO_COMBINE=6 +UPDATE_GRAPH_CHUNKS_PROCESSED=20 +NEO4J_URI="neo4j://database:7687"
+NEO4J_USERNAME="neo4j" +NEO4J_PASSWORD="password" +LANGCHAIN_API_KEY="" +LANGCHAIN_PROJECT="" +LANGCHAIN_TRACING_V2="true" +LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" +GCS_FILE_CACHE=False # Optional Frontend BACKEND_API_URL="http://localhost:8000" BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" -REACT_APP_SOURCES="local,youtube,wiki,s3" -LLM_MODELS="Diffbot,OpenAI GPT 3.5,OpenAI GPT 4o" +REACT_APP_SOURCES="local,youtube,wiki,s3,web" +LLM_MODELS="diffbot,gpt-3.5,gpt-4o" ENV="DEV" TIME_PER_CHUNK=4 TIME_PER_PAGE=50 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 504bcc045..7e166bda8 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -3,14 +3,14 @@ FROM node:20 AS build ARG BACKEND_API_URL="http://localhost:8000" ARG REACT_APP_SOURCES="" -ARG LLM_MODELS="diffbot,gpt-3.5,gpt-4o,gemini-1.0-pro" +ARG LLM_MODELS="" ARG GOOGLE_CLIENT_ID="" ARG BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" ARG TIME_PER_CHUNK=4 ARG TIME_PER_PAGE=50 ARG LARGE_FILE_SIZE=5242880 ARG CHUNK_SIZE=5242880 -ARG ENV="PROD" +ARG ENV="DEV" WORKDIR /app COPY package.json yarn.lock ./ diff --git a/frontend/example.env b/frontend/example.env index ced971938..bf7a7b03e 100644 --- a/frontend/example.env +++ b/frontend/example.env @@ -1,9 +1,10 @@ BACKEND_API_URL="http://localhost:8000" BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" -REACT_APP_SOURCES="local,youtube,wiki,s3" -LLM_MODELS="Diffbot,OpenAI GPT 3.5,OpenAI GPT 4o" +REACT_APP_SOURCES="local,youtube,wiki,s3,web" +LLM_MODELS="diffbot,gpt-3.5,gpt-4o" ENV="DEV" TIME_PER_CHUNK=4 TIME_PER_PAGE=50 CHUNK_SIZE=5242880 
+LARGE_FILE_SIZE=5242880 GOOGLE_CLIENT_ID="" \ No newline at end of file diff --git a/frontend/src/App.css b/frontend/src/App.css index 5c501b6fc..52d8fcd50 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -25,7 +25,7 @@ } .contentWithExpansion { - width: calc(-800px + 100dvw); + width: calc(-840px + 100dvw); height: calc(100dvh - 58px); padding: 3px; display: flex; @@ -121,6 +121,10 @@ height: 55px; object-fit: contain; } +.webImg{ + width: 80px; + height: 80px; +} ::placeholder { color: rgb(135, 130, 130) !important; diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 88f0164da..0caa501e3 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -2,22 +2,27 @@ import './App.css'; import '@neo4j-ndl/base/lib/neo4j-ds-styles.css'; import ThemeWrapper from './context/ThemeWrapper'; import QuickStarter from './components/QuickStarter'; - import { GoogleOAuthProvider } from '@react-oauth/google'; import { APP_SOURCES } from './utils/Constants'; +import ErrorBoundary from './components/UI/ErrroBoundary'; + const App: React.FC = () => { return ( <> {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') ? 
( - + + + + + + + + ) : ( + - - ) : ( - - - + )} ); diff --git a/frontend/src/HOC/SettingModalHOC.tsx b/frontend/src/HOC/SettingModalHOC.tsx new file mode 100644 index 000000000..7afe46d0e --- /dev/null +++ b/frontend/src/HOC/SettingModalHOC.tsx @@ -0,0 +1,28 @@ +import React from 'react'; +import { SettingsModalProps } from '../types'; +import SettingsModal from '../components/Popups/Settings/SettingModal'; + +const SettingModalHOC: React.FC = ({ + openTextSchema, + open, + onClose, + isSchema, + settingView, + setIsSchema, + onContinue, + onClear, +}) => { + return ( + + ); +}; +export default SettingModalHOC; diff --git a/frontend/src/assets/images/db-search.svg b/frontend/src/assets/images/db-search.svg new file mode 100644 index 000000000..33c7a3cae --- /dev/null +++ b/frontend/src/assets/images/db-search.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/frontend/src/assets/images/graph-search.svg b/frontend/src/assets/images/graph-search.svg new file mode 100644 index 000000000..1be521477 --- /dev/null +++ b/frontend/src/assets/images/graph-search.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/frontend/src/assets/images/internet_logo.png b/frontend/src/assets/images/internet_logo.png new file mode 100644 index 000000000..f79962b7c Binary files /dev/null and b/frontend/src/assets/images/internet_logo.png differ diff --git a/frontend/src/assets/images/web-search-svgrepo-com.svg b/frontend/src/assets/images/web-search-svgrepo-com.svg new file mode 100644 index 000000000..199dd7cbe --- /dev/null +++ b/frontend/src/assets/images/web-search-svgrepo-com.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/frontend/src/assets/images/web-svgrepo-com.svg b/frontend/src/assets/images/web-svgrepo-com.svg new file mode 100644 index 000000000..86089066d --- /dev/null +++ b/frontend/src/assets/images/web-svgrepo-com.svg @@ -0,0 +1,11 @@ + + + + + + + + + web + + \ No newline at end of file diff --git 
a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx new file mode 100644 index 000000000..f80b2dfbc --- /dev/null +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -0,0 +1,50 @@ +import { SegmentedControl, Tip } from '@neo4j-ndl/react'; +import { ChatModeOptions } from '../../utils/Constants'; +import { useFileContext } from '../../context/UsersFiles'; +import { DbmsIcon } from '@neo4j-ndl/react/icons'; +import { capitalize } from '@mui/material'; + +export default function ChatModeToggle({ inSidenav = false }) { + const [vector, _] = ChatModeOptions; + const { chatMode, setchatMode } = useFileContext(); + + return ( + + {ChatModeOptions.map((i, idx) => { + return ( + + + + {i.Icon === 'abc' ? ( + + + + + + + ) : ( + + )} + + + {capitalize(i.value)} + + ); + })} + + ); +} diff --git a/frontend/src/components/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx similarity index 93% rename from frontend/src/components/Chatbot.tsx rename to frontend/src/components/ChatBot/Chatbot.tsx index 44ca60241..096a6c31a 100644 --- a/frontend/src/components/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -6,33 +6,33 @@ import { SpeakerWaveIconOutline, SpeakerXMarkIconOutline, } from '@neo4j-ndl/react/icons'; -import ChatBotAvatar from '../assets/images/chatbot-ai.png'; -import { ChatbotProps, Source, UserCredentials } from '../types'; -import { useCredentials } from '../context/UserCredentials'; -import { chatBotAPI } from '../services/QnaAPI'; +import ChatBotAvatar from '../../assets/images/chatbot-ai.png'; +import { ChatbotProps, UserCredentials, chunk } from '../../types'; +import { useCredentials } from '../../context/UserCredentials'; +import { chatBotAPI } from '../../services/QnaAPI'; import { v4 as uuidv4 } from 'uuid'; -import { useFileContext } from '../context/UsersFiles'; -import InfoModal from './InfoModal'; +import { useFileContext } from '../../context/UsersFiles'; +import 
InfoModal from './Info/InfoModal'; import clsx from 'clsx'; import ReactMarkdown from 'react-markdown'; -import IconButtonWithToolTip from './IconButtonToolTip'; -import { buttonCaptions, tooltips } from '../utils/Constants'; -import useSpeechSynthesis from '../hooks/useSpeech'; -import ButtonWithToolTip from './ButtonWithToolTip'; +import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { buttonCaptions, tooltips } from '../../utils/Constants'; +import useSpeechSynthesis from '../../hooks/useSpeech'; +import ButtonWithToolTip from '../UI/ButtonWithToolTip'; const Chatbot: React.FC = (props) => { const { messages: listMessages, setMessages: setListMessages, isLoading, isFullScreen, clear } = props; const [inputMessage, setInputMessage] = useState(''); const [loading, setLoading] = useState(isLoading); const { userCredentials } = useCredentials(); - const { model } = useFileContext(); + const { model, chatMode } = useFileContext(); const messagesEndRef = useRef(null); const [sessionId, setSessionId] = useState(sessionStorage.getItem('session_id') ?? 
''); const [showInfoModal, setShowInfoModal] = useState(false); - const [sourcesModal, setSourcesModal] = useState([]); + const [sourcesModal, setSourcesModal] = useState([]); const [modelModal, setModelModal] = useState(''); const [responseTime, setResponseTime] = useState(0); - const [chunkModal, setChunkModal] = useState([]); + const [chunkModal, setChunkModal] = useState([]); const [tokensUsed, setTokensUsed] = useState(0); const [copyMessageId, setCopyMessageId] = useState(null); @@ -56,9 +56,9 @@ const Chatbot: React.FC = (props) => { const simulateTypingEffect = ( response: { reply: string; - sources?: Source[]; + sources?: string[]; model?: string; - chunk_ids?: string[]; + chunk_ids?: chunk[]; total_tokens?: number; response_time?: number; speaking?: boolean; @@ -142,13 +142,13 @@ const Chatbot: React.FC = (props) => { try { setInputMessage(''); simulateTypingEffect({ reply: ' ' }); - const chatbotAPI = await chatBotAPI(userCredentials as UserCredentials, inputMessage, sessionId, model); + const chatbotAPI = await chatBotAPI(userCredentials as UserCredentials, inputMessage, sessionId, model, chatMode); const chatresponse = chatbotAPI?.response; console.log('api', chatresponse); chatbotReply = chatresponse?.data?.data?.message; chatSources = chatresponse?.data?.data?.info.sources; chatModel = chatresponse?.data?.data?.info.model; - chatChunks = chatresponse?.data?.data?.info.chunkids; + chatChunks = chatresponse?.data?.data?.info.chunkdetails; chatTokensUsed = chatresponse?.data?.data?.info.total_tokens; chatTimeTaken = chatresponse?.data?.data?.info.response_time; const finalbotReply = { diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx new file mode 100644 index 000000000..34ee64ef2 --- /dev/null +++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx @@ -0,0 +1,38 @@ +import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; 
+import ChatModeToggle from './ChatModeToggle'; +import { Box, IconButton } from '@neo4j-ndl/react'; +import { Messages } from '../../types'; +import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { tooltips } from '../../utils/Constants'; + +interface IconProps { + closeChatBot: () => void; + deleteOnClick?: () => void; + messages: Messages[]; +} + +const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, deleteOnClick, messages }) => { + return ( +
+ + + + + + + + + +
+ ); +}; + +export default ExpandedChatButtonContainer; diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx new file mode 100644 index 000000000..4519fd476 --- /dev/null +++ b/frontend/src/components/ChatBot/Info/InfoModal.tsx @@ -0,0 +1,352 @@ +import { Box, Typography, TextLink, Flex, Tabs, LoadingSpinner } from '@neo4j-ndl/react'; +import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; +import '../../../styling/info.css'; +import Neo4jRetrievalLogo from '../../../assets/images/Neo4jRetrievalLogo.png'; +import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; +import youtubelogo from '../../../assets/images/youtube.png'; +import gcslogo from '../../../assets/images/gcs.webp'; +import s3logo from '../../../assets/images/s3logo.png'; +import { Chunk, Entity, GroupedEntity, UserCredentials, chatInfoMessage } from '../../../types'; +import { useEffect, useMemo, useState } from 'react'; +import HoverableLink from '../../UI/HoverableLink'; +import GraphViewButton from '../../Graph/GraphViewButton'; +import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo'; +import { useCredentials } from '../../../context/UserCredentials'; +import type { Node, Relationship } from '@neo4j-nvl/base'; +import { calcWordColor } from '@neo4j-devtools/word-color'; +import ReactMarkdown from 'react-markdown'; +import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons'; +import { youtubeLinkValidation } from '../../../utils/Utils'; +const InfoModal: React.FC = ({ sources, model, total_tokens, response_time, chunk_ids }) => { + const [activeTab, setActiveTab] = useState(3); + const [infoEntities, setInfoEntities] = useState([]); + const [loading, setLoading] = useState(false); + const { userCredentials } = useCredentials(); + const [nodes, setNodes] = useState([]); + const [relationships, setRelationships] = useState([]); + const [chunks, setChunks] = useState([]); + const parseEntity = 
(entity: Entity) => { + const { labels, properties } = entity; + const label = labels[0]; + const text = properties.id; + return { label, text }; + }; + useEffect(() => { + setLoading(true); + chunkEntitiesAPI(userCredentials as UserCredentials, chunk_ids.map((c) => c.id).join(',')) + .then((response) => { + setInfoEntities(response.data.data.nodes); + setNodes(response.data.data.nodes); + setRelationships(response.data.data.relationships); + const chunks = response.data.data.chunk_data.map((chunk: any) => { + const chunkScore = chunk_ids.find((chunkdetail) => chunkdetail.id === chunk.id); + return { + ...chunk, + score: chunkScore?.score, + }; + }); + const sortedchunks = chunks.sort((a: any, b: any) => b.score - a.score); + setChunks(sortedchunks); + setLoading(false); + }) + .catch((error) => { + console.error('Error fetching entities:', error); + setLoading(false); + }); + }, [chunk_ids]); + const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { + return infoEntities.reduce((acc, entity) => { + const { label, text } = parseEntity(entity); + if (!acc[label]) { + const newColor = calcWordColor(label); + acc[label] = { texts: new Set(), color: newColor }; + } + acc[label].texts.add(text); + return acc; + }, {} as Record; color: string }>); + }, [infoEntities]); + const onChangeTabs = (tabId: number) => { + setActiveTab(tabId); + }; + const labelCounts = useMemo(() => { + const counts: { [label: string]: number } = {}; + infoEntities.forEach((entity) => { + const { labels } = entity; + const label = labels[0]; + counts[label] = counts[label] ? 
counts[label] + 1 : 1; + }); + return counts; + }, [infoEntities]); + const sortedLabels = useMemo(() => { + return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); + }, [labelCounts]); + + const generateYouTubeLink = (url: string, startTime: string) => { + try { + const urlObj = new URL(url); + urlObj.searchParams.set('t', startTime); + return urlObj.toString(); + } catch (error) { + console.error('Invalid URL:', error); + return ''; + } + }; + return ( + + + + + Retrieval information + + To generate this response, in {response_time} seconds we used{' '} + {total_tokens} tokens with the model{' '} + {model}. + + + + + Sources used + Top Entities used + Chunks + + + + {sources.length ? ( +
    + {sources.map((link, index) => { + return ( +
  • + {link?.startsWith('http') || link?.startsWith('https') ? ( + <> + {link?.includes('wikipedia.org') && ( +
    + Wikipedia Logo + + + + {link} + + + +
    + )} + {link?.includes('storage.googleapis.com') && ( +
    + Google Cloud Storage Logo + + {decodeURIComponent(link).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} + +
    + )} + {link?.startsWith('s3://') && ( +
    + S3 Logo + + {decodeURIComponent(link).split('/').at(-1) ?? 'S3 File'} + +
    + )} + {youtubeLinkValidation(link) && ( + <> +
    + + + + + {link} + + + +
    + + )} + {!link?.startsWith('s3://') && + !link?.includes('storage.googleapis.com') && + !link?.includes('wikipedia.org') && + !link?.includes('youtube.com') && ( +
    + + + {link} + +
    + )} + + ) : ( +
    + + + {link} + + {/* {chunks?.length > 0 && ( + + - Page{' '} + {chunks + .map((c) => c.page_number as number) + .sort((a, b) => a - b) + .join(', ')} + + )} */} +
    + )} +
  • + ); + })} +
+ ) : ( + No Sources Found + )} +
+ + {loading ? ( + + + + ) : Object.keys(groupedEntities).length > 0 ? ( +
    + {sortedLabels.map((label, index) => ( +
  • +
    + {label} ({labelCounts[label]}) +
    + + {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} + +
  • + ))} +
+ ) : ( + No Entities Found + )} +
+ + {loading ? ( + + + + ) : chunks.length > 0 ? ( +
+
    + {chunks.map((chunk) => ( +
  • + {chunk?.page_number ? ( + <> +
    + + + {/* {chunk?.fileName}, Page: {chunk?.page_number} */} + {chunk?.fileName} + +
    + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.start_time ? ( + <> +
    + + + + {chunk?.fileName} + + +
    + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( + <> +
    + + {chunk?.fileName} +
    + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( + <> +
    + + {chunk?.fileName} +
    + Similarity Score: {chunk?.score} + + ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( + <> +
    + + {chunk?.fileName} +
    + + ) : chunk?.url && + !chunk?.url.startsWith('s3://') && + !chunk?.url.includes('storage.googleapis.com') && + !chunk?.url.includes('wikipedia.org') && + !chunk?.url.includes('youtube.com') ? ( + <> +
    + + + {chunk?.url} + +
    + Similarity Score: {chunk?.score} + + ) : ( + <> + )} + {chunk?.text} +
  • + ))} +
+
+ ) : ( + No Chunks Found + )} +
+
+ {activeTab == 4 && nodes.length && relationships.length ? ( + + + + ) : ( + <> + )} +
+ ); +}; +export default InfoModal; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 86320b07a..0909cb611 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -1,26 +1,36 @@ -import { useEffect, useState, useMemo } from 'react'; -import ConnectionModal from './ConnectionModal'; +import { useEffect, useState, useMemo, useCallback } from 'react'; +import ConnectionModal from './Popups/ConnectionModal/ConnectionModal'; import LlmDropdown from './Dropdown'; import FileTable from './FileTable'; import { Button, Typography, Flex, StatusIndicator } from '@neo4j-ndl/react'; import { useCredentials } from '../context/UserCredentials'; import { useFileContext } from '../context/UsersFiles'; -import CustomAlert from './Alert'; +import CustomAlert from './UI/Alert'; import { extractAPI } from '../utils/FileAPI'; -import { ContentProps, CustomFile, OptionType, UserCredentials, alertStateType } from '../types'; -import { updateGraphAPI } from '../services/UpdateGraph'; -import GraphViewModal from './GraphViewModal'; -import deleteAPI from '../services/deleteFiles'; -import DeletePopUp from './DeletePopUp'; +import { ContentProps, CustomFile, Menuitems, OptionType, UserCredentials, alertStateType } from '../types'; +import deleteAPI from '../services/DeleteFiles'; +import { postProcessing } from '../services/PostProcessing'; +import DeletePopUp from './Popups/DeletePopUp/DeletePopUp'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; import { useSearchParams } from 'react-router-dom'; -import ConfirmationDialog from './ConfirmationDialog'; -import { buttonCaptions, largeFileSize, tooltips } from '../utils/Constants'; -import ButtonWithToolTip from './ButtonWithToolTip'; +import ConfirmationDialog from './Popups/LargeFilePopUp/ConfirmationDialog'; +import { buttonCaptions, largeFileSize, taskParam, tooltips } from 
'../utils/Constants'; +import ButtonWithToolTip from './UI/ButtonWithToolTip'; import connectAPI from '../services/ConnectAPI'; +import SettingModalHOC from '../HOC/SettingModalHOC'; +import GraphViewModal from './Graph/GraphViewModal'; +import CustomMenu from './UI/Menu'; +import { TrashIconOutline } from '@neo4j-ndl/react/icons'; -const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => { +const Content: React.FC = ({ + isLeftExpanded, + isRightExpanded, + openTextSchema, + isSchema, + setIsSchema, + openOrphanNodeDeletionModal, +}) => { const [init, setInit] = useState(false); const [openConnection, setOpenConnection] = useState(false); const [openGraphView, setOpenGraphView] = useState(false); @@ -29,6 +39,10 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => const { setUserCredentials, userCredentials } = useCredentials(); const [showConfirmationModal, setshowConfirmationModal] = useState(false); const [extractLoading, setextractLoading] = useState(false); + const [isLargeFile, setIsLargeFile] = useState(false); + const [showSettingnModal, setshowSettingModal] = useState(false); + const [openDeleteMenu, setopenDeleteMenu] = useState(false); + const [deleteAnchor, setdeleteAnchor] = useState(null); const { filesData, @@ -220,6 +234,7 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => }; const handleGenerateGraph = (allowLargeFiles: boolean, selectedFilesFromAllfiles: CustomFile[]) => { + setIsLargeFile(false); const data = []; if (selectedfileslength && allowLargeFiles) { for (let i = 0; i < selectedfileslength; i++) { @@ -230,7 +245,7 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => } Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await updateGraphAPI(userCredentials as UserCredentials); + await postProcessing(userCredentials as UserCredentials, taskParam); }); } else if (selectedFilesFromAllfiles.length && allowLargeFiles) { // @ts-ignore @@ -241,7 +256,7 @@ const 
Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => } Promise.allSettled(data).then(async (_) => { setextractLoading(false); - await updateGraphAPI(userCredentials as UserCredentials); + await postProcessing(userCredentials as UserCredentials, taskParam); }); } }; @@ -376,6 +391,135 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => } }, []); + useEffect(() => { + const storedSchema = localStorage.getItem('isSchema'); + if (storedSchema !== null) { + setIsSchema(JSON.parse(storedSchema)); + } + }, []); + + const onClickHandler = () => { + if (isSchema) { + if (selectedRows.length) { + let selectedLargeFiles: CustomFile[] = []; + selectedRows.forEach((f) => { + const parsedData: CustomFile = JSON.parse(f); + if (parsedData.fileSource === 'local file') { + if (typeof parsedData.size === 'number' && parsedData.status === 'New' && parsedData.size > largeFileSize) { + selectedLargeFiles.push(parsedData); + } + } + }); + // @ts-ignore + if (selectedLargeFiles.length) { + setIsLargeFile(true); + setshowConfirmationModal(true); + handleGenerateGraph(false, []); + } else { + setIsLargeFile(false); + handleGenerateGraph(true, filesData); + } + } else if (filesData.length) { + const largefiles = filesData.filter((f) => { + if (typeof f.size === 'number' && f.status === 'New' && f.size > largeFileSize) { + return true; + } + return false; + }); + const selectAllNewFiles = filesData.filter((f) => f.status === 'New'); + const stringified = selectAllNewFiles.reduce((accu, f) => { + const key = JSON.stringify(f); + // @ts-ignore + accu[key] = true; + return accu; + }, {}); + setRowSelection(stringified); + if (largefiles.length) { + setIsLargeFile(true); + setshowConfirmationModal(true); + handleGenerateGraph(false, []); + } else { + setIsLargeFile(false); + handleGenerateGraph(true, filesData); + } + } + } else { + if (selectedRows.length) { + let selectedLargeFiles: CustomFile[] = []; + selectedRows.forEach((f) => { + const parsedData: CustomFile = 
JSON.parse(f); + if (parsedData.fileSource === 'local file') { + if (typeof parsedData.size === 'number' && parsedData.status === 'New' && parsedData.size > largeFileSize) { + selectedLargeFiles.push(parsedData); + } + } + }); + // @ts-ignore + if (selectedLargeFiles.length) { + setIsLargeFile(true); + } else { + setIsLargeFile(false); + } + } else if (filesData.length) { + const largefiles = filesData.filter((f) => { + if (typeof f.size === 'number' && f.status === 'New' && f.size > largeFileSize) { + return true; + } + return false; + }); + const selectAllNewFiles = filesData.filter((f) => f.status === 'New'); + const stringified = selectAllNewFiles.reduce((accu, f) => { + const key = JSON.stringify(f); + // @ts-ignore + accu[key] = true; + return accu; + }, {}); + setRowSelection(stringified); + if (largefiles.length) { + setIsLargeFile(true); + } else { + setIsLargeFile(false); + } + } + setshowSettingModal(true); + } + }; + + const deleteMenuItems: Menuitems[] = useMemo( + () => [ + { + title: `Delete Files ${selectedfileslength > 0 ? 
`(${selectedfileslength})` : ''}`, + onClick: () => setshowDeletePopUp(true), + disabledCondition: !selectedfileslength, + description: tooltips.deleteFile, + }, + { + title: 'Delete Orphan Nodes', + onClick: () => openOrphanNodeDeletionModal(), + disabledCondition: false, + }, + ], + [selectedfileslength] + ); + + const handleContinue = () => { + if (!isLargeFile) { + handleGenerateGraph(true, filesData); + setshowSettingModal(false); + } else { + setshowSettingModal(false); + setshowConfirmationModal(true); + handleGenerateGraph(false, []); + } + setIsSchema(true); + setalertDetails({ + showAlert: true, + alertType: 'success', + alertMessage: 'Schema is set successfully', + }); + localStorage.setItem('isSchema', JSON.stringify(true)); + }; + return ( <> {alertDetails.showAlert && ( @@ -386,6 +530,14 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => alertMessage={alertDetails.alertMessage} /> )} + {isSchema && ( + + )} {showConfirmationModal && filesForProcessing.length && ( = ({ isLeftExpanded, isRightExpanded }) => loading={deleteLoading} > )} + {showSettingnModal && ( + setshowSettingModal(false)} + onContinue={handleContinue} + open={showSettingnModal} + openTextSchema={openTextSchema} + isSchema={isSchema} + setIsSchema={setIsSchema} + /> + )}
= ({ isLeftExpanded, isRightExpanded }) => justifyContent='space-between' flexDirection='row' > - + { - if (selectedRows.length) { - let selectedLargeFiles: CustomFile[] = []; - selectedRows.forEach((f) => { - const parsedData: CustomFile = JSON.parse(f); - if (parsedData.fileSource === 'local file') { - if ( - typeof parsedData.size === 'number' && - parsedData.status === 'New' && - parsedData.size > largeFileSize - ) { - selectedLargeFiles.push(parsedData); - } - } - }); - // @ts-ignore - if (selectedLargeFiles.length) { - setshowConfirmationModal(true); - handleGenerateGraph(false, []); - } else { - handleGenerateGraph(true, filesData); - } - } else if (filesData.length) { - const largefiles = filesData.filter((f) => { - if (typeof f.size === 'number' && f.status === 'New' && f.size > largeFileSize) { - return true; - } - return false; - }); - const selectAllNewFiles = filesData.filter((f) => f.status === 'New'); - const stringified = selectAllNewFiles.reduce((accu, f) => { - const key = JSON.stringify(f); - // @ts-ignore - accu[key] = true; - return accu; - }, {}); - setRowSelection(stringified); - if (largefiles.length) { - setshowConfirmationModal(true); - handleGenerateGraph(false, []); - } else { - handleGenerateGraph(true, filesData); - } - } - }} + onClick={onClickHandler} disabled={disableCheck} className='mr-0.5' > @@ -527,19 +646,26 @@ const Content: React.FC = ({ isLeftExpanded, isRightExpanded }) => > {buttonCaptions.exploreGraphWithBloom} - setshowDeletePopUp(true)} - disabled={!selectedfileslength} - className='ml-0.5' - label='Delete Files' + { + setopenDeleteMenu(false); + }, [])} + items={deleteMenuItems} + MenuAnchor={deleteAnchor} + anchorOrigin={useMemo(() => ({ horizontal: 'left', vertical: 'bottom' }), [])} + transformOrigin={useMemo(() => ({ horizontal: 'right', vertical: 'top' }), [])} + > +
diff --git a/frontend/src/components/S3Bucket.tsx b/frontend/src/components/DataSources/AWS/S3Bucket.tsx similarity index 52% rename from frontend/src/components/S3Bucket.tsx rename to frontend/src/components/DataSources/AWS/S3Bucket.tsx index 17cc874cd..e39dc949f 100644 --- a/frontend/src/components/S3Bucket.tsx +++ b/frontend/src/components/DataSources/AWS/S3Bucket.tsx @@ -1,7 +1,7 @@ -import { DataComponentProps } from '../types'; -import s3logo from '../assets/images/s3logo.png'; -import CustomButton from './CustomButton'; -import { buttonCaptions } from '../utils/Constants'; +import { DataComponentProps } from '../../../types'; +import s3logo from '../../../assets/images/s3logo.png'; +import CustomButton from '../../UI/CustomButton'; +import { buttonCaptions } from '../../../utils/Constants'; const S3Component: React.FC = ({ openModal }) => { return ( diff --git a/frontend/src/components/S3Modal.tsx b/frontend/src/components/DataSources/AWS/S3Modal.tsx similarity index 94% rename from frontend/src/components/S3Modal.tsx rename to frontend/src/components/DataSources/AWS/S3Modal.tsx index bcf1beb10..e9d11fd8a 100644 --- a/frontend/src/components/S3Modal.tsx +++ b/frontend/src/components/DataSources/AWS/S3Modal.tsx @@ -1,13 +1,13 @@ import { TextInput } from '@neo4j-ndl/react'; import React, { useState } from 'react'; -import { CustomFile, CustomFileBase, S3ModalProps, UserCredentials } from '../types'; -import { urlScanAPI } from '../services/URLScan'; -import { useCredentials } from '../context/UserCredentials'; -import { validation } from '../utils/Utils'; -import { useFileContext } from '../context/UsersFiles'; +import { CustomFile, CustomFileBase, S3ModalProps, UserCredentials } from '../../../types'; +import { urlScanAPI } from '../../../services/URLScan'; +import { useCredentials } from '../../../context/UserCredentials'; +import { validation } from '../../../utils/Utils'; +import { useFileContext } from '../../../context/UsersFiles'; import { v4 as uuidv4 
} from 'uuid'; -import CustomModal from '../HOC/CustomModal'; -import { buttonCaptions } from '../utils/Constants'; +import CustomModal from '../../../HOC/CustomModal'; +import { buttonCaptions } from '../../../utils/Constants'; interface S3File { fileName: string; fileSize: number; diff --git a/frontend/src/components/GCSButton.tsx b/frontend/src/components/DataSources/GCS/GCSButton.tsx similarity index 51% rename from frontend/src/components/GCSButton.tsx rename to frontend/src/components/DataSources/GCS/GCSButton.tsx index 16d94bcca..d81539032 100644 --- a/frontend/src/components/GCSButton.tsx +++ b/frontend/src/components/DataSources/GCS/GCSButton.tsx @@ -1,7 +1,7 @@ -import gcslogo from '../assets/images/gcs.webp'; -import { DataComponentProps } from '../types'; -import { buttonCaptions } from '../utils/Constants'; -import CustomButton from './CustomButton'; +import gcslogo from '../../../assets/images/gcs.webp'; +import { DataComponentProps } from '../../../types'; +import { buttonCaptions } from '../../../utils/Constants'; +import CustomButton from '../../UI/CustomButton'; const GCSButton: React.FC = ({ openModal }) => { return ( diff --git a/frontend/src/components/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx similarity index 94% rename from frontend/src/components/GCSModal.tsx rename to frontend/src/components/DataSources/GCS/GCSModal.tsx index 2ba9ddaf6..08c4282d8 100644 --- a/frontend/src/components/GCSModal.tsx +++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx @@ -1,14 +1,14 @@ import { TextInput } from '@neo4j-ndl/react'; import { useCallback, useEffect, useState } from 'react'; -import { useCredentials } from '../context/UserCredentials'; -import { useFileContext } from '../context/UsersFiles'; -import { urlScanAPI } from '../services/URLScan'; -import { CustomFileBase, GCSModalProps, fileName, nonoautherror } from '../types'; +import { useCredentials } from '../../../context/UserCredentials'; +import { useFileContext } 
from '../../../context/UsersFiles'; +import { urlScanAPI } from '../../../services/URLScan'; +import { CustomFileBase, GCSModalProps, fileName, nonoautherror } from '../../../types'; import { v4 as uuidv4 } from 'uuid'; -import CustomModal from '../HOC/CustomModal'; +import CustomModal from '../../../HOC/CustomModal'; import { useGoogleLogin } from '@react-oauth/google'; -import { useAlertContext } from '../context/Alert'; -import { buttonCaptions } from '../utils/Constants'; +import { useAlertContext } from '../../../context/Alert'; +import { buttonCaptions } from '../../../utils/Constants'; const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => { const [bucketName, setbucketName] = useState(''); @@ -148,7 +148,7 @@ const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => const submitHandler = async () => { if (bucketName.trim() === '' || projectId.trim() === '') { setStatus('danger'); - setStatusMessage('Please Fill the Bucket Name'); + setStatusMessage('Please Fill the Credentials'); setTimeout(() => { setStatus('unknown'); }, 5000); diff --git a/frontend/src/components/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx similarity index 94% rename from frontend/src/components/DropZone.tsx rename to frontend/src/components/DataSources/Local/DropZone.tsx index 8fe718c7f..e1c245c07 100644 --- a/frontend/src/components/DropZone.tsx +++ b/frontend/src/components/DataSources/Local/DropZone.tsx @@ -1,16 +1,16 @@ import axios from 'axios'; import { Dropzone, Flex, Typography } from '@neo4j-ndl/react'; import React, { useState, useEffect, FunctionComponent } from 'react'; -import Loader from '../utils/Loader'; +import Loader from '../../../utils/Loader'; import { v4 as uuidv4 } from 'uuid'; -import { useCredentials } from '../context/UserCredentials'; -import { useFileContext } from '../context/UsersFiles'; -import CustomAlert from './Alert'; -import { CustomFile, CustomFileBase, UploadResponse, alertStateType } from '../types'; 
-import { buttonCaptions, chunkSize } from '../utils/Constants'; -import { url } from '../utils/Utils'; +import { useCredentials } from '../../../context/UserCredentials'; +import { useFileContext } from '../../../context/UsersFiles'; +import CustomAlert from '../../UI/Alert'; +import { CustomFile, CustomFileBase, UploadResponse, alertStateType } from '../../../types'; +import { buttonCaptions, chunkSize } from '../../../utils/Constants'; +import { url } from '../../../utils/Utils'; import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from './IconButtonToolTip'; +import IconButtonWithToolTip from '../../UI/IconButtonToolTip'; const DropZone: FunctionComponent = () => { const { filesData, setFilesData, model } = useFileContext(); @@ -221,7 +221,7 @@ const DropZone: FunctionComponent = () => { )} } + loadingComponent={isLoading && } isTesting={true} className='!bg-none dropzoneContainer' supportedFilesDescription={ diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index 727e95b21..9c8df4d9c 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -30,15 +30,15 @@ import { statusCheck, capitalize } from '../utils/Utils'; import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, alertStateType } from '../types'; import { useCredentials } from '../context/UserCredentials'; import { MagnifyingGlassCircleIconSolid } from '@neo4j-ndl/react/icons'; -import CustomAlert from './Alert'; -import CustomProgressBar from './CustomProgressBar'; +import CustomAlert from './UI/Alert'; +import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; import { triggerStatusUpdateAPI } from '../services/ServerSideStatusUpdateAPI'; import useServerSideEvent from '../hooks/useSse'; import { AxiosError } from 'axios'; import { XMarkIconOutline } from '@neo4j-ndl/react/icons'; import cancelAPI from 
'../services/CancelAPI'; -import IconButtonWithToolTip from './IconButtonToolTip'; +import IconButtonWithToolTip from './UI/IconButtonToolTip'; import { largeFileSize } from '../utils/Constants'; const FileTable: React.FC = ({ isExpanded, connectionStatus, setConnectionStatus, onInspect }) => { @@ -47,7 +47,7 @@ const FileTable: React.FC = ({ isExpanded, connectionStatus, set const columnHelper = createColumnHelper(); const [columnFilters, setColumnFilters] = useState([]); const [isLoading, setIsLoading] = useState(false); - //const [currentOuterHeight, setcurrentOuterHeight] = useState(window.outerHeight); + // const [currentOuterHeight, setcurrentOuterHeight] = useState(window.outerHeight); const [alertDetails, setalertDetails] = useState({ showAlert: false, alertType: 'error', @@ -252,7 +252,11 @@ const FileTable: React.FC = ({ isExpanded, connectionStatus, set columnHelper.accessor((row) => row, { id: 'source', cell: (info) => { - if (info.row.original.fileSource === 'youtube' || info.row.original.fileSource === 'Wikipedia') { + if ( + info.row.original.fileSource === 'youtube' || + info.row.original.fileSource === 'Wikipedia' || + info.row.original.fileSource === 'web-url' + ) { return ( @@ -388,7 +392,7 @@ const FileTable: React.FC = ({ isExpanded, connectionStatus, set userCredentials && userCredentials.database ) { - if (item?.fileSize > largeFileSize) { + if (item?.fileSize < largeFileSize) { subscribe( item.fileName, userCredentials?.uri, diff --git a/frontend/src/components/GraphViewButton.tsx b/frontend/src/components/Graph/GraphViewButton.tsx similarity index 100% rename from frontend/src/components/GraphViewButton.tsx rename to frontend/src/components/Graph/GraphViewButton.tsx diff --git a/frontend/src/components/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx similarity index 96% rename from frontend/src/components/GraphViewModal.tsx rename to frontend/src/components/Graph/GraphViewModal.tsx index 5a40b2d7e..ab80ae417 100644 --- 
a/frontend/src/components/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -1,24 +1,23 @@ import { Banner, Checkbox, Dialog, IconButtonArray, LoadingSpinner } from '@neo4j-ndl/react'; import { useCallback, useEffect, useRef, useState } from 'react'; -import { GraphType, GraphViewModalProps, Scheme, UserCredentials } from '../types'; +import { GraphType, GraphViewModalProps, Scheme, UserCredentials } from '../../types'; import { InteractiveNvlWrapper } from '@neo4j-nvl/react'; import NVL, { NvlOptions } from '@neo4j-nvl/base'; import type { Node, Relationship } from '@neo4j-nvl/base'; import { Resizable } from 're-resizable'; - import { DragIcon, FitToScreenIcon, MagnifyingGlassMinusIconOutline, MagnifyingGlassPlusIconOutline, } from '@neo4j-ndl/react/icons'; -import IconButtonWithToolTip from './IconButtonToolTip'; -import { processGraphData } from '../utils/Utils'; -import { useCredentials } from '../context/UserCredentials'; +import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { processGraphData } from '../../utils/Utils'; +import { useCredentials } from '../../context/UserCredentials'; import { LegendsChip } from './LegendsChip'; -import graphQueryAPI from '../services/GraphQuery'; -import { queryMap } from '../utils/Constants'; -import { useFileContext } from '../context/UsersFiles'; +import graphQueryAPI from '../../services/GraphQuery'; +import { queryMap } from '../../utils/Constants'; +import { useFileContext } from '../../context/UsersFiles'; const GraphViewModal: React.FunctionComponent = ({ open, diff --git a/frontend/src/components/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx similarity index 57% rename from frontend/src/components/LegendsChip.tsx rename to frontend/src/components/Graph/LegendsChip.tsx index 0316cad4c..f1d2900e3 100644 --- a/frontend/src/components/LegendsChip.tsx +++ b/frontend/src/components/Graph/LegendsChip.tsx @@ -1,5 +1,6 @@ import { useMemo } from 'react'; -import { 
LegendChipProps } from '../types'; +import { LegendChipProps } from '../../types'; +import Legend from '../UI/Legend'; export const LegendsChip: React.FunctionComponent = ({ scheme, title, nodes }) => { const chunkcount = useMemo( @@ -7,10 +8,5 @@ export const LegendsChip: React.FunctionComponent = ({ scheme, () => [...new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id))].length, [] ); - return ( -
- {title} - {chunkcount && `(${chunkcount})`} -
- ); + return ; }; diff --git a/frontend/src/components/IconsPlacement.tsx b/frontend/src/components/IconsPlacement.tsx deleted file mode 100644 index 182ec064e..000000000 --- a/frontend/src/components/IconsPlacement.tsx +++ /dev/null @@ -1,34 +0,0 @@ -import { TrashIconOutline, XMarkIconOutline } from '@neo4j-ndl/react/icons'; -import { IconButton } from '@neo4j-ndl/react'; -import { Messages } from '../types'; -import IconButtonWithToolTip from './IconButtonToolTip'; -import { tooltips } from '../utils/Constants'; - -interface IconProps { - closeChatBot: () => void; - deleteOnClick?: () => void; - messages: Messages[]; -} - -const IconsPlacement: React.FC = ({ closeChatBot, deleteOnClick, messages }) => { - return ( -
- - - - - - -
- ); -}; - -export default IconsPlacement; diff --git a/frontend/src/components/InfoModal.tsx b/frontend/src/components/InfoModal.tsx deleted file mode 100644 index a043a4d9d..000000000 --- a/frontend/src/components/InfoModal.tsx +++ /dev/null @@ -1,274 +0,0 @@ -import { Box, Typography, TextLink, Flex, Tabs, LoadingSpinner } from '@neo4j-ndl/react'; -import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; -import '../styling/info.css'; -import Neo4jRetrievalLogo from '../assets/images/Neo4jRetrievalLogo.png'; -import wikipedialogo from '../assets/images/Wikipedia-logo-v2.svg'; -import youtubelogo from '../assets/images/youtube.png'; -import gcslogo from '../assets/images/gcs.webp'; -import s3logo from '../assets/images/s3logo.png'; -import { Chunk, Entity, GroupedEntity, UserCredentials, chatInfoMessage } from '../types'; -import { useEffect, useMemo, useState } from 'react'; -import HoverableLink from './HoverableLink'; -import GraphViewButton from './GraphViewButton'; -import { chunkEntitiesAPI } from '../services/ChunkEntitiesInfo'; -import { useCredentials } from '../context/UserCredentials'; -import type { Node, Relationship } from '@neo4j-nvl/base'; -import { calcWordColor } from '@neo4j-devtools/word-color'; -import ReactMarkdown from 'react-markdown'; -const InfoModal: React.FC = ({ sources, model, total_tokens, response_time, chunk_ids }) => { - const [activeTab, setActiveTab] = useState(0); - const [infoEntities, setInfoEntities] = useState([]); - const [loading, setLoading] = useState(false); - const { userCredentials } = useCredentials(); - const [nodes, setNodes] = useState([]); - const [relationships, setRelationships] = useState([]); - const [chunks, setChunks] = useState([]); - const parseEntity = (entity: Entity) => { - const { labels, properties } = entity; - const label = labels[0]; - const text = properties.id; - return { label, text }; - }; - useEffect(() => { - setLoading(true); - chunkEntitiesAPI(userCredentials as UserCredentials, 
chunk_ids.join(',')) - .then((response) => { - setInfoEntities(response.data.data.nodes); - setNodes(response.data.data.nodes); - setRelationships(response.data.data.relationships); - setChunks(response.data.data.chunk_data); - setLoading(false); - }) - .catch((error) => { - console.error('Error fetching entities:', error); - setLoading(false); - }); - }, [chunk_ids]); - const groupedEntities = useMemo<{ [key: string]: GroupedEntity }>(() => { - return infoEntities.reduce((acc, entity) => { - const { label, text } = parseEntity(entity); - if (!acc[label]) { - const newColor = calcWordColor(label); - acc[label] = { texts: new Set(), color: newColor }; - } - acc[label].texts.add(text); - return acc; - }, {} as Record; color: string }>); - }, [infoEntities]); - const onChangeTabs = (tabId: number) => { - setActiveTab(tabId); - }; - const labelCounts = useMemo(() => { - const counts: { [label: string]: number } = {}; - infoEntities.forEach((entity) => { - const { labels } = entity; - const label = labels[0]; - counts[label] = counts[label] ? counts[label] + 1 : 1; - }); - return counts; - }, [infoEntities]); - const sortedLabels = useMemo(() => { - return Object.keys(labelCounts).sort((a, b) => labelCounts[b] - labelCounts[a]); - }, [labelCounts]); - - const generateYouTubeLink = (url: string, startTime: string) => { - try { - const urlObj = new URL(url); - urlObj.searchParams.set('t', startTime); - return urlObj.toString(); - } catch (error) { - console.error('Invalid URL:', error); - return ''; - } - }; - return ( - - - - - Retrieval information - - To generate this response, in {response_time} seconds we used{' '} - {total_tokens} tokens with the model{' '} - {model}. - - - - - Sources used - Top Entities used - Chunks - - - {activeTab === 0 ? ( - sources.length > 0 ? ( -
    - {sources.map((link, index) => ( -
  • - {link?.source_name.startsWith('http') || link?.source_name.startsWith('https') ? ( - <> - {link?.source_name.includes('wikipedia.org') && ( -
    - Wikipedia Logo - - {link?.source_name} - -
    - )} - {link?.source_name.includes('storage.googleapis.com') && ( -
    - Google Cloud Storage Logo - - {decodeURIComponent(link?.source_name).split('/').at(-1)?.split('?')[0] ?? 'GCS File'} - -
    - )} - {link.source_name.startsWith('s3://') && ( -
    - S3 Logo - - {decodeURIComponent(link?.source_name).split('/').at(-1) ?? 'S3 File'} - -
    - )} - {!link.source_name.includes('wikipedia.org') && - !link.source_name.includes('storage.googleapis.com') && - !link.source_name.startsWith('s3://') && ( - - - - {link.source_name} - - - - )} - - ) : ( -
    - - - {link?.source_name} - - {link?.page_numbers && link?.page_numbers.length > 0 && ( - - - Page {link?.page_numbers.sort((a, b) => a - b).join(', ')} - - )} -
    - )} -
  • - ))} -
- ) : ( - No Sources Found - ) - ) : activeTab === 1 ? ( - loading ? ( - - - - ) : Object.keys(groupedEntities).length > 0 ? ( -
    - {sortedLabels.map((label, index) => ( -
  • -
    - {label} ({labelCounts[label]}) -
    - - {Array.from(groupedEntities[label].texts).slice(0, 3).join(', ')} - -
  • - ))} -
- ) : ( - No Entities Found - ) - ) : loading ? ( - - - - ) : chunks.length > 0 ? ( -
-
    - {chunks.map((chunk) => ( -
  • - {chunk?.page_number ? ( -
    - - - {chunk?.fileName}, Page: {chunk?.page_number} - -
    - ) : chunk?.url && chunk?.start_time ? ( -
    - - - {chunk?.fileName} - -
    - ) : chunk?.url && chunk?.url.includes('wikipedia.org') ? ( -
    - - {chunk?.fileName} -
    - ) : chunk?.url && chunk?.url.includes('storage.googleapis.com') ? ( -
    - - {chunk?.fileName} -
    - ) : chunk?.url && chunk?.url.startsWith('s3://') ? ( -
    - - {chunk?.fileName} -
    - ) : ( - <> - )} - {chunk?.text} -
  • - ))} -
-
- ) : ( - No Chunks Found - )} -
- {activeTab === 1 && nodes.length && relationships.length ? ( - - - - ) : ( - <> - )} -
- ); -}; -export default InfoModal; diff --git a/frontend/src/components/Layout/DrawerChatbot.tsx b/frontend/src/components/Layout/DrawerChatbot.tsx index 6fd2385b4..470b296dd 100644 --- a/frontend/src/components/Layout/DrawerChatbot.tsx +++ b/frontend/src/components/Layout/DrawerChatbot.tsx @@ -1,5 +1,5 @@ import { Drawer } from '@neo4j-ndl/react'; -import Chatbot from '../Chatbot'; +import Chatbot from '../ChatBot/Chatbot'; import { Messages } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; interface DrawerChatbotProps { diff --git a/frontend/src/components/Layout/DrawerDropzone.tsx b/frontend/src/components/Layout/DrawerDropzone.tsx index fdab4c331..6bc2e3ea8 100644 --- a/frontend/src/components/Layout/DrawerDropzone.tsx +++ b/frontend/src/components/Layout/DrawerDropzone.tsx @@ -1,26 +1,23 @@ import { Drawer, Flex, StatusIndicator, Typography } from '@neo4j-ndl/react'; -import DropZone from '../DropZone'; -import React, { useState, useEffect, useCallback } from 'react'; +import DropZone from '../DataSources/Local/DropZone'; +import React, { useState, useEffect, useCallback, useMemo } from 'react'; import { healthStatus } from '../../services/HealthStatus'; -import S3Component from '../S3Bucket'; -import S3Modal from '../S3Modal'; -import Wikipedia from '../Wikipedia'; +import S3Component from '../DataSources/AWS/S3Bucket'; +import S3Modal from '../DataSources/AWS/S3Modal'; import { DrawerProps } from '../../types'; -import YouTubeButton from '../YoutubeButton'; -import YoutubeModal from '../YoutubeModal'; -import WikipediaModal from '../WikipediaModal'; -import GCSButton from '../GCSButton'; -import GCSModal from '../GCSModal'; -import CustomAlert from '../Alert'; +import GCSButton from '../DataSources/GCS/GCSButton'; +import GCSModal from '../DataSources/GCS/GCSModal'; +import CustomAlert from '../UI/Alert'; import { useAlertContext } from '../../context/Alert'; import { APP_SOURCES } from '../../utils/Constants'; +import 
GenericButton from '../WebSources/GenericSourceButton'; +import GenericModal from '../WebSources/GenericSourceModal'; const DrawerDropzone: React.FC = ({ isExpanded }) => { const [isBackendConnected, setIsBackendConnected] = useState(false); const [showModal, setshowModal] = useState(false); - const [showWikiepediaModal, setShowWikiepediaModal] = useState(false); - const [showYoutubeModal, setShowYoutubeModal] = useState(false); const [showGCSModal, setShowGCSModal] = useState(false); + const [showGenericModal, setshowGenericModal] = useState(false); const { closeAlert, alertState } = useAlertContext(); useEffect(() => { @@ -41,24 +38,31 @@ const DrawerDropzone: React.FC = ({ isExpanded }) => { const hideModal = useCallback(() => { setshowModal(false); }, []); - const openWikipediaModal = useCallback(() => { - setShowWikiepediaModal(true); - }, []); - const closeWikipediaModal = useCallback(() => { - setShowWikiepediaModal(false); - }, []); - const hideYoutubeModal = useCallback(() => { - setShowYoutubeModal(false); - }, []); - const openYoutubeModal = useCallback(() => { - setShowYoutubeModal(true); - }, []); const openGCSModal = useCallback(() => { setShowGCSModal(true); }, []); const hideGCSModal = useCallback(() => { setShowGCSModal(false); }, []); + const openGenericModal = useCallback(() => { + setshowGenericModal(true); + }, []); + const closeGenericModal = useCallback(() => { + setshowGenericModal(false); + }, []); + + const isYoutubeOnlyCheck = useMemo( + () => APP_SOURCES?.includes('youtube') && !APP_SOURCES.includes('wiki') && !APP_SOURCES.includes('web'), + [APP_SOURCES] + ); + const isWikipediaOnlyCheck = useMemo( + () => APP_SOURCES?.includes('wiki') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('web'), + [APP_SOURCES] + ); + const iswebOnlyCheck = useMemo( + () => APP_SOURCES?.includes('web') && !APP_SOURCES.includes('youtube') && !APP_SOURCES.includes('wiki'), + [APP_SOURCES] + ); return (
@@ -91,149 +95,89 @@ const DrawerDropzone: React.FC = ({ isExpanded }) => {
{process.env.ENV != 'PROD' ? ( <> - {isBackendConnected && APP_SOURCES != undefined && APP_SOURCES.length === 0 ? ( - -
+ + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
-
- - -
-
- - -
-
- - -
-
- - -
-
- ) : ( - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
- -
- )} - {APP_SOURCES != undefined && APP_SOURCES.includes('youtube') && ( -
- - -
- )} - {APP_SOURCES != undefined && APP_SOURCES.includes('wiki') && ( -
- - -
- )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {APP_SOURCES.includes('s3') && ( -
- - {' '} -
- )} - {APP_SOURCES.includes('gcs') && ( -
- - -
- )} - - ) : ( - <> - )} -
- )} + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {(APP_SOURCES.includes('youtube') || + APP_SOURCES.includes('wiki') || + APP_SOURCES.includes('web')) && ( +
+ + +
+ )} + {APP_SOURCES.includes('s3') && ( +
+ + {' '} +
+ )} + {APP_SOURCES.includes('gcs') && ( +
+ + +
+ )} + + ) : ( + <> + )} + ) : ( <> - {APP_SOURCES != undefined && APP_SOURCES.length === 0 ? ( - -
+ + {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( +
+ )} + {((APP_SOURCES != undefined && APP_SOURCES.includes('youtube')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('wiki')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('web'))) && (
- - -
-
- - -
-
- - -
-
- - + +
-
- ) : ( - - {APP_SOURCES != undefined && APP_SOURCES.includes('local') && ( -
- -
- )} - {APP_SOURCES != undefined && APP_SOURCES.includes('youtube') && ( -
- - -
- )} - {APP_SOURCES != undefined && APP_SOURCES.includes('wiki') && ( -
- - -
- )} - {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || - (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( - <> - {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( -
- - {' '} -
- )} - {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( -
- - -
- )} - - ) : ( - <> - )} -
- )} + )} + {(APP_SOURCES != undefined && APP_SOURCES.includes('s3')) || + (APP_SOURCES != undefined && APP_SOURCES.includes('gcs')) ? ( + <> + {APP_SOURCES != undefined && APP_SOURCES.includes('s3') && ( +
+ + {' '} +
+ )} + {APP_SOURCES != undefined && APP_SOURCES.includes('gcs') && ( +
+ + +
+ )} + + ) : ( + <> + )} + )}
diff --git a/frontend/src/components/Layout/Header.tsx b/frontend/src/components/Layout/Header.tsx index 2992fda12..87b1ac160 100644 --- a/frontend/src/components/Layout/Header.tsx +++ b/frontend/src/components/Layout/Header.tsx @@ -8,9 +8,11 @@ import { InformationCircleIconOutline, } from '@neo4j-ndl/react/icons'; import { Typography } from '@neo4j-ndl/react'; -import { useCallback } from 'react'; -import IconButtonWithToolTip from '../IconButtonToolTip'; +import { useCallback, useEffect } from 'react'; +import IconButtonWithToolTip from '../UI/IconButtonToolTip'; import { tooltips } from '../../utils/Constants'; +import { useFileContext } from '../../context/UsersFiles'; +import { Badge } from '@mui/material'; export default function Header({ themeMode, @@ -25,6 +27,12 @@ export default function Header({ window.open(url, '_blank'); }, []); + const { isSchema, setIsSchema } = useFileContext(); + + useEffect(() => { + setIsSchema(isSchema); + }, [isSchema]); + return (
)} - - - + + + + +
diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 9af801a1e..b18a9ec12 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -3,23 +3,33 @@ import SideNav from './SideNav'; import DrawerDropzone from './DrawerDropzone'; import DrawerChatbot from './DrawerChatbot'; import Content from '../Content'; -import SettingsModal from '../SettingModal'; +import SettingsModal from '../Popups/Settings/SettingModal'; import { clearChatAPI } from '../../services/QnaAPI'; import { useCredentials } from '../../context/UserCredentials'; import { UserCredentials, alertStateType } from '../../types'; import { useMessageContext } from '../../context/UserMessages'; import { AlertColor, AlertPropsColorOverrides } from '@mui/material'; import { OverridableStringUnion } from '@mui/types'; -import SchemaFromTextDialog from '../SchemaFromText'; -import CustomAlert from '../Alert'; +import { useFileContext } from '../../context/UsersFiles'; +import SchemaFromTextDialog from '../Popups/Settings/SchemaFromText'; +import CustomAlert from '../UI/Alert'; +import DeletePopUpForOrphanNodes from '../Popups/DeletePopUpForOrphanNodes'; +import deleteOrphanAPI from '../../services/DeleteOrphanNodes'; + export default function PageLayoutNew({ isSettingPanelExpanded, closeSettingModal, openSettingsDialog, + closeOrphanNodeDeletionModal, + showOrphanNodeDeletionModal, + openOrphanNodeDeletionModal, }: { isSettingPanelExpanded: boolean; closeSettingModal: () => void; openSettingsDialog: () => void; + closeOrphanNodeDeletionModal: () => void; + showOrphanNodeDeletionModal: boolean; + openOrphanNodeDeletionModal: () => void; }) { const [isLeftExpanded, setIsLeftExpanded] = useState(true); const [isRightExpanded, setIsRightExpanded] = useState(true); @@ -30,6 +40,7 @@ export default function PageLayoutNew({ const toggleLeftDrawer = () => setIsLeftExpanded(!isLeftExpanded); const 
toggleRightDrawer = () => setIsRightExpanded(!isRightExpanded); const [openTextSchemaDialog, setOpenTextSchemaDialog] = useState(false); + const [orphanDeleteAPIloading, setorphanDeleteAPIloading] = useState(false); const [alertDetails, setalertDetails] = useState({ showAlert: false, alertType: 'error', @@ -38,6 +49,7 @@ export default function PageLayoutNew({ const { messages } = useMessageContext(); const openSchemaFromTextDialog = useCallback(() => setOpenTextSchemaDialog(true), []); const closeSchemaFromTextDialog = useCallback(() => setOpenTextSchemaDialog(false), []); + const { isSchema, setIsSchema } = useFileContext(); const deleteOnClick = async () => { try { @@ -72,6 +84,16 @@ export default function PageLayoutNew({ alertMessage: '', }); }; + const orphanNodesDeleteHandler = async (selectedEntities: string[]) => { + try { + setorphanDeleteAPIloading(true); + const response = await deleteOrphanAPI(userCredentials as UserCredentials, selectedEntities); + setorphanDeleteAPIloading(false); + console.log(response); + } catch (error) { + console.log(error); + } + }; return (
{alertDetails.showAlert && ( @@ -90,16 +112,29 @@ export default function PageLayoutNew({ onClose={closeSchemaFromTextDialog} showAlert={showAlert} > + setShowChatBot(true)} isLeftExpanded={isLeftExpanded} isRightExpanded={isRightExpanded} showChatBot={showChatBot} + openTextSchema={openSchemaFromTextDialog} + openOrphanNodeDeletionModal={openOrphanNodeDeletionModal} + isSchema={isSchema} + setIsSchema={setIsSchema} /> {showDrawerChatbot && ( diff --git a/frontend/src/components/Layout/SideNav.tsx b/frontend/src/components/Layout/SideNav.tsx index 7a657d4a5..12c0fe8bb 100644 --- a/frontend/src/components/Layout/SideNav.tsx +++ b/frontend/src/components/Layout/SideNav.tsx @@ -8,14 +8,14 @@ import { ChatBubbleOvalLeftEllipsisIconOutline, CloudArrowUpIconSolid, } from '@neo4j-ndl/react/icons'; -import {} from '@neo4j-ndl/react/icons'; import { SideNavProps } from '../../types'; -import Chatbot from '../Chatbot'; +import Chatbot from '../ChatBot/Chatbot'; import { createPortal } from 'react-dom'; import { useMessageContext } from '../../context/UserMessages'; import { getIsLoading } from '../../utils/Utils'; -import IconsPlacement from '../IconsPlacement'; +import ExpandedChatButtonContainer from '../ChatBot/ExpandedChatButtonContainer'; import { tooltips } from '../../utils/Constants'; +import ChatModeToggle from '../ChatBot/ChatModeToggle'; const SideNav: React.FC = ({ position, @@ -127,6 +127,7 @@ const SideNav: React.FC = ({ } /> + {!isChatModalOpen && } )} @@ -143,7 +144,7 @@ const SideNav: React.FC = ({ disableCloseButton={true} > - = ({ children, className = '' }) => { - const containerRef = useRef(null); - const [overflowing, setOverflowing] = useState(false); - useEffect(() => { - const container = containerRef.current; - if (container) { - if (container.clientHeight > 440) { - setOverflowing(true); - } else { - setOverflowing(false); - } - } - }, [children]); - return ( -
- {children} -
- ); -}; -export default OverflowContainer; diff --git a/frontend/src/components/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx similarity index 98% rename from frontend/src/components/ConnectionModal.tsx rename to frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index 55ff9896c..2e183cfc6 100644 --- a/frontend/src/components/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -1,9 +1,9 @@ import { Button, Dialog, TextInput, Dropdown, Banner, Dropzone, Typography, TextLink } from '@neo4j-ndl/react'; import { Dispatch, SetStateAction, useCallback, useEffect, useMemo, useState } from 'react'; -import connectAPI from '../services/ConnectAPI'; -import { useCredentials } from '../context/UserCredentials'; +import connectAPI from '../../../services/ConnectAPI'; +import { useCredentials } from '../../../context/UserCredentials'; import { useSearchParams } from 'react-router-dom'; -import { buttonCaptions } from '../utils/Constants'; +import { buttonCaptions } from '../../../utils/Constants'; interface Message { type: 'success' | 'info' | 'warning' | 'danger' | 'unknown'; diff --git a/frontend/src/components/DeletePopUp.tsx b/frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx similarity index 100% rename from frontend/src/components/DeletePopUp.tsx rename to frontend/src/components/Popups/DeletePopUp/DeletePopUp.tsx diff --git a/frontend/src/components/Popups/DeletePopUpForOrphanNodes/index.tsx b/frontend/src/components/Popups/DeletePopUpForOrphanNodes/index.tsx new file mode 100644 index 000000000..ac2cdbb21 --- /dev/null +++ b/frontend/src/components/Popups/DeletePopUpForOrphanNodes/index.tsx @@ -0,0 +1,196 @@ +import { List, ListItem, ListItemButton, ListItemIcon, ListItemText } from '@mui/material'; +import { Box, Button, Checkbox, Dialog, Flex, Typography } from '@neo4j-ndl/react'; +import { useCallback, useEffect, useState } from 'react'; +import { 
UserCredentials, orphanNodeProps } from '../../../types'; +import { getOrphanNodes } from '../../../services/GetOrphanNodes'; +import { useCredentials } from '../../../context/UserCredentials'; +import Loader from '../../../utils/Loader'; +import Legend from '../../UI/Legend'; +import { calcWordColor } from '@neo4j-devtools/word-color'; +import { DocumentIconOutline } from '@neo4j-ndl/react/icons'; +import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; + +export default function DeletePopUpForOrphanNodes({ + open, + deleteHandler, + deleteCloseHandler, + loading, +}: { + open: boolean; + deleteHandler: (selectedEntities: string[]) => Promise; + deleteCloseHandler: () => void; + loading: boolean; +}) { + const [orphanNodes, setOrphanNodes] = useState([]); + const [selectedOrphanNodesForDeletion, setselectedOrphanNodesForDeletion] = useState([]); + const [selectedAll, setselectedAll] = useState(false); + const [isLoading, setLoading] = useState(false); + const { userCredentials } = useCredentials(); + + useEffect(() => { + if (open) { + (async () => { + try { + setLoading(true); + const apiresponse = await getOrphanNodes(userCredentials as UserCredentials); + setLoading(false); + if (apiresponse.data.data.length) { + setOrphanNodes(apiresponse.data.data); + } + } catch (error) { + setLoading(false); + console.log(error); + } + })(); + } + }, [userCredentials, open]); + + const onChangeHandler = useCallback((isChecked: boolean, id: string) => { + if (isChecked) { + setselectedOrphanNodesForDeletion((prev) => [...prev, id]); + setOrphanNodes((prev) => prev.map((n) => ({ ...n, checked: n.e.elementId === id ? true : n.checked }))); + } else { + setselectedAll(false); + setselectedOrphanNodesForDeletion((prev) => prev.filter((s) => s != id)); + setOrphanNodes((prev) => prev.map((n) => ({ ...n, checked: n.e.elementId === id ? 
false : n.checked }))); + } + }, []); + + return ( + { + deleteCloseHandler(); + setselectedOrphanNodesForDeletion([]); + setOrphanNodes([]); + setselectedAll(false); + }} + > + + + Orphan Nodes Deletion + 100 nodes per batch + + + + {orphanNodes.length ? ( + { + if (e.target.checked) { + setselectedAll(true); + setOrphanNodes((prev) => prev.map((n) => ({ ...n, checked: true }))); + setselectedOrphanNodesForDeletion(orphanNodes.map((n) => n.e.elementId)); + } else { + setselectedAll(false); + setOrphanNodes((prev) => prev.map((n) => ({ ...n, checked: false }))); + setselectedOrphanNodesForDeletion([]); + } + }} + > + ) : ( + <> + )} + + {orphanNodes.length > 0 ? ( + orphanNodes.map((n, i) => { + return ( + + + + onChangeHandler(e.target.checked, n.e.elementId)} + tabIndex={-1} + /> + + + {n.e.id} + Connected Chunks: {n.chunkConnections} + + } + secondary={ + + + Labels : + + {n.e.labels.map((l, index) => ( + + ))} + + + + Related Documents : + + {Array.from(new Set([...n.documents])).map((d, index) => ( + + + + + {d} + + ))} + + + + } + /> + + + ); + }) + ) : isLoading ? ( + + ) : ( + <>No Nodes Found + )} + + + + + { + await deleteHandler(selectedOrphanNodesForDeletion); + selectedOrphanNodesForDeletion.forEach((eid: string) => { + setOrphanNodes((prev) => prev.filter((node) => node.e.elementId != eid)); + }); + setOrphanNodes((prev) => prev.map((n) => ({ ...n, checked: false }))); + }} + size='large' + loading={loading} + text={ + isLoading + ? 'Fetching Orphan Nodes' + : !isLoading && !orphanNodes.length + ? 'No Nodes Found' + : !selectedOrphanNodesForDeletion.length + ? 
'No Nodes Selected' + : 'Delete Selected Nodes' + } + label='Orphan Node deletion button' + disabled={!selectedOrphanNodesForDeletion.length || !orphanNodes.length} + > + Continue + + + + ); +} diff --git a/frontend/src/components/ConfirmationDialog.tsx b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx similarity index 97% rename from frontend/src/components/ConfirmationDialog.tsx rename to frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx index d71d05e09..9a9691d92 100644 --- a/frontend/src/components/ConfirmationDialog.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/ConfirmationDialog.tsx @@ -1,8 +1,8 @@ import { Button, Dialog, Typography } from '@neo4j-ndl/react'; -import { CustomFile } from '../types'; +import { CustomFile } from '../../../types'; import LargeFilesAlert from './LargeFilesAlert'; import { useEffect, useState } from 'react'; -import { useFileContext } from '../context/UsersFiles'; +import { useFileContext } from '../../../context/UsersFiles'; export default function ConfirmationDialog({ largeFiles, diff --git a/frontend/src/components/LargeFilesAlert.tsx b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx similarity index 87% rename from frontend/src/components/LargeFilesAlert.tsx rename to frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx index a4a79ef30..aefafde50 100644 --- a/frontend/src/components/LargeFilesAlert.tsx +++ b/frontend/src/components/Popups/LargeFilePopUp/LargeFilesAlert.tsx @@ -1,16 +1,16 @@ import { Box, Checkbox, Flex, Typography } from '@neo4j-ndl/react'; import { DocumentTextIconOutline } from '@neo4j-ndl/react/icons'; -import { LargefilesProps } from '../types'; +import { LargefilesProps } from '../../../types'; import { List, ListItem, ListItemAvatar, ListItemButton, ListItemIcon, ListItemText } from '@mui/material'; import { FC } from 'react'; -import { chunkSize } from '../utils/Constants'; -import BellImage from 
'../assets/images/Stopwatch-blue.svg'; -import AlertIcon from './Layout/AlertIcon'; -import wikipedialogo from '../assets/images/Wikipedia-logo-v2.svg'; -import youtubelogo from '../assets/images/youtube.png'; -import gcslogo from '../assets/images/gcs.webp'; -import s3logo from '../assets/images/s3logo.png'; -import { calculateProcessingTime } from '../utils/Utils'; +import { chunkSize } from '../../../utils/Constants'; +import BellImage from '../../../assets/images/Stopwatch-blue.svg'; +import AlertIcon from '../../Layout/AlertIcon'; +import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg'; +import youtubelogo from '../../../assets/images/youtube.png'; +import gcslogo from '../../../assets/images/gcs.webp'; +import s3logo from '../../../assets/images/s3logo.png'; +import { calculateProcessingTime } from '../../../utils/Utils'; const imageIcon: Record = { Wikipedia: wikipedialogo, diff --git a/frontend/src/components/SchemaFromText.tsx b/frontend/src/components/Popups/Settings/SchemaFromText.tsx similarity index 94% rename from frontend/src/components/SchemaFromText.tsx rename to frontend/src/components/Popups/Settings/SchemaFromText.tsx index d760fb927..58a605edc 100644 --- a/frontend/src/components/SchemaFromText.tsx +++ b/frontend/src/components/Popups/Settings/SchemaFromText.tsx @@ -1,12 +1,12 @@ import { Checkbox, Dialog, Textarea } from '@neo4j-ndl/react'; import { useCallback, useState } from 'react'; -import { getNodeLabelsAndRelTypesFromText } from '../services/SchemaFromTextAPI'; -import { useCredentials } from '../context/UserCredentials'; -import { useFileContext } from '../context/UsersFiles'; +import { getNodeLabelsAndRelTypesFromText } from '../../../services/SchemaFromTextAPI'; +import { useCredentials } from '../../../context/UserCredentials'; +import { useFileContext } from '../../../context/UsersFiles'; import { AlertColor, AlertPropsColorOverrides } from '@mui/material'; import { OverridableStringUnion } from '@mui/types'; 
-import { buttonCaptions } from '../utils/Constants'; -import ButtonWithToolTip from './ButtonWithToolTip'; +import { buttonCaptions } from '../../../utils/Constants'; +import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; const SchemaFromTextDialog = ({ open, diff --git a/frontend/src/components/SettingModal.tsx b/frontend/src/components/Popups/Settings/SettingModal.tsx similarity index 80% rename from frontend/src/components/SettingModal.tsx rename to frontend/src/components/Popups/Settings/SettingModal.tsx index e7476fc49..c6494ba31 100644 --- a/frontend/src/components/SettingModal.tsx +++ b/frontend/src/components/Popups/Settings/SettingModal.tsx @@ -1,27 +1,29 @@ import { Dialog, Dropdown } from '@neo4j-ndl/react'; import { OnChangeValue, ActionMeta } from 'react-select'; -import { OptionType, OptionTypeForExamples, UserCredentials, schema } from '../types'; -import { useFileContext } from '../context/UsersFiles'; -import { getNodeLabelsAndRelTypes } from '../services/GetNodeLabelsRelTypes'; -import { useCredentials } from '../context/UserCredentials'; +import { OptionType, OptionTypeForExamples, SettingsModalProps, UserCredentials, schema } from '../../../types'; +import { useFileContext } from '../../../context/UsersFiles'; +import { getNodeLabelsAndRelTypes } from '../../../services/GetNodeLabelsRelTypes'; +import { useCredentials } from '../../../context/UserCredentials'; import { MouseEventHandler, useCallback, useEffect, useState } from 'react'; -import schemaExamples from '../assets/schemas.json'; -import ButtonWithToolTip from './ButtonWithToolTip'; -import { tooltips } from '../utils/Constants'; +import schemaExamples from '../../../assets/schemas.json'; +import ButtonWithToolTip from '../../UI/ButtonWithToolTip'; +import { buttonCaptions, tooltips } from '../../../utils/Constants'; +import { useAlertContext } from '../../../context/Alert'; -export default function SettingsModal({ +const SettingsModal: React.FC = ({ open, onClose, - 
opneTextSchema, -}: { - open: boolean; - onClose: () => void; - opneTextSchema: () => void; -}) { + openTextSchema, + onContinue, + settingView, + setIsSchema, + isSchema, +}) => { const { setSelectedRels, setSelectedNodes, selectedNodes, selectedRels, selectedSchemas, setSelectedSchemas } = useFileContext(); const { userCredentials } = useCredentials(); const [loading, setLoading] = useState(false); + const removeNodesAndRels = (nodelabels: string[], relationshipTypes: string[]) => { const labelsToRemoveSet = new Set(nodelabels); const relationshipLabelsToremoveSet = new Set(relationshipTypes); @@ -48,7 +50,6 @@ export default function SettingsModal({ const { nodelabels, relationshipTypes } = removedSchema; removeNodesAndRels(nodelabels, relationshipTypes); } else if (actionMeta.action === 'clear') { - console.log({ actionMeta }); const removedSchemas = actionMeta.removedValues.map((s) => JSON.parse(s.value)); const removedNodelabels = removedSchemas.map((s) => s.nodelabels).flatMap((k) => k); const removedRelations = removedSchemas.map((s) => s.relationshipTypes).flatMap((k) => k); @@ -115,6 +116,8 @@ export default function SettingsModal({ const [relationshipTypeOptions, setrelationshipTypeOptions] = useState([]); const [defaultExamples, setdefaultExamples] = useState([]); + const { showAlert } = useAlertContext(); + useEffect(() => { const parsedData = schemaExamples.reduce((accu: OptionTypeForExamples[], example) => { const examplevalues: OptionTypeForExamples = { @@ -129,6 +132,7 @@ export default function SettingsModal({ }, []); setdefaultExamples(parsedData); }, []); + useEffect(() => { if (userCredentials && open) { const getOptions = async () => { @@ -157,6 +161,21 @@ export default function SettingsModal({ setSelectedRels(relationshipTypeOptions); }, [nodeLabelOptions, relationshipTypeOptions]); + const handleClear = () => { + setIsSchema(false); + setSelectedNodes([]); + setSelectedRels([]); + setSelectedSchemas([]); + localStorage.setItem('isSchema', 
JSON.stringify(false)); + localStorage.setItem('selectedNodeLabels', JSON.stringify({ db: userCredentials?.uri, selectedOptions: [] })); + localStorage.setItem( + 'selectedRelationshipLabels', + JSON.stringify({ db: userCredentials?.uri, selectedOptions: [] }) + ); + showAlert('info', `Successfully Removed the Schema settings`); + onClose(); + }; + return ( Entity Graph Extraction Settings @@ -172,7 +191,7 @@ export default function SettingsModal({ value: selectedSchemas, menuPosition: 'fixed', }} - type='creatable' + type='select' /> - + { onClose(); - opneTextSchema(); + openTextSchema(); }} label='Get Existing Schema From Text' > Get Schema From Text + {settingView === 'contentView' ? ( + + {buttonCaptions.continueSettings} + + ) : ( + + {buttonCaptions.clearSettings} + + )} ); -} +}; + +export default SettingsModal; diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 62ee48817..83442f886 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -11,6 +11,7 @@ const QuickStarter: React.FunctionComponent = () => { const themeUtils = React.useContext(ThemeWrapperContext); const [themeMode, setThemeMode] = useState(themeUtils.colorMode); const [showSettingsModal, setshowSettingsModal] = useState(false); + const [showOrphanNodeDeletionDialog, setshowOrphanNodeDeletionDialog] = useState(false); const toggleColorMode = () => { setThemeMode((prevThemeMode) => { @@ -24,6 +25,12 @@ const QuickStarter: React.FunctionComponent = () => { const closeSettingModal = () => { setshowSettingsModal(false); }; + const openOrphanNodeDeletionModal = () => { + setshowOrphanNodeDeletionDialog(true); + }; + const closeOrphanNodeDeletionModal = () => { + setshowOrphanNodeDeletionDialog(false); + }; return ( @@ -34,6 +41,9 @@ const QuickStarter: React.FunctionComponent = () => { openSettingsDialog={openSettingsModal} isSettingPanelExpanded={showSettingsModal} 
closeSettingModal={closeSettingModal} + closeOrphanNodeDeletionModal={closeOrphanNodeDeletionModal} + showOrphanNodeDeletionModal={showOrphanNodeDeletionDialog} + openOrphanNodeDeletionModal={openOrphanNodeDeletionModal} /> diff --git a/frontend/src/components/Alert.tsx b/frontend/src/components/UI/Alert.tsx similarity index 91% rename from frontend/src/components/Alert.tsx rename to frontend/src/components/UI/Alert.tsx index cfd732770..d98cc38f6 100644 --- a/frontend/src/components/Alert.tsx +++ b/frontend/src/components/UI/Alert.tsx @@ -1,7 +1,7 @@ import React from 'react'; import Snackbar from '@mui/material/Snackbar'; import Alert from '@mui/material/Alert'; -import { CustomAlertProps } from '../types'; +import { CustomAlertProps } from '../../types'; const CustomAlert: React.FC = ({ open, handleClose, alertMessage, severity = 'error' }) => { return ( diff --git a/frontend/src/components/ButtonWithToolTip.tsx b/frontend/src/components/UI/ButtonWithToolTip.tsx similarity index 100% rename from frontend/src/components/ButtonWithToolTip.tsx rename to frontend/src/components/UI/ButtonWithToolTip.tsx diff --git a/frontend/src/components/CustomButton.tsx b/frontend/src/components/UI/CustomButton.tsx similarity index 87% rename from frontend/src/components/CustomButton.tsx rename to frontend/src/components/UI/CustomButton.tsx index 1f0919c01..038adbe36 100644 --- a/frontend/src/components/CustomButton.tsx +++ b/frontend/src/components/UI/CustomButton.tsx @@ -1,4 +1,4 @@ -import { CommonButtonProps } from '../types'; +import { CommonButtonProps } from '../../types'; const CustomButton: React.FC = ({ openModal, wrapperclassName, logo, title, className }) => { return ( diff --git a/frontend/src/components/CustomProgressBar.tsx b/frontend/src/components/UI/CustomProgressBar.tsx similarity index 100% rename from frontend/src/components/CustomProgressBar.tsx rename to frontend/src/components/UI/CustomProgressBar.tsx diff --git a/frontend/src/components/UI/ErrroBoundary.tsx 
b/frontend/src/components/UI/ErrroBoundary.tsx new file mode 100644 index 000000000..b76a27695 --- /dev/null +++ b/frontend/src/components/UI/ErrroBoundary.tsx @@ -0,0 +1,45 @@ +import React from 'react'; +import { Banner } from '@neo4j-ndl/react'; + +export default class ErrorBoundary extends React.Component { + state = { hasError: false, errorMessage: '' }; + + static getDerivedStateFromError(_error: unknown) { + return { hasError: true }; + } + + componentDidCatch(error: Error, errorInfo: any) { + this.setState({ ...this.state, errorMessage: error.message }); + console.log({ error }); + console.log({ errorInfo }); + } + + render() { + if (this.state.hasError) { + return ( +
+ +
+ ); + } + return this.props.children; + } +} diff --git a/frontend/src/components/HoverableLink.tsx b/frontend/src/components/UI/HoverableLink.tsx similarity index 98% rename from frontend/src/components/HoverableLink.tsx rename to frontend/src/components/UI/HoverableLink.tsx index 763bc716e..a212b0c88 100644 --- a/frontend/src/components/HoverableLink.tsx +++ b/frontend/src/components/UI/HoverableLink.tsx @@ -1,5 +1,5 @@ import React, { useState, useEffect, useRef } from 'react'; -import { HoverableLinkProps } from '../types'; +import { HoverableLinkProps } from '../../types'; const HoverableLink: React.FC = ({ url, children }) => { const [hovering, setHovering] = useState(false); const [iframeSrc, setIframeSrc] = useState(''); diff --git a/frontend/src/components/IconButtonToolTip.tsx b/frontend/src/components/UI/IconButtonToolTip.tsx similarity index 100% rename from frontend/src/components/IconButtonToolTip.tsx rename to frontend/src/components/UI/IconButtonToolTip.tsx diff --git a/frontend/src/components/UI/Legend.tsx b/frontend/src/components/UI/Legend.tsx new file mode 100644 index 000000000..5c7110a06 --- /dev/null +++ b/frontend/src/components/UI/Legend.tsx @@ -0,0 +1,16 @@ +export default function Legend({ + bgColor, + title, + chunkCount, +}: { + bgColor: string; + title: string; + chunkCount?: number; +}) { + return ( +
+ {title} + {chunkCount && `(${chunkCount})`} +
+ ); +} diff --git a/frontend/src/components/UI/Menu.tsx b/frontend/src/components/UI/Menu.tsx new file mode 100644 index 000000000..9315d46d5 --- /dev/null +++ b/frontend/src/components/UI/Menu.tsx @@ -0,0 +1,41 @@ +import { Menu } from '@neo4j-ndl/react'; +import { Menuitems, Origin } from '../../types'; + +export default function CustomMenu({ + open, + closeHandler, + items, + MenuAnchor, + anchorOrigin, + transformOrigin, +}: { + open: boolean; + closeHandler: () => void; + items: Menuitems[]; + MenuAnchor: HTMLElement | null; + anchorOrigin?: Origin; + transformOrigin?: Origin; +}) { + return ( + + {items.map((i, idx) => { + return ( + + ); + })} + + ); +} diff --git a/frontend/src/components/WebSources/CustomSourceInput.tsx b/frontend/src/components/WebSources/CustomSourceInput.tsx new file mode 100644 index 000000000..2cc1d5f65 --- /dev/null +++ b/frontend/src/components/WebSources/CustomSourceInput.tsx @@ -0,0 +1,71 @@ +import { Banner, Box, Button, Flex, TextInput } from '@neo4j-ndl/react'; +import { CustomInput } from '../../types'; + +export default function CustomSourceInput({ + value, + label, + placeHolder, + onChangeHandler, + submitHandler, + disabledCheck, + onCloseHandler, + id, + onBlurHandler, + status, + setStatus, + statusMessage, + isValid, + isFocused, + onPasteHandler, +}: CustomInput) { + return ( + + {status !== 'unknown' && ( + + setStatus('unknown')} + type={status} + name='Custom Banner' + className='text-lg font-semibold' + /> + + )} + +
+ +
+
+ + + + +
+ ); +} diff --git a/frontend/src/components/WebSources/GenericSourceButton.tsx b/frontend/src/components/WebSources/GenericSourceButton.tsx new file mode 100644 index 000000000..87be06fa5 --- /dev/null +++ b/frontend/src/components/WebSources/GenericSourceButton.tsx @@ -0,0 +1,37 @@ +import CustomButton from '../UI/CustomButton'; +import internet from '../../assets/images/web-search-svgrepo-com.svg'; +import { DataComponentProps } from '../../types'; +import { Flex, Typography } from '@neo4j-ndl/react'; +import IconButtonWithToolTip from '../UI/IconButtonToolTip'; +import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons'; +import { APP_SOURCES } from '../../utils/Constants'; + +export default function GenericButton({ openModal }: DataComponentProps) { + return ( + + + + + Web Sources +
+ + + {APP_SOURCES != undefined && APP_SOURCES.includes('youtube') && Youtube} + {APP_SOURCES != undefined && APP_SOURCES.includes('wiki') && Wikipedia} + {APP_SOURCES != undefined && APP_SOURCES.includes('web') && Website} + + + } + > + + +
+
+
+
+ ); +} diff --git a/frontend/src/components/WebSources/GenericSourceModal.tsx b/frontend/src/components/WebSources/GenericSourceModal.tsx new file mode 100644 index 000000000..6ad89bf02 --- /dev/null +++ b/frontend/src/components/WebSources/GenericSourceModal.tsx @@ -0,0 +1,75 @@ +import { Box, Dialog, Tabs, Typography } from '@neo4j-ndl/react'; +import youtubelogo from '../../assets/images/youtube.png'; +import wikipedialogo from '../../assets/images/Wikipedia-logo-v2.svg'; +import weblogo from '../../assets/images/web-svgrepo-com.svg'; +import { useState } from 'react'; +import WikipediaInput from './WikiPedia/WikipediaInput'; +import WebInput from './Web/WebInput'; +import YoutubeInput from './Youtube/YoutubeInput'; +import { APP_SOURCES } from '../../utils/Constants'; +import Neo4jRetrievalLogo from '../../assets/images/Neo4jRetrievalLogo.png'; + +export default function GenericModal({ + open, + closeHandler, + isOnlyYoutube, + isOnlyWikipedia, + isOnlyWeb, +}: { + open: boolean; + closeHandler: () => void; + isOnlyYoutube?: boolean; + isOnlyWikipedia?: boolean; + isOnlyWeb?: boolean; +}) { + const [activeTab, setactiveTab] = useState(isOnlyYoutube ? 0 : isOnlyWikipedia ? 1 : isOnlyWeb ? 
2 : 0); + const [isLoading, setIsLoading] = useState(false); + + return ( + + + + + + Web Sources + + Convert Any Web Source to Knoweldge graph + + + + + {APP_SOURCES != undefined && APP_SOURCES.includes('youtube') && ( + + + + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('wiki') && ( + + + + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('web') && ( + + + + )} + + {APP_SOURCES != undefined && APP_SOURCES.includes('youtube') && ( + + + + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('wiki') && ( + + + + )} + {APP_SOURCES != undefined && APP_SOURCES.includes('web') && ( + + + + )} + + + ); +} diff --git a/frontend/src/components/WebSources/Web/WebInput.tsx b/frontend/src/components/WebSources/Web/WebInput.tsx new file mode 100644 index 000000000..16dcc46d7 --- /dev/null +++ b/frontend/src/components/WebSources/Web/WebInput.tsx @@ -0,0 +1,38 @@ +import { webLinkValidation } from '../../../utils/Utils'; +import useSourceInput from '../../../hooks/useSourceInput'; +import CustomSourceInput from '../CustomSourceInput'; + +export default function WebInput({ setIsLoading }: { setIsLoading: React.Dispatch> }) { + const { + inputVal, + onChangeHandler, + onBlurHandler, + submitHandler, + status, + setStatus, + statusMessage, + isFocused, + isValid, + onClose, + onPasteHandler, + } = useSourceInput(webLinkValidation, setIsLoading, 'web-url', false, false, true); + return ( + + ); +} diff --git a/frontend/src/components/WebSources/WikiPedia/WikipediaInput.tsx b/frontend/src/components/WebSources/WikiPedia/WikipediaInput.tsx new file mode 100644 index 000000000..cf90d8349 --- /dev/null +++ b/frontend/src/components/WebSources/WikiPedia/WikipediaInput.tsx @@ -0,0 +1,42 @@ +import { wikiValidation } from '../../../utils/Utils'; +import useSourceInput from '../../../hooks/useSourceInput'; +import CustomSourceInput from '../CustomSourceInput'; + +export default function WikipediaInput({ + setIsLoading, +}: { + setIsLoading: React.Dispatch>; +}) { + const 
{ + inputVal, + onChangeHandler, + onBlurHandler, + submitHandler, + status, + setStatus, + statusMessage, + isFocused, + isValid, + onClose, + onPasteHandler, + } = useSourceInput(wikiValidation, setIsLoading, 'Wikipedia', true, false, false); + return ( + + ); +} diff --git a/frontend/src/components/WebSources/Youtube/YoutubeInput.tsx b/frontend/src/components/WebSources/Youtube/YoutubeInput.tsx new file mode 100644 index 000000000..8652d7f92 --- /dev/null +++ b/frontend/src/components/WebSources/Youtube/YoutubeInput.tsx @@ -0,0 +1,42 @@ +import CustomSourceInput from '../CustomSourceInput'; +import useSourceInput from '../../../hooks/useSourceInput'; +import { youtubeLinkValidation } from '../../../utils/Utils'; + +export default function YoutubeInput({ + setIsLoading, +}: { + setIsLoading: React.Dispatch>; +}) { + const { + inputVal, + onChangeHandler, + onBlurHandler, + submitHandler, + status, + setStatus, + statusMessage, + isFocused, + isValid, + onClose, + onPasteHandler, + } = useSourceInput(youtubeLinkValidation, setIsLoading, 'youtube', false, true, false); + return ( + + ); +} diff --git a/frontend/src/components/Wikipedia.tsx b/frontend/src/components/Wikipedia.tsx deleted file mode 100644 index 5a3b84450..000000000 --- a/frontend/src/components/Wikipedia.tsx +++ /dev/null @@ -1,10 +0,0 @@ -import wikipedialogo from '../assets/images/Wikipedia-logo-v2.svg'; - -export default function Wikipedia({ openModal }: { openModal: () => void }) { - return ( -
- -
Wikipedia
-
- ); -} diff --git a/frontend/src/components/WikipediaModal.tsx b/frontend/src/components/WikipediaModal.tsx deleted file mode 100644 index ed7809301..000000000 --- a/frontend/src/components/WikipediaModal.tsx +++ /dev/null @@ -1,165 +0,0 @@ -import { useCallback, useState } from 'react'; -import CustomModal from '../HOC/CustomModal'; -import { TextInput } from '@neo4j-ndl/react'; -import { CustomFile, CustomFileBase, UserCredentials, WikipediaModalTypes, fileName } from '../types'; -import { useFileContext } from '../context/UsersFiles'; -import { v4 as uuidv4 } from 'uuid'; -import { useCredentials } from '../context/UserCredentials'; -import { urlScanAPI } from '../services/URLScan'; -import { buttonCaptions } from '../utils/Constants'; -import { wikiValidation } from '../utils/Utils'; - -const WikipediaModal: React.FC = ({ hideModal, open }) => { - const [wikiQuery, setwikiQuery] = useState(''); - const [statusMessage, setStatusMessage] = useState(''); - const [status, setStatus] = useState<'unknown' | 'success' | 'info' | 'warning' | 'danger'>('unknown'); - const { setFilesData, model, filesData } = useFileContext(); - const { userCredentials } = useCredentials(); - const [isFocused, setisFocused] = useState(false); - const [isValid, setValid] = useState(false); - const onClose = useCallback(() => { - hideModal(); - setwikiQuery(''); - setStatus('unknown'); - setValid(false); - setisFocused(false); - }, []); - - const submitHandler = async (url: string) => { - const defaultValues: CustomFileBase = { - processing: 0, - status: 'New', - NodesCount: 0, - relationshipCount: 0, - type: 'TEXT', - model: model, - fileSource: 'Wikipedia', - processingProgress: undefined, - }; - if (url.trim() != '') { - setValid(wikiValidation(url) && isFocused); - } - if (isValid) { - try { - setStatus('info'); - setStatusMessage('Scanning...'); - const apiResponse = await urlScanAPI({ - userCredentials: userCredentials as UserCredentials, - model: model, - wikiquery: 
wikiQuery.trim(), - source_type: 'Wikipedia', - }); - setStatus('success'); - if (apiResponse?.data.status == 'Failed' || !apiResponse.data) { - setStatus('danger'); - setStatusMessage(apiResponse?.data?.message); - setTimeout(() => { - setStatus('unknown'); - setwikiQuery(''); - setValid(false); - setisFocused(false); - hideModal(); - }, 5000); - return; - } - - const apiResCheck = apiResponse?.data?.success_count && apiResponse.data.failed_count; - if (apiResCheck) { - setStatus('info'); - setStatusMessage( - `Successfully Created Source Node for ${apiResponse.data.success_count} and Failed for ${apiResponse.data.failed_count} Wikipedia Link` - ); - } else if (apiResponse?.data?.success_count) { - setStatusMessage(`Successfully Created Source Node for ${apiResponse.data.success_count} Wikipedia Link`); - } else { - setStatus('danger'); - setStatusMessage(`Failed to Create Source Node for ${apiResponse.data.failed_count} Wikipedia Link`); - } - - const copiedFilesData: CustomFile[] = [...filesData]; - apiResponse?.data?.file_name?.forEach((item: fileName) => { - const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item?.fileName); - if (filedataIndex == -1) { - copiedFilesData.unshift({ - name: item.fileName, - size: item.fileSize, - wiki_query: item.fileName, - source_url: item.url, - id: uuidv4(), - language: item.language, - // total_pages: 1, - ...defaultValues, - }); - } else { - const tempFileData = copiedFilesData[filedataIndex]; - copiedFilesData.splice(filedataIndex, 1); - copiedFilesData.unshift({ - ...tempFileData, - status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, - model: defaultValues.model, - fileSource: defaultValues.fileSource, - processingProgress: defaultValues.processingProgress, - }); - } - }); - setFilesData(copiedFilesData); - setwikiQuery(''); - setValid(false); - setisFocused(false); - } 
catch (error) { - setStatus('danger'); - setStatusMessage('Some Error Occurred or Please Check your Instance Connection'); - } - } else { - setStatus('danger'); - setStatusMessage('Please Fill the Wikipedia Link'); - setTimeout(() => { - setStatus('unknown'); - }, 5000); - return; - } - setTimeout(() => { - setStatus('unknown'); - hideModal(); - }, 1000); - }; - return ( - submitHandler(wikiQuery)} - status={status} - submitLabel={buttonCaptions.submit} - > -
- setValid(wikiValidation(wikiQuery) && isFocused)} - errorText={!isValid && isFocused && 'Please Fill The Valid URL'} - onChange={(e) => { - setisFocused(true); - if (e.target.value.includes('https://en.wikipedia.org/wiki/')) { - setValid(wikiValidation(e.target.value)); - } - setwikiQuery(e.target.value); - }} - /> -
-
- ); -}; -export default WikipediaModal; diff --git a/frontend/src/components/YoutubeButton.tsx b/frontend/src/components/YoutubeButton.tsx deleted file mode 100644 index 13beb37ff..000000000 --- a/frontend/src/components/YoutubeButton.tsx +++ /dev/null @@ -1,16 +0,0 @@ -import youtubelogo from '../assets/images/youtube.png'; -import { DataComponentProps } from '../types'; -import { buttonCaptions } from '../utils/Constants'; -import CustomButton from './CustomButton'; -const YouTubeButton: React.FC = ({ openModal }) => { - return ( - - ); -}; -export default YouTubeButton; diff --git a/frontend/src/components/YoutubeModal.tsx b/frontend/src/components/YoutubeModal.tsx deleted file mode 100644 index 0db313ceb..000000000 --- a/frontend/src/components/YoutubeModal.tsx +++ /dev/null @@ -1,132 +0,0 @@ -import { TextInput } from '@neo4j-ndl/react'; -import { useCallback, useState } from 'react'; -import { useCredentials } from '../context/UserCredentials'; -import { useFileContext } from '../context/UsersFiles'; -import { urlScanAPI } from '../services/URLScan'; -import { CustomFileBase, S3ModalProps } from '../types'; -import { v4 as uuidv4 } from 'uuid'; -import CustomModal from '../HOC/CustomModal'; -import { buttonCaptions } from '../utils/Constants'; - -const YoutubeModal: React.FC = ({ hideModal, open }) => { - const [youtubeURL, setYoutubeURL] = useState(''); - const [status, setStatus] = useState<'unknown' | 'success' | 'info' | 'warning' | 'danger'>('unknown'); - const [statusMessage, setStatusMessage] = useState(''); - const { userCredentials } = useCredentials(); - const { setFilesData, model, filesData } = useFileContext(); - const submitHandler = async () => { - const defaultValues: CustomFileBase = { - processing: 0, - status: 'New', - NodesCount: 0, - relationshipCount: 0, - type: 'TEXT', - model: model, - fileSource: 'youtube', - processingProgress: undefined, - }; - if (!youtubeURL) { - setStatus('danger'); - setStatusMessage('Please Fill the Valid 
YouTube link'); - setTimeout(() => { - setStatus('unknown'); - }, 5000); - } else { - try { - setStatus('info'); - setStatusMessage('Loading...'); - const apiResponse = await urlScanAPI({ - urlParam: youtubeURL.trim(), - userCredentials, - model, - accessKey: '', - secretKey: '', - source_type: 'youtube', - }); - if (apiResponse.data.status == 'Failed' || !apiResponse.data) { - setStatus('danger'); - setStatusMessage(apiResponse?.data.message); - setTimeout(() => { - setStatus('unknown'); - setYoutubeURL(''); - hideModal(); - }, 5000); - return; - } - setStatus('success'); - setStatusMessage(`Successfully Created Source Nodes for Link`); - const copiedFilesData = [...filesData]; - apiResponse?.data?.file_name?.forEach((item) => { - const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item.fileName); - if (filedataIndex == -1) { - copiedFilesData.unshift({ - name: item.fileName, - size: item.fileSize ?? 0, - source_url: item.url, - // total_pages: 1, - id: uuidv4(), - ...defaultValues, - }); - } else { - const tempFileData = copiedFilesData[filedataIndex]; - copiedFilesData.splice(filedataIndex, 1); - copiedFilesData.unshift({ - ...tempFileData, - status: defaultValues.status, - NodesCount: defaultValues.NodesCount, - relationshipCount: defaultValues.relationshipCount, - processing: defaultValues.processing, - model: defaultValues.model, - fileSource: defaultValues.fileSource, - processingProgress: defaultValues.processingProgress, - }); - } - }); - setFilesData(copiedFilesData); - setYoutubeURL(''); - } catch (error) { - setStatus('danger'); - setStatusMessage('Some Error Occurred or Please Check your Instance Connection'); - } - } - setTimeout(() => { - setStatus('unknown'); - hideModal(); - }, 500); - }; - const onClose = useCallback(() => { - setYoutubeURL(''); - hideModal(); - setStatus('unknown'); - }, []); - - return ( - -
- { - setYoutubeURL(e.target.value); - }} - /> -
-
- ); -}; -export default YoutubeModal; diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx index b6160275b..73c6bd66b 100644 --- a/frontend/src/context/UsersFiles.tsx +++ b/frontend/src/context/UsersFiles.tsx @@ -22,6 +22,10 @@ interface FileContextType { setSelectedRows: React.Dispatch>; selectedSchemas: readonly OptionType[]; setSelectedSchemas: Dispatch>; + chatMode: string; + setchatMode: Dispatch>; + isSchema: boolean; + setIsSchema: React.Dispatch>; } const FileContext = createContext(undefined); @@ -38,7 +42,9 @@ const FileContextProvider: FC = ({ children }) => { const [selectedSchemas, setSelectedSchemas] = useState([]); const [rowSelection, setRowSelection] = useState>({}); const [selectedRows, setSelectedRows] = useState([]); + const [chatMode, setchatMode] = useState('graph+vector'); const { userCredentials } = useCredentials(); + const [isSchema, setIsSchema] = useState(false); useEffect(() => { if (selectedNodeLabelstr != null) { @@ -74,6 +80,10 @@ const FileContextProvider: FC = ({ children }) => { setSelectedRows, selectedSchemas, setSelectedSchemas, + chatMode, + setchatMode, + isSchema, + setIsSchema, }; return {children}; }; diff --git a/frontend/src/hooks/useSourceInput.tsx b/frontend/src/hooks/useSourceInput.tsx new file mode 100644 index 000000000..76a3fa6a7 --- /dev/null +++ b/frontend/src/hooks/useSourceInput.tsx @@ -0,0 +1,176 @@ +import React, { useCallback, useState } from 'react'; +import { CustomFile, CustomFileBase, ScanProps, UserCredentials, fileName } from '../types'; +import { useFileContext } from '../context/UsersFiles'; +import { useCredentials } from '../context/UserCredentials'; +import { urlScanAPI } from '../services/URLScan'; +import { v4 as uuidv4 } from 'uuid'; + +export default function useSourceInput( + validator: (e: string) => boolean, + setIsLoading: React.Dispatch>, + fileSource: string, + isWikiQuery?: boolean, + isYoutubeLink?: boolean, + isWebLink?: boolean +) { + const 
[inputVal, setInputVal] = useState(''); + const [isValid, setIsValid] = useState(false); + const [isFocused, setIsFocused] = useState(false); + const [status, setStatus] = useState<'unknown' | 'success' | 'info' | 'warning' | 'danger'>('unknown'); + const [statusMessage, setStatusMessage] = useState(''); + const { setFilesData, model, filesData } = useFileContext(); + const { userCredentials } = useCredentials(); + + const onChangeHandler: React.ChangeEventHandler = useCallback((e) => { + setIsFocused(true); + if (e.target.value.length >= 10) { + setIsValid(validator(e.target.value) && true); + } + setInputVal(e.target.value); + }, []); + const onBlurHandler: React.FocusEventHandler = useCallback(() => { + setIsValid(validator(inputVal) && isFocused); + }, [inputVal, isFocused]); + + const onPasteHandler: React.ClipboardEventHandler = useCallback(() => { + setIsFocused(true); + setIsValid(validator(inputVal) && true); + }, [inputVal]); + + const onClose = useCallback(() => { + setInputVal(''); + setStatus('unknown'); + setIsValid(false); + setIsFocused(false); + }, []); + + const submitHandler = useCallback( + async (url: string) => { + const defaultValues: CustomFileBase = { + processing: 0, + status: 'New', + NodesCount: 0, + relationshipCount: 0, + type: 'TEXT', + model: model, + fileSource: fileSource, + processingProgress: undefined, + }; + if (url.trim() != '') { + setIsValid(validator(url) && isFocused); + } + if (isValid) { + try { + setStatus('info'); + setIsLoading(true); + setStatusMessage('Scanning...'); + const params: ScanProps = { + userCredentials: userCredentials as UserCredentials, + model: model, + source_type: fileSource, + }; + if (isWikiQuery) { + params.wikiquery = url.trim(); + } else if (isYoutubeLink || isWebLink) { + params.urlParam = url.trim(); + } + const apiResponse = await urlScanAPI(params); + setIsLoading(false); + setStatus('success'); + if (apiResponse?.data.status == 'Failed' || !apiResponse.data) { + setStatus('danger'); + 
setStatusMessage(apiResponse?.data?.message); + setTimeout(() => { + setStatus('unknown'); + setInputVal(''); + setIsValid(false); + setIsFocused(false); + }, 5000); + return; + } + + const apiResCheck = apiResponse?.data?.success_count && apiResponse.data.failed_count; + if (apiResCheck) { + setStatus('info'); + setStatusMessage( + `Successfully Created Source Node for ${apiResponse.data.success_count} and Failed for ${apiResponse.data.failed_count} ${fileSource} Link` + ); + } else if (apiResponse?.data?.success_count) { + setStatusMessage( + `Successfully Created Source Node for ${apiResponse.data.success_count} ${fileSource} Link` + ); + } else { + setStatus('danger'); + setStatusMessage(`Failed to Create Source Node for ${apiResponse.data.failed_count} ${fileSource} Link`); + } + + const copiedFilesData: CustomFile[] = [...filesData]; + apiResponse?.data?.file_name?.forEach((item: fileName) => { + const filedataIndex = copiedFilesData.findIndex((filedataitem) => filedataitem?.name === item?.fileName); + if (filedataIndex == -1) { + const baseValues = { + name: item.fileName, + size: item.fileSize, + source_url: item.url, + id: uuidv4(), + language: item.language, + // total_pages: 1, + ...defaultValues, + }; + if (isWikiQuery) { + baseValues.wiki_query = item.fileName; + } + copiedFilesData.unshift(baseValues); + } else { + const tempFileData = copiedFilesData[filedataIndex]; + copiedFilesData.splice(filedataIndex, 1); + copiedFilesData.unshift({ + ...tempFileData, + status: defaultValues.status, + NodesCount: defaultValues.NodesCount, + relationshipCount: defaultValues.relationshipCount, + processing: defaultValues.processing, + model: defaultValues.model, + fileSource: defaultValues.fileSource, + processingProgress: defaultValues.processingProgress, + }); + } + }); + setFilesData(copiedFilesData); + setInputVal(''); + setIsValid(false); + setIsFocused(false); + } catch (error) { + setStatus('danger'); + setStatusMessage('Some Error Occurred or Please Check 
your Instance Connection'); + } + } else { + setStatus('danger'); + setStatusMessage(`Please Fill the ${fileSource} Link`); + setTimeout(() => { + setStatus('unknown'); + }, 5000); + return; + } + setTimeout(() => { + setStatus('unknown'); + }, 1000); + }, + + [filesData, isWikiQuery, isYoutubeLink, isWebLink, isValid, fileSource, model] + ); + + return { + inputVal, + onChangeHandler, + onBlurHandler, + isValid, + isFocused, + status, + setStatus, + statusMessage, + submitHandler, + onClose, + onPasteHandler, + }; +} diff --git a/frontend/src/services/deleteFiles.ts b/frontend/src/services/DeleteFiles.ts similarity index 100% rename from frontend/src/services/deleteFiles.ts rename to frontend/src/services/DeleteFiles.ts diff --git a/frontend/src/services/DeleteOrphanNodes.ts b/frontend/src/services/DeleteOrphanNodes.ts new file mode 100644 index 000000000..135dfcdeb --- /dev/null +++ b/frontend/src/services/DeleteOrphanNodes.ts @@ -0,0 +1,20 @@ +import axios from 'axios'; +import { url } from '../utils/Utils'; +import { UserCredentials } from '../types'; + +const deleteOrphanAPI = async (userCredentials: UserCredentials, selectedNodes: string[]) => { + try { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? 
''); + formData.append('unconnected_entities_list', JSON.stringify(selectedNodes)); + const response = await axios.post(`${url()}/delete_unconnected_nodes`, formData); + return response; + } catch (error) { + console.log('Error Posting the Question:', error); + throw error; + } +}; +export default deleteOrphanAPI; diff --git a/frontend/src/services/GetOrphanNodes.ts b/frontend/src/services/GetOrphanNodes.ts new file mode 100644 index 000000000..70cfe8cda --- /dev/null +++ b/frontend/src/services/GetOrphanNodes.ts @@ -0,0 +1,18 @@ +import axios from 'axios'; +import { url } from '../utils/Utils'; +import { OrphanNodeResponse, UserCredentials } from '../types'; + +export const getOrphanNodes = async (userCredentials: UserCredentials) => { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + try { + const response = await axios.post(`${url()}/get_unconnected_nodes_list`, formData); + return response; + } catch (error) { + console.log(error); + throw error; + } +}; diff --git a/frontend/src/services/GraphQuery.ts b/frontend/src/services/GraphQuery.ts index f179201a0..fb4e79960 100644 --- a/frontend/src/services/GraphQuery.ts +++ b/frontend/src/services/GraphQuery.ts @@ -10,8 +10,6 @@ const graphQueryAPI = async (userCredentials: UserCredentials, query_type: strin formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? ''); formData.append('query_type', query_type ?? 
'entities'); - - // @ts-ignore formData.append('document_names', JSON.stringify(document_names)); const response = await axios.post(`${url()}/graph_query`, formData, { diff --git a/frontend/src/services/UpdateGraph.ts b/frontend/src/services/PostProcessing.ts similarity index 70% rename from frontend/src/services/UpdateGraph.ts rename to frontend/src/services/PostProcessing.ts index 15b4665db..9f45cc6bd 100644 --- a/frontend/src/services/UpdateGraph.ts +++ b/frontend/src/services/PostProcessing.ts @@ -2,14 +2,15 @@ import axios from 'axios'; import { url } from '../utils/Utils'; import { UserCredentials } from '../types'; -const updateGraphAPI = async (userCredentials: UserCredentials) => { +const postProcessing = async (userCredentials: UserCredentials, taskParam: string[]) => { try { const formData = new FormData(); formData.append('uri', userCredentials?.uri ?? ''); formData.append('database', userCredentials?.database ?? ''); formData.append('userName', userCredentials?.userName ?? ''); formData.append('password', userCredentials?.password ?? 
''); - const response = await axios.post(`${url()}/update_similarity_graph`, userCredentials, { + formData.append('tasks', JSON.stringify(taskParam)); + const response = await axios.post(`${url()}/post_processing`, formData, { headers: { 'Content-Type': 'multipart/form-data', }, @@ -21,4 +22,4 @@ const updateGraphAPI = async (userCredentials: UserCredentials) => { } }; -export { updateGraphAPI }; +export { postProcessing }; diff --git a/frontend/src/services/QnaAPI.ts b/frontend/src/services/QnaAPI.ts index df58b0163..0e483a8f1 100644 --- a/frontend/src/services/QnaAPI.ts +++ b/frontend/src/services/QnaAPI.ts @@ -6,7 +6,8 @@ export const chatBotAPI = async ( userCredentials: UserCredentials, question: string, session_id: string, - model: string + model: string, + mode = 'vector' ) => { try { const formData = new FormData(); @@ -17,6 +18,7 @@ export const chatBotAPI = async ( formData.append('question', question); formData.append('session_id', session_id); formData.append('model', model); + formData.append('mode', mode); const startTime = Date.now(); const response = await axios.post(`${url()}/chat_bot`, formData, { headers: { diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d99b9f851..7bf8bced3 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -69,9 +69,9 @@ export type ExtractParams = { export type UploadParams = { file: Blob; model: string; - chunkNumber:number; - totalChunks:number; - originalname:string + chunkNumber: number; + totalChunks: number; + originalname: string; } & { [key: string]: any }; export type FormDataParams = ExtractParams | UploadParams; @@ -150,6 +150,10 @@ export interface ContentProps { isRightExpanded: boolean; showChatBot: boolean; openChatBot: () => void; + openTextSchema: () => void; + isSchema?: boolean; + setIsSchema: Dispatch>; + openOrphanNodeDeletionModal: () => void; } export interface FileTableProps { @@ -170,12 +174,32 @@ export interface CustomModalProps { setStatus: Dispatch>; } +export 
interface CustomInput { + value: string; + label: string; + placeHolder: string; + onChangeHandler: React.ChangeEventHandler; + submitHandler: (url: string) => void; + disabledCheck: boolean; + onCloseHandler: () => void; + id: string; + onBlurHandler: React.FocusEventHandler; + status: 'unknown' | 'success' | 'info' | 'warning' | 'danger'; + setStatus: Dispatch>; + statusMessage: string; + isValid: boolean; + isFocused: boolean; + onPasteHandler: React.ClipboardEventHandler; +} + export interface CommonButtonProps { openModal: () => void; wrapperclassName?: string; logo: string; - title: string; + title?: string; className?: string; + imgWidth?: number; + imgeHeight?: number; } export interface Source { @@ -183,17 +207,21 @@ export interface Source { source_name: string; start_time?: string; } +export interface chunk { + id: string; + score: number; +} export interface Messages { id: number; message: string; user: string; datetime: string; isTyping?: boolean; - sources?: Source[]; + sources?: string[]; model?: string; isLoading?: boolean; response_time?: number; - chunk_ids?: string[]; + chunk_ids?: chunk[]; total_tokens?: number; speaking?: boolean; copying?: boolean; @@ -301,6 +329,19 @@ export interface LegendChipProps { export interface FileContextProviderProps { children: ReactNode; } +export interface orphanNode { + id: string; + elementId: string; + description: string; + labels: string[]; + embedding: null | string; +} +export interface orphanNodeProps { + documents: string[]; + chunkConnections: number; + e: orphanNode; + checked?: boolean; +} export interface labelsAndTypes { labels: string[]; relationshipTypes: string[]; @@ -310,7 +351,7 @@ export interface commonserverresponse { error?: string; message?: string; file_name?: string; - data?: labelsAndTypes | labelsAndTypes[] | uploadData; + data?: labelsAndTypes | labelsAndTypes[] | uploadData | orphanNodeProps[]; } export interface ScehmaFromText extends Partial { @@ -319,6 +360,9 @@ export interface 
ScehmaFromText extends Partial { export interface ServerData extends Partial { data: labelsAndTypes[]; } +export interface OrphanNodeResponse extends Partial { + data: orphanNodeProps[]; +} export interface schema { nodelabels: string[]; relationshipTypes: string[]; @@ -332,10 +376,10 @@ export interface SourceListServerData { } export interface chatInfoMessage extends Partial { - sources: Source[]; + sources: string[]; model: string; response_time: number; - chunk_ids: string[]; + chunk_ids: chunk[]; total_tokens: number; } @@ -427,6 +471,7 @@ export interface Chunk { content_offset?: string; url?: string; fileSource: string; + score?: string; } export interface SpeechSynthesisProps { @@ -439,3 +484,26 @@ export interface SpeechArgs { pitch?: number; volume?: number; } + +export interface SettingsModalProps { + open: boolean; + onClose: () => void; + openTextSchema: () => void; + onContinue?: () => void; + settingView: 'contentView' | 'headerView'; + isSchema?: boolean; + setIsSchema: Dispatch>; + onClear?: () => void; +} +export interface Menuitems { + title: string; + onClick: () => void; + disabledCondition: boolean; + description?: string; +} +export type Vertical = 'top' | 'bottom'; +export type Horizontal = 'left' | 'right' | 'center'; +export interface Origin { + vertical: Vertical; + horizontal: Horizontal; +} diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 35832a08c..ee7fdd441 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -1,3 +1,5 @@ +import { VisualizeBloomIcon } from '@neo4j-ndl/react/icons'; + export const document = `+ [docs]`; export const chunks = `+ collect { MATCH p=(c)-[:NEXT_CHUNK]-() RETURN p } // chunk-chain @@ -27,8 +29,8 @@ export const docChunkEntities = `+[chunks] + collect { OPTIONAL MATCH p=(c:Chunk)-[:HAS_ENTITY]->(e)-[*0..1]-(:!Chunk) RETURN p }`; export const APP_SOURCES = process.env.REACT_APP_SOURCES !== '' - ? 
process.env.REACT_APP_SOURCES?.split(',') || [] - : ['gcs', 's3', 'local', 'wiki', 'youtube']; + ? process.env.REACT_APP_SOURCES?.split(',') + : ['gcs', 's3', 'local', 'wiki', 'youtube', 'web']; export const llms = process.env?.LLM_MODELS?.trim() != '' ? process.env.LLM_MODELS?.split(',') @@ -113,6 +115,8 @@ export const tooltips = { createSchema: 'Create your own schema by passing text', useExistingSchema: 'Use the already existing schema from DB', clearChat: 'Clear Chat History', + continue: 'Continue', + clearGraphSettings: 'Allow User to remove Settings', }; export const buttonCaptions = { @@ -133,4 +137,13 @@ export const buttonCaptions = { connectToNeo4j: 'Connect to Neo4j', cancel: 'Cancel', details: 'Details', + continueSettings: 'Continue', + clearSettings: 'Clear Settings', }; + +export const ChatModeOptions = [ + { Icon: VisualizeBloomIcon, value: 'vector' }, + { Icon: 'abc', value: 'graph+vector' }, +]; + +export const taskParam: string[] = ['update_similarity_graph', 'create_fulltext_index']; diff --git a/frontend/src/utils/FileAPI.ts b/frontend/src/utils/FileAPI.ts index a393f90fb..b6dcabeba 100644 --- a/frontend/src/utils/FileAPI.ts +++ b/frontend/src/utils/FileAPI.ts @@ -4,11 +4,18 @@ import { UserCredentials, ExtractParams, UploadParams } from '../types'; import { apiCall } from '../services/CommonAPI'; // Upload Call -export const uploadAPI = async (file: Blob, userCredentials: UserCredentials, model: string, chunkNumber: number, totalChunks: number,originalname:string): Promise => { +export const uploadAPI = async ( + file: Blob, + userCredentials: UserCredentials, + model: string, + chunkNumber: number, + totalChunks: number, + originalname: string +): Promise => { const urlUpload = `${url()}/upload`; const method: Method = 'post'; const commonParams: UserCredentials = userCredentials; - const additionalParams: UploadParams = { file, model, chunkNumber, totalChunks ,originalname}; + const additionalParams: UploadParams = { file, model, 
chunkNumber, totalChunks, originalname }; const response = await apiCall(urlUpload, method, commonParams, additionalParams); return response; }; @@ -77,6 +84,15 @@ export const extractAPI = async ( allowedNodes, allowedRelationship, }; + } else if (source_type === 'web-url') { + additionalParams = { + model, + source_url, + source_type, + file_name, + allowedNodes, + allowedRelationship, + }; } else { additionalParams = { model, diff --git a/frontend/src/utils/Loader.tsx b/frontend/src/utils/Loader.tsx index 85e8ba6b3..326e02c45 100644 --- a/frontend/src/utils/Loader.tsx +++ b/frontend/src/utils/Loader.tsx @@ -1,10 +1,10 @@ -export default function Loader() { +export default function Loader({ title }: { title: string }) { return (
-
Uploading
+
{title}
); } diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index f31b669bc..94271c20e 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -19,6 +19,21 @@ export const validation = (url: string) => { export const wikiValidation = (url: string) => { return url.trim() != '' && /https:\/\/([a-zA-Z]{2,3})\.wikipedia\.org\/wiki\/(.*)/gm.test(url) != false; }; +export const webLinkValidation = (url: string) => { + return ( + url.trim() != '' && + /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g.test(url) != + false + ); +}; +export const youtubeLinkValidation = (url: string) => { + return ( + url.trim() != '' && + /^(?:https?:\/\/)?(?:www\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$/.test( + url + ) != false + ); +}; // Status indicator icons to status column export const statusCheck = (status: string) => { switch (status) {