From 07ec57aa107d8954cd9daf05bb42ec890a956a76 Mon Sep 17 00:00:00 2001 From: JayGhiya Date: Tue, 22 Oct 2024 19:21:08 +0530 Subject: [PATCH] fix: include class user intent. improve dspy user intent to return enum values to drastically reduce output llm tokens making the overall response faster --- .../__main__.py | 1 - .../graph/neo4j_helper.py | 9 ++++ .../models/confluence_class.py | 7 +++ .../models/confluence_user_intent.py | 12 ++--- .../processing/query_engine_process.py | 44 ++++++++++++++----- .../unoplat_dspy/intent_detection_module.py | 15 ++++--- 6 files changed, 66 insertions(+), 22 deletions(-) create mode 100644 unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_class.py diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/__main__.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/__main__.py index 9a4b4c3..cab80f5 100644 --- a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/__main__.py +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/__main__.py @@ -32,7 +32,6 @@ def main(config): if not os.path.isfile(config): click.echo(f"Error: The code confluence query engine configuration file '{config}' does not exist.", err=True) return - app = ChatApp(config) app.run() diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/graph/neo4j_helper.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/graph/neo4j_helper.py index da64187..0cab8ef 100644 --- a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/graph/neo4j_helper.py +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/graph/neo4j_helper.py @@ -60,6 +60,15 @@ def get_package_details(self,package_name): """ return self.run_query(query, package_name=package_name) + def get_class_details(self,class_name): + query = """ + MATCH (c:Class {qualified_name: $class_name}) + RETURN + c.qualified_name AS class_name, + c.objective AS class_objective, + c.implementation_summary AS class_summary + """ + return self.run_query(query, class_name=class_name) def get_codebase_details(self,codebase_name): query = """ diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_class.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_class.py new file mode 100644 index 0000000..d0d7242 --- /dev/null +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_class.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel, Field + +class CodeConfluenceClass(BaseModel): + class_name: str = Field(default=None, description="The name of the class") + class_summary: str = Field(default=None, description="The summary of the class") + class_objective: str = Field(default=None, description="The objective of the class") + relevance_score: int = Field(default=None, description="The relevance score of the class") diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_user_intent.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_user_intent.py index 342aca1..44acf77 100644 --- a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_user_intent.py +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/models/confluence_user_intent.py @@ -1,7 +1,9 @@ -from enum import Enum +from enum import Enum, auto -class ConfluenceUserIntent(Enum): - CODE_SUMMARIZATION = "User wants an overview or summary of the codebase." - CODE_FEATURE = "User is looking for specific features that can be answered by going through the package summaries." - FUNCTIONAL_IMPLEMENTATION = "User wants detailed understanding at the function level." + +class ConfluenceUserIntent(str, Enum): + CODE_SUMMARIZATION = auto() + PACKAGE_OVERVIEW = auto() + CLASS_DETAILS = auto() + FUNCTIONAL_IMPLEMENTATION = auto() diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/processing/query_engine_process.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/processing/query_engine_process.py index 1e08355..c6353c9 100644 --- a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/processing/query_engine_process.py +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/processing/query_engine_process.py @@ -1,5 +1,6 @@ from unoplat_code_confluence_query_engine.graph.neo4j_helper import Neo4jHelper from unoplat_code_confluence_query_engine.embedding.unoplat_embedding_generator import UnoplatEmbeddingGenerator +from unoplat_code_confluence_query_engine.models.confluence_class import CodeConfluenceClass from unoplat_code_confluence_query_engine.unoplat_dspy.intent_detection_module import CodeConfluenceIntentDetectionModule from unoplat_code_confluence_query_engine.unoplat_dspy.user_query_final_response import CodeConfluenceUserQueryResponseModule from unoplat_code_confluence_query_engine.unoplat_dspy.user_query_based_rererank_module import CodeConfluenceUserQueryReRankModule @@ -10,9 +11,9 @@ from typing import List from unoplat_code_confluence_query_engine.helper.json_loader import JsonLoader from textual import log -from unoplat_code_confluence_query_engine.models.confluence_user_intent import ConfluenceUserIntent from unoplat_code_confluence_query_engine.models.confluence_codebase import CodeConfluenceCodebase from unoplat_code_confluence_query_engine.models.confluence_package import CodeConfluencePackage +from unoplat_code_confluence_query_engine.models.confluence_user_intent import ConfluenceUserIntent class QueryEngineProcess: def __init__(self, appConfigPath:str): @@ -42,13 +43,11 @@ def __init__(self, appConfigPath:str): async def process_query(self, user_query: str) -> str: user_query_embedding = self.embedding_generator.generate_embeddings_for_single_text(user_query) - - - user_intent_list: List[str] = self.intent_module(user_query=user_query).answer + user_intent_list: List[int] = self.intent_module(user_query=user_query).answer final_response = "" - if ConfluenceUserIntent.FUNCTIONAL_IMPLEMENTATION.name in user_intent_list: + + if int(ConfluenceUserIntent.FUNCTIONAL_IMPLEMENTATION.value) in user_intent_list: # Search similar functions - results = self.graph_helper.search_similar_nodes(vector_index="Method_implementation_embedding_vector_index", query_embedding=user_query_embedding, top_k=5) context = {result["name"]: result["summary"] for result in results} @@ -68,7 +67,7 @@ async def process_query(self, user_query: str) -> str: final_response = self.user_query_response_module(user_query=user_query, code_metadata=context).answer - elif ConfluenceUserIntent.CODE_SUMMARIZATION.name in user_intent_list: + elif int(ConfluenceUserIntent.CODE_SUMMARIZATION.value) in user_intent_list: results = self.graph_helper.search_similar_nodes(vector_index="Codebase_implementation_embedding_vector_index", query_embedding=user_query_embedding,top_k=5) context = {result["name"]: result["summary"] for result in results} @@ -90,7 +89,7 @@ async def process_query(self, user_query: str) -> str: # Generate final response final_response = final_response + self.user_query_response_module(user_query=user_query, code_metadata=context).answer - elif ConfluenceUserIntent.CODE_FEATURE.name in user_intent_list: + elif int(ConfluenceUserIntent.PACKAGE_OVERVIEW.value) in user_intent_list: results = self.graph_helper.search_similar_nodes(vector_index="Package_implementation_embedding_vector_index", query_embedding=user_query_embedding,top_k=5) context = {result["name"]: result["summary"] for result in results} @@ -111,8 +110,32 @@ async def process_query(self, user_query: str) -> str: # Generate final response final_response = final_response + self.user_query_response_module(user_query=user_query, code_metadata=context).answer + elif int(ConfluenceUserIntent.CLASS_DETAILS.value) in user_intent_list: + results = self.graph_helper.search_similar_nodes(vector_index="Class_implementation_embedding_vector_index", query_embedding=user_query_embedding, top_k=5) + context = {result["name"]: result["summary"] for result in results} + + if len(context) > 1: + rerank_results = self.rerank_module(user_query=user_query, possible_answers=context).answer.relevant_answers + filtered_rerank_results = {k: v for k, v in rerank_results.items() if v > 7} + context = {k: v for k, v in context.items() if k in filtered_rerank_results.keys()} + else: + # If there's only one or no context, we don't need to rerank + filtered_rerank_results = {list(context.keys())[0]: 10} if context else {} + + # Get details for all class names + for class_name in context.keys(): + class_details = self.graph_helper.get_class_details(class_name=class_name)[0] + class_details_object = CodeConfluenceClass(**class_details) + class_details_object.relevance_score = filtered_rerank_results.get(class_name, 10) # Default to 10 if not ranked + context[class_name] = class_details_object.model_dump_json() + + # Generate final response + final_response = final_response + self.user_query_response_module(user_query=user_query, code_metadata=context).answer + else: - return "Could not understand your intent. please be more specific in terms of whether you want to understand at codebase level or feature level or implementation level." + return "Could not understand your intent. Please be more specific in terms of whether you want to understand at codebase level, feature level, or implementation level." + + return final_response # TODO: proper exception handling at all levels @@ -136,4 +159,5 @@ async def _create_vector_index(self, node_label: str, embedding_property: str): async def load_existing_codebases(self): return self.graph_helper.get_existing_codebases() - \ No newline at end of file + + diff --git a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/unoplat_dspy/intent_detection_module.py b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/unoplat_dspy/intent_detection_module.py index 504e893..05a4be3 100644 --- a/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/unoplat_dspy/intent_detection_module.py +++ b/unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/unoplat_dspy/intent_detection_module.py @@ -1,19 +1,22 @@ from typing import List, Dict import dspy from textual import log +from unoplat_code_confluence_query_engine.models.confluence_user_intent import ConfluenceUserIntent + class IntentDescriptions: - DESCRIPTIONS: Dict[str, str] = { - "CODE_SUMMARIZATION": "User wants an overview or summary of the codebase.", - "CODE_FEATURE": "User is looking for specific features that can be answered by going through the package summaries.", - "FUNCTIONAL_IMPLEMENTATION": "User wants detailed understanding at the function level." + DESCRIPTIONS: Dict[ConfluenceUserIntent, str] = { + ConfluenceUserIntent.CODE_SUMMARIZATION: "The user query wants a high-level overview or summary of the entire codebase, including its main objectives and overall implementation.", + ConfluenceUserIntent.PACKAGE_OVERVIEW: "The user query is interested in understanding the objectives or summaries of specific packages within the codebase.", + ConfluenceUserIntent.CLASS_DETAILS: "The user query seeks detailed information about specific classes, including their purposes and how they fit within the packages.", + ConfluenceUserIntent.FUNCTIONAL_IMPLEMENTATION: "The user query wants detailed understanding at the function level." } class CodeConfluenceUserQuerySignature(dspy.Signature): """Based on user query and context of intents, return the user_intent_result.""" user_query: str = dspy.InputField(desc="This will contain user query", default=None, alias="UserQuery") - intent_descriptions: Dict[str, str] = dspy.InputField(desc="This will contain intents and their respective descriptions", default=None, alias="IntentDescriptions") - user_intent_result: List[str] = dspy.OutputField(desc="This will strictly return list of items from intents", alias="UserIntentResult") + intent_descriptions: Dict[ConfluenceUserIntent, str] = dspy.InputField(desc="This will contain intents and their respective descriptions", default=None, alias="IntentDescriptions") + user_intent_result: List[int] = dspy.OutputField(desc="This will strictly return list of confluent user intent enum values", alias="UserIntentResult") class CodeConfluenceIntentDetectionModule(dspy.Module): def __init__(self):