From 2d981058413212433e9f15e6222646aa6f25f215 Mon Sep 17 00:00:00 2001 From: dhirenmathur Date: Tue, 5 Nov 2024 04:07:37 +0530 Subject: [PATCH] update neo4j with docstrings with each llm response --- app/modules/auth/auth_router.py | 9 +++---- .../conversation/conversation_model.py | 4 +-- .../graph_construction/parsing_controller.py | 2 +- .../graph_construction/parsing_service.py | 2 +- .../knowledge_graph/inference_service.py | 27 +++++++------------ readme.md | 14 +++++----- 6 files changed, 22 insertions(+), 36 deletions(-) diff --git a/app/modules/auth/auth_router.py b/app/modules/auth/auth_router.py index 921f4ed2..f2f4f86e 100644 --- a/app/modules/auth/auth_router.py +++ b/app/modules/auth/auth_router.py @@ -10,11 +10,11 @@ from app.core.database import get_db from app.modules.auth.auth_schema import LoginRequest +from app.modules.auth.auth_service import auth_handler from app.modules.users.user_schema import CreateUser from app.modules.users.user_service import UserService from app.modules.utils.APIRouter import APIRouter from app.modules.utils.posthog_helper import PostHogClient -from app.modules.auth.auth_service import auth_handler SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL", None) @@ -29,7 +29,6 @@ async def send_slack_message(message: str): class AuthAPI: - @auth_router.post("/login") async def login(login_request: LoginRequest): email, password = login_request.email, login_request.password @@ -39,10 +38,8 @@ async def login(login_request: LoginRequest): id_token = res.get("idToken") return JSONResponse(content={"token": id_token}, status_code=200) except Exception as e: - return JSONResponse( - content={"error": f"ERROR: {str(e)}"}, status_code=400 - ) - + return JSONResponse(content={"error": f"ERROR: {str(e)}"}, status_code=400) + @auth_router.post("/signup") async def signup(request: Request, db: Session = Depends(get_db)): body = json.loads(await request.body()) diff --git a/app/modules/conversations/conversation/conversation_model.py 
b/app/modules/conversations/conversation/conversation_model.py index f4a6a20c..8736f5da 100644 --- a/app/modules/conversations/conversation/conversation_model.py +++ b/app/modules/conversations/conversation/conversation_model.py @@ -45,9 +45,7 @@ class Conversation(Base): nullable=False, ) shared_with_emails = Column(ARRAY(String), nullable=True) - visibility = Column( - SQLAEnum(Visibility), default=Visibility.PRIVATE, nullable=True - ) + visibility = Column(SQLAEnum(Visibility), default=Visibility.PRIVATE, nullable=True) # Relationships user = relationship("User", back_populates="conversations") messages = relationship( diff --git a/app/modules/parsing/graph_construction/parsing_controller.py b/app/modules/parsing/graph_construction/parsing_controller.py index 0f348176..ce357368 100644 --- a/app/modules/parsing/graph_construction/parsing_controller.py +++ b/app/modules/parsing/graph_construction/parsing_controller.py @@ -37,7 +37,7 @@ async def parse_directory( "crewAIInc/crewAI", "mem0ai/mem0", "AgentOps-AI/agentops", - "calcom/cal.com" + "calcom/cal.com", ] try: diff --git a/app/modules/parsing/graph_construction/parsing_service.py b/app/modules/parsing/graph_construction/parsing_service.py index e7aaefe0..c4ce891e 100644 --- a/app/modules/parsing/graph_construction/parsing_service.py +++ b/app/modules/parsing/graph_construction/parsing_service.py @@ -137,7 +137,7 @@ async def analyze_directory( if language in ["python", "javascript", "typescript"]: graph_manager = Neo4jManager(project_id, user_id) - self.create_neo4j_indices(graph_manager) + # self.create_neo4j_indices(graph_manager) commented since indices are created already try: graph_constructor = GraphConstructor(graph_manager, user_id) diff --git a/app/modules/parsing/knowledge_graph/inference_service.py b/app/modules/parsing/knowledge_graph/inference_service.py index 1322c689..18c6cfb6 100644 --- a/app/modules/parsing/knowledge_graph/inference_service.py +++ 
b/app/modules/parsing/knowledge_graph/inference_service.py @@ -428,11 +428,11 @@ async def generate_docstrings(self, repo_id: str) -> Dict[str, DocstringResponse ) await self.search_service.commit_indices() - entry_points = self.get_entry_points(repo_id) - logger.info( - f"DEBUGNEO4J: After get entry points, Repo ID: {repo_id}, Entry points: {len(entry_points)}" - ) - self.log_graph_stats(repo_id) + # entry_points = self.get_entry_points(repo_id) + # logger.info( + # f"DEBUGNEO4J: After get entry points, Repo ID: {repo_id}, Entry points: {len(entry_points)}" + # ) + # self.log_graph_stats(repo_id) # entry_points_neighbors = {} # for entry_point in entry_points: # neighbors = self.get_neighbours(entry_point, repo_id) @@ -455,17 +455,15 @@ async def process_batch(batch): f"Parsing project {repo_id}: Invalid response from LLM. Not an instance of DocstringResponse. Retrying..." ) response = await self.generate_response(batch, repo_id) + if isinstance(response, DocstringResponse): + self.update_neo4j_with_docstrings(repo_id, response) return response tasks = [process_batch(batch) for batch in batches] results = await asyncio.gather(*tasks) for result in results: - if isinstance(result, DocstringResponse): - all_docstrings["docstrings"] = ( - all_docstrings["docstrings"] + result.docstrings - ) - else: + if not isinstance(result, DocstringResponse): logger.error( f"Project {repo_id}: Invalid response from during inference. Manually verify the project completion." 
) @@ -602,7 +600,7 @@ def update_neo4j_with_docstrings(self, repo_id: str, docstrings: DocstringRespon "tags": n.tags, "embedding": self.generate_embedding(n.docstring), } - for n in docstrings["docstrings"] + for n in docstrings.docstrings ] project = self.project_manager.get_project_from_db_by_id_sync(repo_id) repo_name = project.get("project_name") @@ -642,14 +640,7 @@ async def run_inference(self, repo_id: str): f"DEBUGNEO4J: After generate docstrings, Repo ID: {repo_id}, Docstrings: {len(docstrings)}" ) self.log_graph_stats(repo_id) - self.update_neo4j_with_docstrings(repo_id, docstrings) - logger.info( - f"DEBUGNEO4J: After update neo4j with docstrings, Repo ID: {repo_id}" - ) - self.log_graph_stats(repo_id) self.create_vector_index() - logger.info(f"DEBUGNEO4J: After create vector index, Repo ID: {repo_id}") - self.log_graph_stats(repo_id) def query_vector_index( self, diff --git a/readme.md b/readme.md index 731df367..3449354d 100644 --- a/readme.md +++ b/readme.md @@ -24,7 +24,7 @@ AI agents for your codebase in minutes.
-Potpie parses and understands your codebase by building a knowledge graph out of your code’s components. +Potpie parses and understands your codebase by building a knowledge graph out of your code’s components. It provides pre-built agents that are expert on your codebase to perform common engineering tasks for you, and also provides the platform for you to build your own custom agents.

@@ -50,7 +50,7 @@ It provides pre-built agents that are expert on your codebase to perform common ## What are Codebase Agents? -AI agents are autonomous tools that have the ability to reason, take decisions and perform actions on their own. They are provided with 'tools' that they can use to perform tasks. Agents are iterative in nature and build on top of the results of the previous iteration in order to perform any task assigned to them. +AI agents are autonomous tools that have the ability to reason, take decisions and perform actions on their own. They are provided with 'tools' that they can use to perform tasks. Agents are iterative in nature and build on top of the results of the previous iteration in order to perform any task assigned to them. Software development is a similarly iterative process and agents can be used to automate and optimize key aspects of software development. Things that developers do daily, like dbugging, can be broken down into a series of iterative steps that can be automated by agents. @@ -61,9 +61,9 @@ For example, debugging can be broken down into: 4. Testing the hypothesis 5. Repeating the above steps until the bug is fixed -In order to perform these steps, an agent would need to understand the codebase, the code around the stacktrace, the flow of the code, the project structure etc. +In order to perform these steps, an agent would need to understand the codebase, the code around the stacktrace, the flow of the code, the project structure etc. -Potpie parses your codebase and builds a graph tracking relationships between functions, files, classes, etc. We generate inferences for each node and embed and store it in the graph. This can be used to curate the correct context by performing a similarity search based on users query. The graph can also be queried to understand the code flow, it can be queried to understand the project structure etc. 
+Potpie parses your codebase and builds a graph tracking relationships between functions, files, classes, etc. We generate inferences for each node and embed and store it in the graph. This can be used to curate the correct context by performing a similarity search based on the user's query. The graph can also be queried to understand the code flow, it can be queried to understand the project structure etc. This allows Potpie's agents to understand the codebase and reason about the code. @@ -197,10 +197,10 @@ We used the Q&A agent to understand the underlying working of a feature of the [ - **Low level design**: Before writing that first line of code, it is important to know which files and functions need to be changed. This agent takes your functional requirements as an input and then generates a low level design for the feature. The output will consist of which files need to be changed, what all functions need to be added etc. We fed an open issue from the [**Portkey-AI/Gateway**](https://github.com/Portkey-AI/Gateway) project to this agent to generate a low level design for it: Video [here](https://www.linkedin.com/posts/dhirenmathur_potpie-ai-agents-vs-llms-i-am-extremely-activity-7255607456448286720-roOC). -- **Reviewing code changes**: Every commit to the codebase has the potential to be a breaking change. Use this agent to understand the functional impact of the changes in the codebase. It compares the code changes with the default branch and computes the blast radius of the changes. +- **Reviewing code changes**: Every commit to the codebase has the potential to be a breaking change. Use this agent to understand the functional impact of the changes in the codebase. It compares the code changes with the default branch and computes the blast radius of the changes. 
Here we analyse a PR from the [**mem0ai/mem0**](https://github.com/mem0ai/mem0) codebase and understand its blast radius : Video [here](https://www.linkedin.com/posts/dhirenmathur_prod-is-down-three-words-every-activity-7257007131613122560-o4A7). -- **Debugging**: Debugging is an iterative process that usually follows a set of well known steps. This agent emulates those steps and can be used to debug issues in the codebase. It takes a stacktrace as an input and then generates a list of steps that can be used to debug the issue. +- **Debugging**: Debugging is an iterative process that usually follows a set of well known steps. This agent emulates those steps and can be used to debug issues in the codebase. It takes a stacktrace as an input and then generates a list of steps that can be used to debug the issue. - **Unit and Integration testing**: Use the Unit Test Agent to generate unit test plans and code for individual functions to enhance test coverage, similarly use the Integration Test Agent to generate integration test plans and code for flows to ensure components work together properly. These agents are highly contextual and will use the codebase graph to gather context for generating the tests. @@ -234,7 +234,7 @@ Potpie is designed to be flexible and customizable. Here are key areas to person ### 1. System Prompts Configuration -Modify the system prompts to align with your organization's tone and terminology. +Modify the system prompts to align with your organization's tone and terminology. **Edit Prompt Text**: In `app/modules/intelligence/prompts/system_prompt_setup.py`, update the `system_prompts` lists to change the text for each agent.