Skip to content

Commit

Permalink
feat: added dspy typed signature and predictor for function level sum…
Browse files Browse the repository at this point in the history
…mary
  • Loading branch information
JayGhiya committed Jun 11, 2024
1 parent b3a74e8 commit 6face17
Show file tree
Hide file tree
Showing 10 changed files with 1,920 additions and 14 deletions.
8 changes: 4 additions & 4 deletions unoplat-code-confluence/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ def start_parsing(local_workspace_path, programming_language, output_path, codeb

output_filename = f"{codebase_name}_{current_timestamp}.md"

with open(os.path.join(output_path, output_filename), 'a+') as md_file:
for node in iparse_json.parse_json_to_nodes(chapi_metadata, isummariser):
if node.type == "CLASS":
md_file.write(f"{node.summary}\n\n")
# with open(os.path.join(output_path, output_filename), 'a+') as md_file:
# for node in iparse_json.parse_json_to_nodes(chapi_metadata, isummariser):
# if node.type == "CLASS":
# md_file.write(f"{node.summary}\n\n")
# with open('codebase_summary.json', 'w') as file:
# json.dump(codebase_metadata, file)

Expand Down
10 changes: 10 additions & 0 deletions unoplat-code-confluence/data_models/chapi_unoplat_codebase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import List, Optional

from pydantic import BaseModel, Field

from data_models.chapi_unoplat_package import UnoplatPackage


# Pydantic model holding a list of UnoplatPackage entries plus an optional
# summary string. Both fields are populated through capitalised aliases.
# NOTE: documented with comments rather than a class docstring so the model's
# generated JSON schema is left exactly as it was.
class UnoplatCodebase(BaseModel):
    # Input key "Packages"; every instance gets its own fresh empty list
    # when the key is absent.
    packages: List[UnoplatPackage] = Field(
        alias="Packages",
        default_factory=list,
    )
    # Input key "Summary"; stays None when the key is absent.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ class Function(BaseModel):
local_variables: List[FieldModel] = Field(default_factory=list, alias="LocalVariables")
body_hash: Optional[int] = Field(default=None, alias="BodyHash")
content: Optional[str] = Field(default=None, alias="Content")
summary: Optional[str] = Field(default=None, alias="Summary")
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import Optional
from pydantic import BaseModel, Field


# Pydantic model with two optional text fields: a high-level summary and a
# step-by-step implementation summary. Each field carries a `description`
# that spells out what the text is expected to contain.
# NOTE: documented with comments rather than a class docstring so the model's
# generated JSON schema is left exactly as it was.
class FunctionSummary(BaseModel):
    # Input key "Summary"; None when absent.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
        description="This should include high level summary of what function does based on function content and function metadata.",
    )
    # Input key "ImplementationSummary"; None when absent.
    implementation_summary: Optional[str] = Field(
        alias="ImplementationSummary",
        default=None,
        description="This should include implementation details of the function in a step by step fashion with precise functional arguments and fields used to perform the operation. use all metadata shared for the function to answer .",
    )


11 changes: 11 additions & 0 deletions unoplat-code-confluence/data_models/chapi_unoplat_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import List, Optional
from pydantic import BaseModel, Field

from data_models.chapi_unoplat_node import Node


# Pydantic model with an optional package name, an optional summary string
# and a list of Node entries.
# NOTE: documented with comments rather than a class docstring so the model's
# generated JSON schema is left exactly as it was.
class UnoplatPackage(BaseModel):
    # Input key "Package"; None when absent.
    package: Optional[str] = Field(
        alias="Package",
        default=None,
    )
    # Input key "Summary"; None when absent.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
    )
    # No alias is declared here, so this field is populated under its own
    # name; every instance gets its own fresh empty list by default.
    nodes: List[Node] = Field(
        default_factory=list,
    )

101 changes: 101 additions & 0 deletions unoplat-code-confluence/dspy_unoplat_code_confluence.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jayghiya/Documents/unoplat/unoplat-codebase-understanding/unoplat-code-confluence/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"\u001b[32m2024-06-11 14:29:40.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mloader.json_loader\u001b[0m:\u001b[36mload_json_from_file\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mJSON data successfully loaded from springstarterjava1_codes.json\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<built-in method keys of dict object at 0x165201300>\n",
"name='root' return_type='ModelAndView' function_calls=[] annotations=[Annotation(name='RequestMapping', position=Position(start_line=17, start_line_position=4, stop_line=17, stop_line_position=59))] position=Position(start_line=18, start_line_position=11, stop_line=20, stop_line_position=4) local_variables=[FieldModel(type_type='ModelAndView', type_value='return', type_key=None)] body_hash=763749358 content='ModelAndView root() { return new ModelAndView(\"redirect:/swagger-ui/\"); '\n"
]
}
],
"source": [
"import dspy\n",
"import os\n",
"from data_models.chapi_unoplat_function import Function\n",
"from data_models.chapi_unoplat_function_summary import FunctionSummary\n",
"from loader import JsonLoader\n",
"from loader import JsonParser\n",
"\n",
"ollama_mistral = dspy.OllamaLocal(model='mistral:7b-instruct-fp16')\n",
"dspy.configure(lm=ollama_mistral)\n",
"\n",
"ollama_mistral(\"We are going to build class summary based on step by step approach. First we will build function by function summary then based on that we will build class level summary. For this user will be sharing relevant metadata in json for each function first and then class.\")\n",
"\n",
"\n",
"\n",
"class UnoplatFunctionSummary(dspy.Signature):\n",
" \"\"\"Generate Summary in json based on FunctionSummary\"\"\"\n",
" \n",
" input: Function = dspy.InputField(desc=\"will contain all relevant function metadata\")\n",
" output: FunctionSummary = dspy.OutputField(desc=\"summarisation of function\")\n",
"\n",
"cot_predictor = dspy.TypedChainOfThought(UnoplatFunctionSummary)\n",
"\n",
"# read the json file springstarterjava1_codes.json into pydantic models\n",
"\n",
"\n",
"iload_json = JsonLoader()\n",
"\n",
"iparse_json = JsonParser()\n",
"\n",
"\n",
"meadata_classes = iload_json.load_json_from_file(\"springstarterjava1_codes.json\")\n",
"\n",
"package_dict = iparse_json.parse_json_to_nodes(meadata_classes,None)\n",
"\n",
"print(package_dict.keys)\n",
"\n",
"\n",
"function: Function = package_dict['com.datastax.examples.order'][2].functions[0]\n",
"print(function)\n",
"\n",
"prediction = cot_predictor(input=function,options={\"format\": \"json\"})\n",
"\n",
"ollama_mistral.inspect_history(n=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
19 changes: 12 additions & 7 deletions unoplat-code-confluence/loader/parse_json.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
from typing import List
from typing import Dict, List
from pydantic import ValidationError
from loader.iparse_json import IParseJson
from data_models.chapi_unoplat_node import Node
from loguru import logger
from nodeparser.isummariser import ISummariser

class JsonParser(IParseJson):
def parse_json_to_nodes(self, json_data: dict,isummariser:ISummariser) -> List[Node]:
def parse_json_to_nodes(self, json_data: dict, isummariser: ISummariser = None) -> Dict[str, List[Node]]:
"""Concrete implementation of the parse_json_to_nodes method."""
nodes = []
package_dict = {}
for item in json_data:
try:
node = Node(**item)
#Only summarise if node type is class
#TODO: going forward, other constructs (e.g. interfaces, abstract classes) may also need to be summarised for better intellisense
if node.type == "CLASS":
node = isummariser.summarise_node(node)
nodes.append(node)
#TODO: class-level summarisation is disabled for now while dspy is being tried out
# if node.type == "CLASS":
# node = isummariser.summarise_node(node)

if node.package not in package_dict:
package_dict[node.package] = []
package_dict[node.package].append(node)

except ValidationError as e:
logger.error(f"Error validating node: {e}")
return nodes
return package_dict
2 changes: 1 addition & 1 deletion unoplat-code-confluence/nodeparser/nodesummariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _create_summary_prompt(self, node: Node) -> str:
internal_calls_str = ""
external_calls_str = ""
for call in function.function_calls:
call_description = f"`{call.function_name}()` to `Fill in description of what the call is for. Focus on call interactions within the class and outside the class using fields`"
call_description = f"`{call.function_name}()` to `Fill in description of what the call is for. Focus on call interactions within the class and outside the class using fields. Use function metadata and content of the class`"
if call.node_name == node.node_name: # Internal call
internal_calls_str += f" - {call_description}\n"
else: # External call
Expand Down
4 changes: 4 additions & 0 deletions unoplat-code-confluence/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ pydantic-settings = "^2.2.1"
litellm = "^1.37.19"
pytest = "^8.2.1"
crewai = {extras = ["tools"], version = "^0.30.11"}
dspy-ai = "^2.4.9"


[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
1,769 changes: 1,768 additions & 1 deletion unoplat-code-confluence/springstarterjava1_codes.json

Large diffs are not rendered by default.

0 comments on commit 6face17

Please sign in to comment.