-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: added dspy typed signature and predictor for function level summary
- Loading branch information
Showing
10 changed files
with
1,920 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
unoplat-code-confluence/data_models/chapi_unoplat_codebase.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from typing import List, Optional | ||
|
||
from pydantic import BaseModel, Field | ||
|
||
from data_models.chapi_unoplat_package import UnoplatPackage | ||
|
||
|
||
# Top-level model for a codebase: a list of packages plus an optional
# summary string. Both fields declare PascalCase aliases.
class UnoplatCodebase(BaseModel):
    # Packages belonging to the codebase; default_factory gives each
    # instance its own fresh empty list.
    packages: List[UnoplatPackage] = Field(
        alias="Packages",
        default_factory=list,
    )
    # Summary text for the whole codebase; None when not supplied.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
unoplat-code-confluence/data_models/chapi_unoplat_function_summary.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from typing import Optional | ||
from pydantic import BaseModel, Field | ||
|
||
|
||
# Structured summary of a single function. Both fields are optional strings
# with PascalCase aliases; the description texts are kept verbatim.
class FunctionSummary(BaseModel):
    # High-level summary of what the function does.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
        description="This should include high level summary of what function does based on function content and function metadata.",
    )
    # Step-by-step implementation details of the function.
    implementation_summary: Optional[str] = Field(
        alias="ImplementationSummary",
        default=None,
        description="This should include implementation details of the function in a step by step fashion with precise functional arguments and fields used to perform the operation. use all metadata shared for the function to answer .",
    )
|
||
|
11 changes: 11 additions & 0 deletions
11
unoplat-code-confluence/data_models/chapi_unoplat_package.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from typing import List, Optional | ||
from pydantic import BaseModel, Field | ||
|
||
from data_models.chapi_unoplat_node import Node | ||
|
||
|
||
# Model for one package: its name, an optional summary, and the nodes
# grouped under it.
class UnoplatPackage(BaseModel):
    # Package name; None when not supplied.
    package: Optional[str] = Field(
        alias="Package",
        default=None,
    )
    # Summary text for the package; None when not supplied.
    summary: Optional[str] = Field(
        alias="Summary",
        default=None,
    )
    # Nodes in this package (no alias declared); default_factory gives
    # each instance its own fresh empty list.
    nodes: List[Node] = Field(
        default_factory=list,
    )
|
101 changes: 101 additions & 0 deletions
101
unoplat-code-confluence/dspy_unoplat_code_confluence.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/Users/jayghiya/Documents/unoplat/unoplat-codebase-understanding/unoplat-code-confluence/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", | ||
" from .autonotebook import tqdm as notebook_tqdm\n", | ||
"\u001b[32m2024-06-11 14:29:40.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mloader.json_loader\u001b[0m:\u001b[36mload_json_from_file\u001b[0m:\u001b[36m28\u001b[0m - \u001b[1mJSON data successfully loaded from springstarterjava1_codes.json\u001b[0m\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"<built-in method keys of dict object at 0x165201300>\n", | ||
"name='root' return_type='ModelAndView' function_calls=[] annotations=[Annotation(name='RequestMapping', position=Position(start_line=17, start_line_position=4, stop_line=17, stop_line_position=59))] position=Position(start_line=18, start_line_position=11, stop_line=20, stop_line_position=4) local_variables=[FieldModel(type_type='ModelAndView', type_value='return', type_key=None)] body_hash=763749358 content='ModelAndView root() { return new ModelAndView(\"redirect:/swagger-ui/\"); '\n" | ||
] | ||
} | ||
], | ||
"source": [
    "import dspy\n",
    "import os\n",
    "from data_models.chapi_unoplat_function import Function\n",
    "from data_models.chapi_unoplat_function_summary import FunctionSummary\n",
    "from loader import JsonLoader\n",
    "from loader import JsonParser\n",
    "\n",
    "ollama_mistral = dspy.OllamaLocal(model='mistral:7b-instruct-fp16')\n",
    "dspy.configure(lm=ollama_mistral)\n",
    "\n",
    "ollama_mistral(\"We are going to build class summary based on step by step approach. First we will build function by function summary then based on that we will build class level summary. For this user will be sharing relevant metadata in json for each function first and then class.\")\n",
    "\n",
    "\n",
    "\n",
    "class UnoplatFunctionSummary(dspy.Signature):\n",
    "    \"\"\"Generate Summary in json based on FunctionSummary\"\"\"\n",
    "\n",
    "    input: Function = dspy.InputField(desc=\"will contain all relevant function metadata\")\n",
    "    output: FunctionSummary = dspy.OutputField(desc=\"summarisation of function\")\n",
    "\n",
    "cot_predictor = dspy.TypedChainOfThought(UnoplatFunctionSummary)\n",
    "\n",
    "# read the json file springstarterjava1_codes into pydantic\n",
    "\n",
    "\n",
    "iload_json = JsonLoader()\n",
    "\n",
    "iparse_json = JsonParser()\n",
    "\n",
    "\n",
    "metadata_classes = iload_json.load_json_from_file(\"springstarterjava1_codes.json\")\n",
    "\n",
    "package_dict = iparse_json.parse_json_to_nodes(metadata_classes,None)\n",
    "\n",
    "# call keys(); printing the bare attribute only shows the bound method object\n",
    "print(package_dict.keys())\n",
    "\n",
    "\n",
    "function: Function = package_dict['com.datastax.examples.order'][2].functions[0]\n",
    "print(function)\n",
    "\n",
    "prediction = cot_predictor(input=function,options={\"format\": \"json\"})\n",
    "\n",
    "ollama_mistral.inspect_history(n=1)"
]
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,27 @@ | ||
from typing import List | ||
from typing import Dict, List | ||
from pydantic import ValidationError | ||
from loader.iparse_json import IParseJson | ||
from data_models.chapi_unoplat_node import Node | ||
from loguru import logger | ||
from nodeparser.isummariser import ISummariser | ||
|
||
class JsonParser(IParseJson):
    """Parses raw JSON items into ``Node`` objects grouped by package."""

    def parse_json_to_nodes(self, json_data: dict, isummariser: ISummariser = None) -> Dict[str, List[Node]]:
        """Build ``Node`` objects from ``json_data`` and group them by package.

        Args:
            json_data: Iterable of items; each item is unpacked into
                ``Node(**item)``, so each one must be a mapping.
                NOTE(review): the ``dict`` annotation looks too narrow for
                how the value is iterated -- confirm against callers.
            isummariser: Accepted for interface compatibility; currently
                unused because class summarisation is disabled.

        Returns:
            A dict mapping each ``node.package`` value to the list of nodes
            that carry it, in input order. Items that fail validation are
            logged and left out.
        """
        package_dict: Dict[str, List[Node]] = {}
        for item in json_data:
            try:
                node = Node(**item)
            except ValidationError as e:
                # Skip the invalid item but keep processing the rest.
                logger.error(f"Error validating node: {e}")
                continue

            # Only summarise if node type is class
            # TODO: going forward might require other constructs too like interface, abstract class for better intellisense etc
            # TODO: disabled right now for dspy
            # if node.type == "CLASS":
            #     node = isummariser.summarise_node(node)

            # setdefault creates the per-package list on first sight of a package.
            package_dict.setdefault(node.package, []).append(node)
        return package_dict
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1,769 changes: 1,768 additions & 1 deletion
1,769
unoplat-code-confluence/springstarterjava1_codes.json
Large diffs are not rendered by default.
Oops, something went wrong.