Skip to content

Commit

Permalink
Merge pull request #103 from unoplat/102-feat-introduce-n-depth-packa…
Browse files Browse the repository at this point in the history
…ge-level-comprehensions-and-only-use-root-package-summary-for-codebase-comprehension

102 feat introduce n depth package level comprehensions and only use root package summary for codebase comprehension
  • Loading branch information
JayGhiya authored Jul 28, 2024
2 parents 14b4e88 + a0f9f8d commit 0433cdb
Show file tree
Hide file tree
Showing 18 changed files with 337 additions and 120 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,5 @@ unoplat-code-confluence/unoplat_code_confluence/example_config.json
unoplat-code-confluence/poetry.lock
app.log
unoplat-code-confluence/unoplat_code_confluence/configuration/__pycache__
unoplat-code-confluence/unoplat_code_confluence/markdownparser/__pycache__
unoplat-code-confluence/~/Documents/unoplat
1 change: 1 addition & 0 deletions unoplat-code-confluence/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ litellm = "^1.37.19"
pytest = "^8.2.1"
dspy-ai = "^2.4.9"
packaging = "^24.1"
progiter = "^2.0.0"

[tool.poetry.scripts]
unoplat-code-confluence = "unoplat_code_confluence.__main__:start_pipeline"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from unoplat_code_confluence.loader import iload_json, iparse_json
from unoplat_code_confluence.loader.json_loader import JsonLoader
from unoplat_code_confluence.loader.parse_json import JsonParser
from unoplat_code_confluence.nodeparser.markdownsummariser import MarkdownSummariser
from unoplat_code_confluence.markdownparser.markdownsummariser import MarkdownSummariser
from unoplat_code_confluence.summary_parser.codebase_summary import CodebaseSummaryParser
import warnings
from packaging import version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@


# Tree-shaped package model: each package carries its own node subsets and a
# dict of nested packages of the same type.
# NOTE: documented with comments rather than a class docstring so that the
# pydantic model's __doc__ (and therefore its JSON-schema description) is
# left untouched.
class UnoplatPackage(BaseModel):
    # Package name; stays None until a value is supplied.
    name: Optional[str] = Field(
        default=None,
        description="Name of the package",
    )
    # Node subsets that belong directly to this package (fresh list per instance).
    node_subsets: Optional[List[DspyUnoplatNodeSubset]] = Field(
        default_factory=list,
        description="List of the node subsets for the package",
    )
    # Nested packages, string-keyed (presumably by sub-package name - confirm
    # against the code that builds this dict). Self-referential annotation.
    sub_packages: Optional[Dict[str, "UnoplatPackage"]] = Field(
        default_factory=dict,
        description="Dict of the sub-packages for the package",
    )


# Resolve the "UnoplatPackage" forward reference used by sub_packages.
UnoplatPackage.model_rebuild()

Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from typing import Optional
from typing import Dict, Optional
from pydantic import BaseModel,Field

from unoplat_code_confluence.data_models.dspy.dspy_unoplat_package_summary import DspyUnoplatPackageSummary

# Summary record for a whole codebase. Every field is optional.
# NOTE: documented with comments rather than a class docstring so that the
# pydantic model's __doc__ / schema description is left untouched.
class DspyUnoplatCodebaseSummary(BaseModel):
    codebase_summary: Optional[str] = Field(
        default=None,
        description="A summary of the codebase",
    )
    codebase_objective: Optional[str] = Field(
        default=None,
        description="The objective of the codebase",
    )
    metadata: Optional[dict] = Field(
        default=None,
        description="The metadata of the codebase",
    )
    codebase_name: Optional[str] = Field(
        default=None,
        description="The file id of the codebase summary",
    )
    # String-keyed package summaries (presumably keyed by package name -
    # confirm against the producer). Defaults to a fresh empty dict.
    codebase_package: Optional[Dict[str, DspyUnoplatPackageSummary]] = Field(
        default_factory=dict,
        description="A summary of the codebase package",
    )
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Optional
from pydantic import BaseModel,Field

from unoplat_code_confluence.data_models.dspy.dspy_o_function_summary import DspyFunctionSummary

# Summary of a single function: its name, a structured summary and optional
# free-form metadata. The first two fields are required and are declared with
# the aliases "FunctionName" / "FunctionSummary".
# NOTE: documented with comments rather than a class docstring so that the
# pydantic model's __doc__ / schema description is left untouched.
class DspyUnoplatFunctionSummary(BaseModel):
    function_name: str = Field(
        alias="FunctionName",
        description="The name of the function",
    )
    function_summary: DspyFunctionSummary = Field(
        alias="FunctionSummary",
        description="A summary of the function",
    )
    metadata: Optional[dict] = Field(
        default=None,
        description="Additional metadata for the function",
    )
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ class DspyUnoplatNodeSummary(BaseModel):
node_name: Optional[str] = Field(default=None, alias="NodeName",description="The name of the class")
node_summary: Optional[str] = Field(default=None, alias="NodeSummary",description="A summary of the class")
node_objective: Optional[str] = Field(default=None, alias="NodeObjective",description="The objective of the class")
functions_summary: Optional[List[DspyUnoplatFunctionSummary]] = Field(default=None, alias="FunctionsSummary",description="A list of functions in the class")
functions_summary: Optional[List[DspyUnoplatFunctionSummary]] = Field(default=None, alias="FunctionsSummary",description="A list of functions in the class")
metadata: Optional[dict] = Field(default=None, description="Additional metadata for the node")
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@



# Recursive package summary: objective, detailed summary and class summaries
# for one package, plus the summaries of its sub-packages (same type).
# NOTE: documented with comments rather than a class docstring so that the
# pydantic model's __doc__ / schema description is left untouched.
class DspyUnoplatPackageSummary(BaseModel):
    # Required: no default is declared for the two text fields.
    package_objective: str = Field(
        description="The objective of the package in a concise manner",
    )
    package_summary: str = Field(
        description="The detailed summary of the package",
    )
    class_summary: List[DspyUnoplatNodeSummary] = Field(
        default_factory=list,
        description="List of the class summaries for the package",
    )
    metadata: Optional[dict] = Field(
        default=None,
        description="Additional metadata for the package",
    )
    # Self-referential annotation.
    sub_package_summaries: Dict[str, "DspyUnoplatPackageSummary"] = Field(
        default_factory=dict,
        description="Dictionary of sub-package summaries, keyed by package name",
    )


# Resolve the "DspyUnoplatPackageSummary" forward reference used above.
DspyUnoplatPackageSummary.model_rebuild()

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class CodeConfluenceClassSummarySignature(dspy.Signature):

# One input (final_class_summary) -> one output (class_objective).
# The class docstring and every `desc` string are prompt text handed to dspy,
# so they are kept verbatim.
class CodeConfluenceClassObjectiveSignature(dspy.Signature):
    """This signature takes in class summary and returns concise class_objective of the class. Do not include your reasoning in class_objective."""

    final_class_summary: str = dspy.InputField(
        desc="This should contain concise detailed implementation summary of the class or in some cases direct content of the class if it is just a data model object.",
    )
    class_objective: str = dspy.OutputField(
        desc="This should contain concise objective of the class based on implementation summary in under 2 lines without loosing on any details",
    )


Expand All @@ -36,7 +36,10 @@ def forward(self, class_metadata: DspyUnoplatNodeSubset, function_objective_summ
signature_class_summary = self.generate_class_summary(class_existing_summary=class_summary, function_summary=function_objective.function_summary.objective, class_metadata=str(class_metadata.model_dump_json()),hint="Generate the class detailed summary for the class by being concise , factual and grounded.:"+class_metadata.node_name)
class_summary = signature_class_summary.final_class_summary

hint="Generate the class objective for the class by being concise and dnt miss on any details.:"+class_metadata.node_name
if class_metadata.node_name is not None:
hint="Generate the class objective for the class by being concise and dnt miss on any details.:"+class_metadata.node_name
else:
hint="Generate the class objective for the class by being concise and dnt miss on any details."

if len(function_objective_summary) > 0:
class_objective_signature = self.generate_class_objective(final_class_summary = class_summary,hint=hint)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict
import dspy
from unoplat_code_confluence.data_models.dspy.dspy_unoplat_package_summary import DspyUnoplatPackageNodeSummary
from unoplat_code_confluence.data_models.dspy.dspy_unoplat_package_summary import DspyUnoplatPackageSummary



Expand All @@ -24,7 +24,7 @@ def __init__(self):
self.generate_codebase_objective = dspy.ChainOfThoughtWithHint(CodeConfluenceCodebaseObjectiveSignature)


def forward(self, package_objective_dict: Dict[str, DspyUnoplatPackageNodeSummary]):
def forward(self, package_objective_dict: Dict[str, DspyUnoplatPackageSummary]):

codebase_summary = ""
summary_hint="Enhance the existing codebase summary based on current package objective without loosing important details from existing codebase summary. So be cautious while being concise. "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def forward(self, function_metadata: DspyUnoplatFunctionSubset, class_metadata:

class_subset = str(class_metadata.model_dump_json())
function_subset = str(function_metadata.model_dump_json())

function_summary = self.generate_function_summary(chapi_function_metadata=function_subset).unoplat_function_summary

for function_call in function_metadata.function_calls:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,42 +1,56 @@
from typing import Dict, List
import dspy
from unoplat_code_confluence.data_models.dspy.dspy_unoplat_node_summary import DspyUnoplatNodeSummary
from unoplat_code_confluence.data_models.dspy.dspy_unoplat_package_summary import DspyUnoplatPackageNodeSummary
from unoplat_code_confluence.data_models.dspy.dspy_unoplat_package_summary import DspyUnoplatPackageSummary
from loguru import logger

# TODO: optimise using a GPT-4 judge and MIPROv2

# Inputs: the running root-package summary plus one sub-package's summary and
# name. Output: the refined root-package summary.
# The class docstring and every `desc`/`default` string are prompt text handed
# to dspy, so they are kept verbatim.
class CodeConfluenceSubPackageSignature(dspy.Signature):
    """This signature takes in existing summary of a root package and sub package summary and refines root_package_existing_summary with new insights without loosing on existing insights and returns root_package_final_summary. """

    root_package_existing_summary: str = dspy.InputField(
        default="package existing summary:",
        desc="This will contain existing package summary",
    )
    sub_package_summary: str = dspy.InputField(
        desc="This will contain summary of the sub package",
    )
    sub_package_name: str = dspy.InputField(
        desc="This will contain name of the sub package",
    )
    root_package_final_summary: str = dspy.OutputField(
        desc="This will contain improved concise package summary without loosing on existing details",
    )


# Inputs: the running root-package summary, one class objective and the
# package name. Output: the refined root-package summary.
# The class docstring and every `desc`/`default` string are prompt text handed
# to dspy, so they are kept verbatim.
class CodeConfluencePackageSignature(dspy.Signature):
    """This signature takes in existing summary of root package and based on class summary of that package one at a time refines root_package_existing_summary with new insights without loosing on any existing details and returns root_package_final_summary. """

    root_package_existing_summary: str = dspy.InputField(
        default="package existing summary:",
        desc="This will contain existing package summary",
    )
    root_class_objective: str = dspy.InputField(
        desc="This will contain current class objective based on which existing package summary has to be enhanced",
    )
    root_package_name: str = dspy.InputField(
        desc="This will contain name of the package",
    )
    root_package_final_summary: str = dspy.OutputField(
        desc="This will contain improved concise package summary",
    )


# Inputs: the package summary and the package name. Output: the package
# objective.
# The class docstring and every `desc` string are prompt text handed to dspy,
# so they are kept verbatim.
class CodeConfluencePackageObjectiveSignature(dspy.Signature):
    """This signature takes in package summary and returns concise package_objective of the package."""

    root_package_summary: str = dspy.InputField(
        desc="This will contain concise detailed implementation summary of the package",
    )
    root_package_name: str = dspy.InputField(
        desc="This will contain name of the package",
    )
    root_package_objective: str = dspy.OutputField(
        desc="This will contain concise objective of the package based on package summary",
    )

class CodeConfluencePackageModule(dspy.Module):
    """Build a package summary and objective from sub-package and class inputs.

    The running package summary starts empty, is refined once per sub-package
    summary, then once per class objective, and is finally condensed into a
    package objective.
    """

    def __init__(self):
        super().__init__()
        # One predictor per signature; only the sub-package step runs without a hint.
        self.generate_sub_package_summary = dspy.ChainOfThought(CodeConfluenceSubPackageSignature)
        self.generate_package_summary = dspy.ChainOfThoughtWithHint(CodeConfluencePackageSignature)
        self.generate_package_objective = dspy.ChainOfThoughtWithHint(CodeConfluencePackageObjectiveSignature)

    def forward(
        self,
        class_objective_list: List[DspyUnoplatNodeSummary],
        package_name: str,
        sub_package_summaries: Dict[str, DspyUnoplatPackageSummary],
    ) -> dspy.Prediction:
        """Summarise one package.

        Args:
            class_objective_list: Class summaries of the package; each item's
                ``node_objective`` is fed to the summary predictor in turn.
            package_name: Name of the package; embedded in both hint strings.
            sub_package_summaries: Already computed sub-package summaries,
                keyed by sub-package name. ``None`` is treated as "no
                sub-packages".

        Returns:
            A ``dspy.Prediction`` whose ``answer`` is a
            ``DspyUnoplatPackageSummary`` holding the objective, the final
            summary and the unchanged ``class_objective_list``.
        """
        package_summary_hint = "Enhance the package summary +:"+package_name+" based on class objective. Do not extrapolate or make up anything. Strictly be factual and grounded.While enhancing the package summary do not loose any existing important details by being overly concise."
        package_summary = ""

        # Fold the sub-package summaries in first, so class objectives refine
        # a summary that already reflects the nested packages.
        # `or {}` keeps a None argument from crashing on .items().
        for sub_package_name, sub_package_summary in (sub_package_summaries or {}).items():
            sub_package_prediction: dspy.Prediction = self.generate_sub_package_summary(
                root_package_existing_summary=package_summary,
                sub_package_summary=sub_package_summary.package_summary,
                sub_package_name=sub_package_name,
            )
            package_summary = sub_package_prediction.root_package_final_summary

        # Then refine the running summary with one class objective at a time.
        for class_objective in class_objective_list:
            package_prediction: dspy.Prediction = self.generate_package_summary(
                root_package_existing_summary=package_summary,
                root_class_objective=class_objective.node_objective,
                root_package_name=package_name,
                hint=package_summary_hint,
            )
            package_summary = package_prediction.root_package_final_summary

        package_objective_hint = "First capture all highlights from summary and based on highlights generate the package objective for the package by being concise and dnt miss on any details for:"+package_name+". Do not extrapolate or make up anything. Strictly be factual and grounded."
        objective_prediction: dspy.Prediction = self.generate_package_objective(
            root_package_summary=package_summary,
            root_package_name=package_name,
            hint=package_objective_hint,
        )

        dspy_package_summary = DspyUnoplatPackageSummary(
            package_objective=objective_prediction.root_package_objective,
            package_summary=package_summary,
            class_summary=class_objective_list,
        )
        return dspy.Prediction(answer=dspy_package_summary)


Expand Down
Loading

0 comments on commit 0433cdb

Please sign in to comment.