Skip to content

Commit

Permalink
Merge pull request #212 from unoplat/206-ingestion-utility-all-class-…
Browse files Browse the repository at this point in the history
…func-metadata-and-func-to-func-call-ingestion-into-neo4j
  • Loading branch information
JayGhiya authored Dec 20, 2024
2 parents b509efc + 53ff5a9 commit ca26ab4
Show file tree
Hide file tree
Showing 134 changed files with 12,297 additions and 648 deletions.
55 changes: 55 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,58 @@ unoplat-code-confluence-query-engine/unoplat_code_confluence_query_engine/__pyca
unoplat-code-confluence-query-engine/dspy/__pycache__
experiments
unoplat-code-confluence-commons/dist
unoplat-code-confluence/unoplat_code_confluence/llm_pipelines/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_manager/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/package_manager/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/package_manager/pip/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/package_manager/poetry/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/package_manager/utils/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/python/package_naming/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_manager/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_manager/python/pip/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_manager/python/poetry/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_manager/python/utils/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_naming/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/package_naming/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/qualified_name/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/qualified_name/python/__pycache__
unoplat-code-confluence/tests/__pycache__
unoplat-code-confluence/tests/language_custom_parsing/__pycache__
unoplat-code-confluence/tests/language_custom_parsing/import_segregation/__pycache__
unoplat-code-confluence/tests/language_custom_parsing/import_segregation/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/import_segregation/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/import_segregation/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/language_custom_parsing/import_segregation/utils/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/python/pip/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/python/poetry/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/python/utils/__pycache__
unoplat-code-confluence/tests/parser/__pycache__
unoplat-code-confluence/tests/parser/python/__pycache__
unoplat-code-confluence/tests/unoplat_code_confluence/parser/python/__pycache__
unoplat-code-confluence/unoplat_code_confluence/confluence_git/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/pip/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/poetry/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/package_manager/utils/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/utils/__pycache__
unoplat-code-confluence/tests/confluence_git/__pycache__
unoplat-code-confluence/tests/parser/python/node_variables/__pycache__
unoplat-code-confluence/tests/parser/python/in_class_dependency/__pycache__
unoplat-code-confluence/tests/parser/python/function_calls/__pycache__
unoplat-code-confluence/unoplat_code_confluence/data_models/chapi/__pycache__
unoplat-code-confluence/unoplat_code_confluence/data_models/chapi_forge/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/confluence_tree_sitter/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/function_calls/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/in_class_dependency/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/node_variables/__pycache__
unoplat-code-confluence/tests/parser/python/function_metadata/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/python/function_metadata/__pycache__
unoplat-code-confluence/unoplat_code_confluence/parser/tree_sitter/__pycache__
unoplat-code-confluence/unoplat_code_confluence/data_models/forge_summary/__pycache__
unoplat-code-confluence/tests/utility/__pycache__
unoplat-code-confluence/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class ContainsRelationship(StructuredRel):
"""Relationship for representing containment between nodes"""
pass

class AnnotatedRelationship(StructuredRel):
    """Relationship model representing an annotation applied to a node or method."""
    # JSON payload stored on the relationship itself (not on either end node).
    # Presumably the source position of the annotation usage — TODO confirm
    # the expected schema against the ingestion code.
    position = JSONProperty()

class CallsRelationship(StructuredRel):
"""Represents a method call from one method to another."""
parameters = JSONProperty()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from neomodel import StructuredNode, StringProperty, RelationshipFrom, ZeroOrMore, JSONProperty
from neomodel import StructuredNode, StringProperty, Relationship, ZeroOrMore, JSONProperty

from unoplat_code_confluence_commons.graph_models.base_models import AnnotatedRelationship


class ConfluenceAnnotation(StructuredNode):
name = StringProperty(required=True)
key_values = JSONProperty()
position = JSONProperty()
# Relationships
annotated_classes = RelationshipFrom('.confluence_class.ConfluenceClass', 'HAS_ANNOTATION', cardinality=ZeroOrMore)
annotated_methods = RelationshipFrom('.confluence_method.ConfluenceMethod', 'HAS_ANNOTATION', cardinality=ZeroOrMore)
annotated_classes = Relationship('.confluence_class.ConfluenceClass', 'HAS_ANNOTATION', model=AnnotatedRelationship, cardinality=ZeroOrMore)
annotated_methods = Relationship('.confluence_internal_method.ConfluenceInternalMethod', 'HAS_ANNOTATION', model=AnnotatedRelationship, cardinality=ZeroOrMore)
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .base_models import BaseNode, ContainsRelationship
from neomodel import RelationshipFrom, RelationshipTo, StringProperty,ZeroOrMore,One,ArrayProperty,VectorIndex,FloatProperty,JSONProperty
from unoplat_code_confluence_commons.graph_models.base_models import BaseNode, ContainsRelationship, AnnotatedRelationship
from neomodel import RelationshipTo, StringProperty,ZeroOrMore,One,ArrayProperty,FloatProperty,JSONProperty,Relationship

class ConfluenceClass(BaseNode):
"""Represents a class in a package"""
Expand All @@ -15,10 +15,11 @@ class ConfluenceClass(BaseNode):
multiple_extend = ArrayProperty(StringProperty())
position = JSONProperty()
content = StringProperty()
comments_description = StringProperty()
# Class relationships
package = RelationshipTo('.confluence_package.ConfluencePackage', 'BELONGS_TO', model=ContainsRelationship, cardinality=One)
methods = RelationshipTo('.confluence_method.ConfluenceMethod', 'CONTAINS', model=ContainsRelationship, cardinality=ZeroOrMore)
methods = RelationshipTo('.confluence_internal_method.ConfluenceInternalMethod', 'CONTAINS', model=ContainsRelationship, cardinality=ZeroOrMore)
extends = RelationshipTo('.confluence_class.ConfluenceClass', 'EXTENDS', cardinality=ZeroOrMore)
imports = RelationshipTo('.confluence_import.ConfluenceImport', 'IMPORTS', cardinality=ZeroOrMore)
annotations = RelationshipTo('.confluence_annotation.ConfluenceAnnotation', 'HAS_ANNOTATION', cardinality=ZeroOrMore)
fields = RelationshipTo('.confluence_class_field.ConfluenceClassField', 'CONTAINS', model=ContainsRelationship, cardinality=ZeroOrMore)
imports = Relationship('.confluence_import.ConfluenceImport', 'IMPORTS', cardinality=ZeroOrMore)
annotations = Relationship('.confluence_annotation.ConfluenceAnnotation', 'HAS_ANNOTATION', model=AnnotatedRelationship, cardinality=ZeroOrMore)
fields = RelationshipTo('.confluence_class_field.ConfluenceClassField', 'CONTAINS', model=ContainsRelationship, cardinality=ZeroOrMore)
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from neomodel import StructuredNode, StringProperty, RelationshipTo, ZeroOrMore

from unoplat_code_confluence_commons.graph_models.base_models import AnnotatedRelationship


class ConfluenceClassField(StructuredNode):
field_type = StringProperty()
field_name = StringProperty()
annotations = RelationshipTo('.confluence_annotation.ConfluenceAnnotation', 'HAS_ANNOTATION', cardinality=ZeroOrMore)
annotations = RelationshipTo('.confluence_annotation.ConfluenceAnnotation', 'HAS_ANNOTATION', model=AnnotatedRelationship, cardinality=ZeroOrMore)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
FloatProperty

)
from .base_models import BaseNode, ContainsRelationship
from unoplat_code_confluence_commons.graph_models.base_models import BaseNode, ContainsRelationship

class ConfluenceCodebase(BaseNode):
"""Represents a codebase in the system"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from neomodel import StructuredNode, StringProperty,RelationshipTo,ZeroOrMore

class ConfluenceExternalLibrary(StructuredNode):
    """Graph node representing an external library."""
    # Required and backed by a unique index, so it acts as the node's identity.
    library_name = StringProperty(unique_index=True, required=True)
    # Optional descriptive metadata; none of these are required or indexed.
    library_version = StringProperty()
    library_doc_url = StringProperty()
    description = StringProperty()
    # Outgoing CONTAINS edges to the external-method nodes of this library
    # (zero or more).
    contains = RelationshipTo('.confluence_external_method.ConfluenceExternalMethod', 'CONTAINS', cardinality=ZeroOrMore)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@


from neomodel import StructuredNode, StringProperty, RelationshipTo, One, ZeroOrMore
from unoplat_code_confluence_commons.graph_models.base_models import CallsRelationship
from unoplat_code_confluence_commons.graph_models.confluence_method_type import MethodTypeChoices

class ConfluenceExternalMethod(StructuredNode):
    """Graph node representing an external method."""
    # Required and unique-indexed.
    # NOTE(review): uniqueness is on the bare function name only, so two
    # libraries exposing the same function name would map to one node —
    # confirm this is intended.
    function_name = StringProperty(unique_index=True, required=True)
    return_type = StringProperty()
    # Restricted to the values declared in MethodTypeChoices.choices;
    # defaults to MethodTypeChoices.EXTERNAL.
    method_type = StringProperty(choices=MethodTypeChoices.choices,default=MethodTypeChoices.EXTERNAL)
    # Outgoing CALLED_BY edges to internal methods; edge properties are
    # defined by CallsRelationship.
    called_by = RelationshipTo('.confluence_internal_method.ConfluenceInternalMethod', 'CALLED_BY', model=CallsRelationship, cardinality=ZeroOrMore)
    # Outgoing BELONGS_TO edge to the owning library; cardinality One means
    # exactly one library per method.
    library = RelationshipTo('.confluence_external_lib.ConfluenceExternalLibrary', 'BELONGS_TO', cardinality=One)
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from neomodel import StructuredNode, StringProperty, ArrayProperty, RelationshipFrom, ZeroOrMore
from neomodel import StructuredNode, StringProperty, ArrayProperty, Relationship, ZeroOrMore

class ConfluenceImport(StructuredNode):
source = StringProperty(required=True)

usage_names = ArrayProperty(StringProperty())

imported_by = RelationshipFrom('.confluence_class.ConfluenceClass', 'IMPORTS', cardinality=ZeroOrMore)
imported_by = Relationship('.confluence_class.ConfluenceClass', 'IMPORTS', cardinality=ZeroOrMore)
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from unoplat_code_confluence_commons.graph_models.base_models import BaseNode, ContainsRelationship, CallsRelationship, AnnotatedRelationship
from neomodel import RelationshipTo, StringProperty,One,ArrayProperty,FloatProperty,ZeroOrMore,IntegerProperty,JSONProperty,Relationship

class ConfluenceInternalMethod(BaseNode):
    """Graph node representing a method that belongs to a class."""

    # Only required property declared on this node.
    function_name = StringProperty(required=True)
    return_type = StringProperty()
    # Text fields that default to the empty string rather than None.
    implementation_summary = StringProperty(default="")
    objective = StringProperty(default="")
    # Float arrays; presumably embedding vectors of `objective` and
    # `implementation_summary` respectively — TODO confirm producer.
    function_objective_embedding = ArrayProperty(FloatProperty())
    function_implementation_summary_embedding = ArrayProperty(FloatProperty())
    content = StringProperty()
    # Integer hash; presumably computed over the method body — TODO confirm.
    body_hash = IntegerProperty()
    # JSON payload; schema is not visible here.
    local_variables = JSONProperty()
    comments_description = StringProperty()
    # Method relationships
    # Outgoing BELONGS_TO edge to the owning class (exactly one).
    confluence_class = RelationshipTo('.confluence_class.ConfluenceClass', 'BELONGS_TO', model=ContainsRelationship, cardinality=One)
    # Declared with neomodel's direction-agnostic `Relationship`; edge
    # properties are defined by AnnotatedRelationship.
    annotations = Relationship('.confluence_annotation.ConfluenceAnnotation', 'HAS_ANNOTATION', model=AnnotatedRelationship, cardinality=ZeroOrMore)
    # Outgoing CALLS edges to other internal methods and to external methods;
    # both use CallsRelationship for edge properties.
    calls_methods = RelationshipTo('.confluence_internal_method.ConfluenceInternalMethod', 'CALLS', model=CallsRelationship, cardinality=ZeroOrMore)
    calls_external_methods = RelationshipTo('.confluence_external_method.ConfluenceExternalMethod', 'CALLS', model=CallsRelationship, cardinality=ZeroOrMore)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pydantic import BaseModel
from typing import ClassVar, Dict

class MethodTypeChoices(BaseModel):
    """Defines method type choices for use in Neomodel properties."""

    # Stored values. Declared as ClassVar so they are class-level constants
    # rather than pydantic model fields.
    EXTERNAL: ClassVar[str] = 'external'
    UTILITY: ClassVar[str] = 'utility'

    # Maps each stored value to its human-readable label; this is the mapping
    # handed to a property's `choices=` argument.
    choices: ClassVar[Dict[str, str]] = {
        EXTERNAL: 'External',
        UTILITY: 'Programming Language Utility'
    }
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .base_models import BaseNode, ContainsRelationship
from neomodel import RelationshipFrom, RelationshipTo, StringProperty,ZeroOrMore,One,ArrayProperty,VectorIndex,FloatProperty
from unoplat_code_confluence_commons.graph_models.base_models import BaseNode, ContainsRelationship
from neomodel import RelationshipTo, StringProperty,ZeroOrMore,One,ArrayProperty,VectorIndex,FloatProperty

class ConfluencePackage(BaseNode):
"""Represents a package in the codebase"""
Expand Down
2 changes: 1 addition & 1 deletion unoplat-code-confluence/.isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ import_heading_stdlib = Standard Library
import_heading_thirdparty = Third Party
import_heading_firstparty = First Party
import_heading_localfolder = Local
py_version = 311 # For Python 3.12
py_version = 311 # For Python 3.12
47 changes: 47 additions & 0 deletions unoplat-code-confluence/config.dev.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"repositories": [
{
"git_url": "https://github.com/unoplat/unoplat-code-confluence",
"markdown_output_path": "/Users/jayghiya/Documents/unoplat",
"codebases": [
{
"codebase_folder_name": "unoplat-code-confluence",
"root_package_name": "unoplat_code_confluence",
"programming_language_metadata": {
"language": "python",
"package_manager": "poetry",
"language_version": "3.12.0"
}
}
]
}
],
"archguard": {
"download_url": "archguard/archguard",
"download_directory": "/Users/jayghiya/Documents/unoplat"
},
"databases": [
{
"name": "neo4j",
"uri": "bolt://localhost:7687"
}
],
"llm_provider_config": {
"model_provider": "openai/gpt-4o-mini",
"model_provider_args": {
"max_tokens": 500,
"temperature": 0.0
}
},
"logging_handlers": [
{
"sink": "~/Documents/unoplat/app.log",
"format": "<green>{time:YYYY-MM-DD at HH:mm:ss}</green> | <level>{level}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | <magenta>{thread.name}</magenta> - <level>{message}</level>",
"rotation": "10 MB",
"retention": "10 days",
"level": "DEBUG"
}
],
"json_output": false,
"sentence_transformer_model": "jinaai/jina-embeddings-v3"
}
44 changes: 44 additions & 0 deletions unoplat-code-confluence/debug_output/debug_tree_sitter_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

import os
import json
from datetime import datetime

from unoplat_code_confluence.configuration.settings import ProgrammingLanguage
from unoplat_code_confluence.parser.tree_sitter.code_confluence_tree_sitter import CodeConfluenceTreeSitter


def test():
    """Parse a fixed Python snippet and dump its syntax tree to a JSON file.

    The embedded ``run_scan`` snippet is parsed with the project's tree-sitter
    wrapper, the resulting tree is converted into nested dictionaries, and the
    result is written to ``debug_output/function_ast_<timestamp>.json``
    (relative to the current working directory) for manual inspection.
    """

    def node_to_dict(node):
        # Serialise one node; "children" is only emitted for non-leaf nodes.
        raw_text = node.text
        entry = {
            "type": node.type,
            "text": raw_text.decode('utf8') if raw_text else None,
            "start_point": node.start_point,
            "end_point": node.end_point,
        }
        kids = node.children
        if kids:
            entry["children"] = [node_to_dict(kid) for kid in kids]
        return entry

    # The snippet is kept verbatim (including its surrounding newlines) so the
    # parser sees exactly the same bytes on every run.
    snippet = """
def run_scan(self) -> str: # Get total number of files in run_scan self.total_files = self.file_counter.count_files() logger.info("Starting scan...") command = [ "java", "-jar", self.jar_path, "--with-function-code", f"--language={self.language}", "--output=arrow", "--output=json", f"--path={self.codebase_path}", f"--output-dir={self.output_path}" ] logger.info(f"Command: {' '.join(command)}") process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) while True: output = process.stdout.readline() logger.debug(output) if output == '' and process.poll() is not None: break if output: logger.info(output.strip()) progress_value = self.parse_progress(output, total_files=self.total_files) logger.info(f"Progress: {progress_value}%") stdout, stderr = process.communicate() if process.returncode == 0: logger.info("Scan completed successfully") chapi_metadata_path = self.modify_output_filename("0_codes.json", f"{self.codebase_name}_codes.json") else: logger.error(f"Error in scanning: {stderr}") logger.info(f"Total files scanned: {self.total_files}") return chapi_metadata_path
"""

    # Build the parser wrapper and obtain the syntax tree.
    wrapper = CodeConfluenceTreeSitter(language=ProgrammingLanguage.PYTHON)
    syntax_tree = wrapper.parser.parse(snippet.encode("utf8"))

    # Debug: make sure the output directory exists, then stamp the file name.
    debug_dir = "debug_output"
    os.makedirs(debug_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    serialised = json.dumps(node_to_dict(syntax_tree.root_node), indent=2)
    ast_file = f"{debug_dir}/function_ast_{timestamp}.json"
    with open(ast_file, "w") as out:
        out.write(serialised)


if __name__ == "__main__":
    test()
Loading

0 comments on commit ca26ab4

Please sign in to comment.