-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #212 from unoplat/206-ingestion-utility-all-class-…
…func-metadata-and-func-to-func-call-ingestion-into-neo4j
- Loading branch information
Showing
134 changed files
with
12,297 additions
and
648 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 6 additions & 4 deletions
10
...-confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_annotation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
from neomodel import StructuredNode, StringProperty, RelationshipFrom, ZeroOrMore, JSONProperty | ||
from neomodel import StructuredNode, StringProperty, Relationship, ZeroOrMore, JSONProperty | ||
|
||
from unoplat_code_confluence_commons.graph_models.base_models import AnnotatedRelationship | ||
|
||
|
||
class ConfluenceAnnotation(StructuredNode):
    """Graph node describing a single annotation.

    The node is linked through ``HAS_ANNOTATION`` relationships, modelled by
    ``AnnotatedRelationship``, to ``ConfluenceClass`` and
    ``ConfluenceInternalMethod`` nodes.
    """

    name = StringProperty(required=True)
    key_values = JSONProperty()
    position = JSONProperty()

    # Relationships
    # Each name is defined exactly once. The earlier ``RelationshipFrom``
    # definitions of these two attributes were overwritten by the assignments
    # below (so they never took effect), and the one for methods still pointed
    # at the removed ``confluence_method`` module.
    annotated_classes = Relationship(
        '.confluence_class.ConfluenceClass',
        'HAS_ANNOTATION',
        model=AnnotatedRelationship,
        cardinality=ZeroOrMore,
    )
    annotated_methods = Relationship(
        '.confluence_internal_method.ConfluenceInternalMethod',
        'HAS_ANNOTATION',
        model=AnnotatedRelationship,
        cardinality=ZeroOrMore,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 4 additions & 1 deletion
5
...confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_class_field.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
from neomodel import StructuredNode, StringProperty, RelationshipTo, ZeroOrMore | ||
|
||
from unoplat_code_confluence_commons.graph_models.base_models import AnnotatedRelationship | ||
|
||
|
||
class ConfluenceClassField(StructuredNode):
    """Graph node for a class field: its type, its name and its annotations."""

    field_type = StringProperty()
    field_name = StringProperty()

    # Defined once only: a preceding definition without ``model=`` was
    # overwritten by this one and therefore never took effect.
    annotations = RelationshipTo(
        '.confluence_annotation.ConfluenceAnnotation',
        'HAS_ANNOTATION',
        model=AnnotatedRelationship,
        cardinality=ZeroOrMore,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
...onfluence-commons/unoplat_code_confluence_commons/graph_models/confluence_external_lib.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from neomodel import StructuredNode, StringProperty,RelationshipTo,ZeroOrMore | ||
|
||
class ConfluenceExternalLibrary(StructuredNode):
    """Graph node for an external library and the external methods it contains."""

    # The library name is mandatory and carries a unique index.
    library_name = StringProperty(required=True, unique_index=True)
    library_version = StringProperty()
    library_doc_url = StringProperty()
    description = StringProperty()

    # Outgoing CONTAINS relationships to ConfluenceExternalMethod nodes.
    contains = RelationshipTo(
        '.confluence_external_method.ConfluenceExternalMethod',
        'CONTAINS',
        cardinality=ZeroOrMore,
    )
13 changes: 13 additions & 0 deletions
13
...luence-commons/unoplat_code_confluence_commons/graph_models/confluence_external_method.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
|
||
from neomodel import StructuredNode, StringProperty, RelationshipTo, One, ZeroOrMore | ||
from unoplat_code_confluence_commons.graph_models.base_models import CallsRelationship | ||
from unoplat_code_confluence_commons.graph_models.confluence_method_type import MethodTypeChoices | ||
|
||
class ConfluenceExternalMethod(StructuredNode):
    """Graph node for a method that belongs to an external library."""

    # NOTE(review): the unique index is on the function name alone, so two
    # libraries exposing the same function name would presumably collide --
    # confirm this is intended.
    function_name = StringProperty(required=True, unique_index=True)
    return_type = StringProperty()

    # Restricted to MethodTypeChoices.choices; EXTERNAL when not set explicitly.
    method_type = StringProperty(
        default=MethodTypeChoices.EXTERNAL,
        choices=MethodTypeChoices.choices,
    )

    # CALLED_BY relationships to the internal methods, modelled by
    # CallsRelationship.
    called_by = RelationshipTo(
        '.confluence_internal_method.ConfluenceInternalMethod',
        'CALLED_BY',
        model=CallsRelationship,
        cardinality=ZeroOrMore,
    )

    # Exactly one owning library (cardinality One).
    library = RelationshipTo(
        '.confluence_external_lib.ConfluenceExternalLibrary',
        'BELONGS_TO',
        cardinality=One,
    )
4 changes: 2 additions & 2 deletions
4
...code-confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_import.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
from neomodel import StructuredNode, StringProperty, ArrayProperty, RelationshipFrom, ZeroOrMore | ||
from neomodel import StructuredNode, StringProperty, ArrayProperty, Relationship, ZeroOrMore | ||
|
||
class ConfluenceImport(StructuredNode):
    """Graph node for an import, identified by its required ``source``."""

    source = StringProperty(required=True)

    # Presumably the names used from ``source`` -- verify against the ingestion code.
    usage_names = ArrayProperty(StringProperty())

    # Defined once only: a preceding ``RelationshipFrom`` definition of the
    # same attribute was overwritten by this one and never took effect.
    # NOTE(review): ``Relationship`` does not pin a direction the way
    # ``RelationshipFrom`` did -- confirm that direction-agnostic matching of
    # IMPORTS edges is intended.
    imported_by = Relationship(
        '.confluence_class.ConfluenceClass',
        'IMPORTS',
        cardinality=ZeroOrMore,
    )
21 changes: 21 additions & 0 deletions
21
...luence-commons/unoplat_code_confluence_commons/graph_models/confluence_internal_method.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from unoplat_code_confluence_commons.graph_models.base_models import BaseNode, ContainsRelationship, CallsRelationship, AnnotatedRelationship | ||
from neomodel import RelationshipTo, StringProperty,One,ArrayProperty,FloatProperty,ZeroOrMore,IntegerProperty,JSONProperty,Relationship | ||
|
||
class ConfluenceInternalMethod(BaseNode):
    """Graph node for a method that belongs to exactly one ConfluenceClass."""

    # Signature
    function_name = StringProperty(required=True)
    return_type = StringProperty()

    # Summaries (empty string by default) and their embedding vectors
    implementation_summary = StringProperty(default="")
    objective = StringProperty(default="")
    function_objective_embedding = ArrayProperty(FloatProperty())
    function_implementation_summary_embedding = ArrayProperty(FloatProperty())

    # Source-level details
    content = StringProperty()
    body_hash = IntegerProperty()
    local_variables = JSONProperty()
    comments_description = StringProperty()

    # Method relationships
    confluence_class = RelationshipTo(
        '.confluence_class.ConfluenceClass',
        'BELONGS_TO',
        model=ContainsRelationship,
        cardinality=One,
    )
    annotations = Relationship(
        '.confluence_annotation.ConfluenceAnnotation',
        'HAS_ANNOTATION',
        model=AnnotatedRelationship,
        cardinality=ZeroOrMore,
    )
    # Both call relationships share the CALLS type and the CallsRelationship
    # model; they differ only in the target node class.
    calls_methods = RelationshipTo(
        '.confluence_internal_method.ConfluenceInternalMethod',
        'CALLS',
        model=CallsRelationship,
        cardinality=ZeroOrMore,
    )
    calls_external_methods = RelationshipTo(
        '.confluence_external_method.ConfluenceExternalMethod',
        'CALLS',
        model=CallsRelationship,
        cardinality=ZeroOrMore,
    )
21 changes: 0 additions & 21 deletions
21
...code-confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_method.py
This file was deleted.
Oops, something went wrong.
13 changes: 13 additions & 0 deletions
13
...confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_method_type.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from pydantic import BaseModel | ||
from typing import ClassVar, Dict | ||
|
||
class MethodTypeChoices(BaseModel):
    """Method type constants plus a ``choices`` mapping for Neomodel properties."""

    # Values of the method type.
    EXTERNAL: ClassVar[str] = 'external'
    UTILITY: ClassVar[str] = 'utility'

    # Maps each value above to what looks like its human-readable label.
    choices: ClassVar[Dict[str, str]] = {
        EXTERNAL: 'External',
        UTILITY: 'Programming Language Utility',
    }
4 changes: 2 additions & 2 deletions
4
...ode-confluence-commons/unoplat_code_confluence_commons/graph_models/confluence_package.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{ | ||
"repositories": [ | ||
{ | ||
"git_url": "https://github.com/unoplat/unoplat-code-confluence", | ||
"markdown_output_path": "/Users/jayghiya/Documents/unoplat", | ||
"codebases": [ | ||
{ | ||
"codebase_folder_name": "unoplat-code-confluence", | ||
"root_package_name": "unoplat_code_confluence", | ||
"programming_language_metadata": { | ||
"language": "python", | ||
"package_manager": "poetry", | ||
"language_version": "3.12.0" | ||
} | ||
} | ||
] | ||
} | ||
], | ||
"archguard": { | ||
"download_url": "archguard/archguard", | ||
"download_directory": "/Users/jayghiya/Documents/unoplat" | ||
}, | ||
"databases": [ | ||
{ | ||
"name": "neo4j", | ||
"uri": "bolt://localhost:7687" | ||
} | ||
], | ||
"llm_provider_config": { | ||
"model_provider": "openai/gpt-4o-mini", | ||
"model_provider_args": { | ||
"max_tokens": 500, | ||
"temperature": 0.0 | ||
} | ||
}, | ||
"logging_handlers": [ | ||
{ | ||
"sink": "~/Documents/unoplat/app.log", | ||
"format": "<green>{time:YYYY-MM-DD at HH:mm:ss}</green> | <level>{level}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | <magenta>{thread.name}</magenta> - <level>{message}</level>", | ||
"rotation": "10 MB", | ||
"retention": "10 days", | ||
"level": "DEBUG" | ||
} | ||
], | ||
"json_output": false, | ||
"sentence_transformer_model": "jinaai/jina-embeddings-v3" | ||
} |
File renamed without changes.
44 changes: 44 additions & 0 deletions
44
unoplat-code-confluence/debug_output/debug_tree_sitter_content.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
import os | ||
import json | ||
from datetime import datetime | ||
|
||
from unoplat_code_confluence.configuration.settings import ProgrammingLanguage | ||
from unoplat_code_confluence.parser.tree_sitter.code_confluence_tree_sitter import CodeConfluenceTreeSitter | ||
|
||
|
||
def test():
    """Parse a sample snippet with tree-sitter and dump its AST to a JSON file.

    The output is written to ``debug_output/function_ast_<timestamp>.json``;
    the directory is created when it does not exist yet.
    """
    # NOTE(review): the sample below appears collapsed onto a single line, so
    # everything after the first ``#`` is a comment to the parser -- confirm
    # against the original multi-line snippet.
    code = """
def run_scan(self) -> str: # Get total number of files in run_scan self.total_files = self.file_counter.count_files() logger.info("Starting scan...") command = [ "java", "-jar", self.jar_path, "--with-function-code", f"--language={self.language}", "--output=arrow", "--output=json", f"--path={self.codebase_path}", f"--output-dir={self.output_path}" ] logger.info(f"Command: {' '.join(command)}") process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) while True: output = process.stdout.readline() logger.debug(output) if output == '' and process.poll() is not None: break if output: logger.info(output.strip()) progress_value = self.parse_progress(output, total_files=self.total_files) logger.info(f"Progress: {progress_value}%") stdout, stderr = process.communicate() if process.returncode == 0: logger.info("Scan completed successfully") chapi_metadata_path = self.modify_output_filename("0_codes.json", f"{self.codebase_name}_codes.json") else: logger.error(f"Error in scanning: {stderr}") logger.info(f"Total files scanned: {self.total_files}") return chapi_metadata_path
"""

    def node_to_dict(node):
        """Recursively turn a syntax-tree node into a plain dict."""
        as_dict = {
            "type": node.type,
            "text": node.text.decode('utf8') if node.text else None,
            "start_point": node.start_point,
            "end_point": node.end_point,
        }
        # Leaf nodes get no "children" key at all.
        if node.children:
            as_dict["children"] = [node_to_dict(child) for child in node.children]
        return as_dict

    # Parse and get AST
    tree_sitter = CodeConfluenceTreeSitter(language=ProgrammingLanguage.PYTHON)
    tree = tree_sitter.parser.parse(bytes(code, "utf8"))

    # Debug: save AST to JSON under a timestamped file name
    debug_dir = "debug_output"
    os.makedirs(debug_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    ast_dict = node_to_dict(tree.root_node)
    output_path = f"{debug_dir}/function_ast_{timestamp}.json"
    with open(output_path, "w") as handle:
        json.dump(ast_dict, handle, indent=2)
|
||
|
||
if __name__ == "__main__":
    # Run the AST dump when this file is executed as a script.
    test()
Oops, something went wrong.