Minor parsing improvements #134

Merged: 2 commits, Oct 28, 2024
2 changes: 1 addition & 1 deletion app/modules/github/github_service.py
@@ -60,7 +60,7 @@ def get_github_repo_details(self, repo_name: str) -> Tuple[Github, Dict, str]:
         }
         response = requests.get(url, headers=headers)
         if response.status_code != 200:
-            logger.error(
+            logger.info(
                 f"Failed to get installation ID for {repo_name}. Status code: {response.status_code}, Response: {response.text}"
             )
             raise HTTPException(
22 changes: 19 additions & 3 deletions app/modules/intelligence/provider/provider_service.py
@@ -38,16 +38,32 @@ async def list_available_llms(self) -> List[ProviderInfo]:
         ]

     async def set_global_ai_provider(self, user_id: str, provider: str):
+        provider = provider.lower()
+        # First check if preferences exist
         preferences = self.db.query(UserPreferences).filter_by(user_id=user_id).first()

         if not preferences:
-            preferences = UserPreferences(user_id=user_id, preferences={})
+            # Create new preferences if they don't exist
+            preferences = UserPreferences(user_id=user_id, preferences={"llm_provider": provider})
             self.db.add(preferences)
+        else:
+            # Initialize preferences dict if None
+            if preferences.preferences is None:
+                preferences.preferences = {}
+
+            # Update the provider in preferences
+            preferences.preferences["llm_provider"] = provider
+
+            # Explicit update query
+            self.db.query(UserPreferences).filter_by(user_id=user_id).update(
+                {"preferences": preferences.preferences}
+            )

         PostHogClient().send_event(
             user_id, "provider_change_event", {"provider": provider}
         )
-        preferences.preferences["llm_provider"] = provider

         self.db.commit()

         return {"message": f"AI provider set to {provider}"}
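A note on the explicit update query above: SQLAlchemy does not detect in-place mutation of a plain `JSON` column, so assigning `preferences.preferences["llm_provider"] = provider` on an existing row can be silently lost at commit; the bulk `update()` forces the new dict to be persisted. An alternative, sketched below under the assumption that the model definition is free to change (this PR leaves it alone), is to wrap the column in `MutableDict` so in-place edits mark the row dirty automatically:

```python
from sqlalchemy import JSON, Column, String
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class UserPreferences(Base):
    # Table/column names mirror the diff; this is not the
    # project's actual model file.
    __tablename__ = "user_preferences"

    user_id = Column(String, primary_key=True)
    # MutableDict intercepts key assignment on the stored dict, so
    # preferences["llm_provider"] = "openai" flags the row as dirty
    # and commits cleanly without an explicit bulk update() query.
    preferences = Column(MutableDict.as_mutable(JSON), default=dict)
```

With that column type, the `else` branch would reduce to the single dict assignment followed by `self.db.commit()`.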
1 change: 1 addition & 0 deletions app/modules/parsing/graph_construction/parsing_helper.py
@@ -97,6 +97,7 @@ def open_text_file(file_path):
             "mov",
             "wmv",
             "flv",
+            "ipynb",
         ]
         include_extensions = [
             "py",
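For context, these lists gate which repository files get parsed: `ipynb` joins what appears to be an exclusion list of binary/media formats, so Jupyter notebooks (JSON with embedded outputs) are skipped. A sketch of how such extension gating typically works (the helper name and the truncated `include_extensions` are illustrative, not the repo's actual code):

```python
from pathlib import Path

exclude_extensions = ["mov", "wmv", "flv", "ipynb"]
include_extensions = ["py", "js", "ts"]  # truncated for illustration

def should_parse(file_path: str) -> bool:
    # Strip the leading dot so "notebook.ipynb" yields "ipynb".
    ext = Path(file_path).suffix.lstrip(".").lower()
    return ext in include_extensions and ext not in exclude_extensions

print(should_parse("notebook.ipynb"))  # False
print(should_parse("service.py"))      # True
```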
1 change: 0 additions & 1 deletion app/modules/parsing/graph_construction/parsing_repomap.py
@@ -540,7 +540,6 @@ def create_graph(self, repo_dir):

         for file in files:
             file_count += 1
-            logging.info(f"Processing file number: {file_count}")

             file_path = os.path.join(root, file)
             rel_path = os.path.relpath(file_path, repo_dir)
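Dropping the per-file `logging.info` removes one log line per repository file, which can mean thousands of entries per parse. If coarse progress visibility is still wanted, one common alternative (not part of this PR) is throttled logging:

```python
import logging

logger = logging.getLogger(__name__)

def log_progress(file_count: int, every: int = 500) -> None:
    # Emit one line per `every` files instead of per file,
    # keeping progress visible without flooding the parse logs.
    if file_count % every == 0:
        logger.info(f"Processed {file_count} files")
```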
10 changes: 6 additions & 4 deletions app/modules/parsing/knowledge_graph/inference_service.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 import re
 from typing import Dict, List, Optional

@@ -33,6 +34,7 @@ def __init__(self, db: Session, user_id: Optional[str] = "dummy"):
         self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
         self.search_service = SearchService(db)
         self.project_manager = ProjectService(db)
+        self.parallel_requests = int(os.getenv("PARALLEL_REQUESTS", 50))

     def close(self):
         self.driver.close()
@@ -187,7 +189,7 @@ def get_entry_points_for_nodes(
         }

     def batch_nodes(
-        self, nodes: List[Dict], max_tokens: int = 32000, model: str = "gpt-4"
+        self, nodes: List[Dict], max_tokens: int = 16000, model: str = "gpt-4"
     ) -> List[List[DocstringRequest]]:
         batches = []
         current_batch = []
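The `max_tokens` budget is halved from 32000 to 16000 here and in `batch_entry_points` below, so each LLM request carries at most half as much text. A rough sketch of the greedy token-budget batching a method like `batch_nodes` performs (the whitespace token count is a crude stand-in; the real tokenizer is not shown in this diff):

```python
from typing import List

def batch_by_tokens(texts: List[str], max_tokens: int = 16000) -> List[List[str]]:
    # Greedy packing: start a new batch whenever adding the next text
    # would push the running token total past the budget.
    batches: List[List[str]] = []
    current: List[str] = []
    current_tokens = 0
    for text in texts:
        tokens = len(text.split())  # crude proxy for a real token count
        if current and current_tokens + tokens > max_tokens:
            batches.append(current)
            current, current_tokens = [], 0
        current.append(text)
        current_tokens += tokens
    if current:
        batches.append(current)
    return batches
```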
@@ -252,7 +254,7 @@ async def generate_docstrings_for_entry_points(
             entry_points_neighbors, docstring_lookup
         )

-        semaphore = asyncio.Semaphore(10)  # Limit to 10 concurrent tasks
+        semaphore = asyncio.Semaphore(self.parallel_requests)  # Limit concurrent tasks to PARALLEL_REQUESTS

         async def process_batch(batch):
             async with semaphore:
@@ -292,7 +294,7 @@ def batch_entry_points(
         self,
         entry_points_neighbors: Dict[str, List[str]],
         docstring_lookup: Dict[str, str],
-        max_tokens: int = 32000,
+        max_tokens: int = 16000,
         model: str = "gpt-4",
     ) -> List[List[Dict[str, str]]]:
         batches = []
@@ -442,7 +444,7 @@ async def generate_docstrings(self, repo_id: str) -> Dict[str, DocstringResponse]:
         batches = self.batch_nodes(nodes)
         all_docstrings = {"docstrings": []}

-        semaphore = asyncio.Semaphore(10)  # Limit to 10 concurrent tasks
+        semaphore = asyncio.Semaphore(self.parallel_requests)  # Limit concurrent tasks to PARALLEL_REQUESTS

         async def process_batch(batch):
             async with semaphore:
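Both former `Semaphore(10)` call sites now share one env-driven limit, defaulting to 50. A minimal standalone sketch of the pattern, with a stubbed `process_batch` standing in for the real docstring-generation work:

```python
import asyncio
import os

# Read the concurrency limit from the environment, defaulting to 50
# as in InferenceService.__init__ above.
PARALLEL_REQUESTS = int(os.getenv("PARALLEL_REQUESTS", 50))

async def process_batch(semaphore: asyncio.Semaphore, batch: list) -> int:
    async with semaphore:
        # Placeholder for the real LLM call.
        await asyncio.sleep(0.01)
        return len(batch)

async def main() -> None:
    semaphore = asyncio.Semaphore(PARALLEL_REQUESTS)
    batches = [[1, 2], [3], [4, 5, 6]]
    results = await asyncio.gather(
        *(process_batch(semaphore, b) for b in batches)
    )
    print(results)  # [2, 1, 3]

asyncio.run(main())
```

Setting `PARALLEL_REQUESTS=10` reproduces the previous hard-coded behavior.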