Skip to content

Commit

Permalink
fix: comment resolution
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Dec 2, 2024
1 parent dbb6824 commit ca066a0
Show file tree
Hide file tree
Showing 13 changed files with 426 additions and 261 deletions.
8 changes: 8 additions & 0 deletions eval-results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Evaluation Results

| Metric | Current | vs Previous | Status |
|--------|---------|-------------|---------|
| Levenshtein | 0.3582 | ↑ 0.0043 ||
| Context Precision | 1.0000 | ↓ 0.0000 ||
| Duration | 38.57s | ↑ 16.27s | ⚠️ |
| Cost | $0.246485 | ↓ 0.0000 ||
17 changes: 12 additions & 5 deletions src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import OpenAI from "openai";
import { Context } from "../../../types";
import { SuperOpenAi } from "./openai";
import { CompletionsModelHelper, ModelApplications } from "../../../types/llm";
import { CompletionsModelHelper, ModelApplications, StreamlinedComment } from "../../../types/llm";
import { encode } from "gpt-tokenizer";
import { logger } from "../../../helpers/errors";
import { createWeightTable } from "../../../handlers/rlhf/completions-scorer";
Expand All @@ -15,6 +15,7 @@ export interface CompletionsType {
total: number;
};
}

export const defaultCompletionsType: CompletionsType = {
answer: "",
groundTruths: [],
Expand All @@ -24,6 +25,7 @@ export const defaultCompletionsType: CompletionsType = {
total: 0,
},
};

export class Completions extends SuperOpenAi {
protected context: Context;

Expand Down Expand Up @@ -84,9 +86,13 @@ export class Completions extends SuperOpenAi {
const sysMsg = [
"You Must obey the following ground truths: ",
JSON.stringify(groundTruths) + "\n",
"You are tasked with assisting as a GitHub bot by generating responses based on provided chat history supported by the phrases ",
"You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and weighted context. The context is weighted based on:",
"1. User Reactions: Positive reactions (👍, ❤️, 🎉, 🚀) increase weight, negative reactions (👎, 😕) decrease weight",
"2. Edit History: Comments that have been refined through edits have higher weight",
"3. Similarity to Current Query: Content more similar to the current question has higher weight",
"\nWeighted Context Table:",
weightPrompt + "\n",
"and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.",
"Your role is to interpret this weighted knowledge effectively to answer user questions, giving more consideration to higher-weighted content.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and weighted responses, prioritizing higher-weighted content.\n2. **Extract Relevant Information**: Focus on information from highly-weighted sources, which represent community-validated content.\n3. **Apply Knowledge**: Use the extracted information, considering both content relevance and community feedback.\n4. **Draft Response**: Compile insights into a coherent response, emphasizing information from highly-weighted sources.\n5. **Review and Refine**: Ensure accuracy and alignment with the weighted context.\n\n# Output Format\n\n- Concise and coherent responses that directly address the user's question.\n- Prioritize information from highly-weighted sources.\n- Include code snippets or references when relevant.\n\n# Notes\n\n- Higher weights indicate stronger community validation through reactions and refinements.\n- Consider both the content and its weight when forming responses.\n- Balance between different sources based on their weights.",
`Your name is: ${botName}`,
"\n",
"Main Context (Provide additional precedence in terms of information): ",
Expand Down Expand Up @@ -149,9 +155,10 @@ export class Completions extends SuperOpenAi {
localContext: string[],
groundTruths: string[],
botName: string,
maxTokens: number
maxTokens: number,
weightedComments: StreamlinedComment[] = []
): Promise<CompletionsType> {
const weightPrompt = await createWeightTable(this.context);
const weightPrompt = await createWeightTable(weightedComments);
return await this.createCompletion(query, model, additionalContext, localContext, groundTruths, botName, maxTokens, weightPrompt);
}

Expand Down
52 changes: 0 additions & 52 deletions src/adapters/supabase/helpers/weights.ts

This file was deleted.

110 changes: 81 additions & 29 deletions src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
@@ -1,60 +1,87 @@
/// Implementation of the LLM question answering system
/// This module handles asking questions to the LLM using context from issues,
/// comments, and repository information. It now uses a weighted comment system
/// based on reactions and edit history instead of Supabase similarity search.

import { Context } from "../types";
import { CompletionsType } from "../adapters/openai/helpers/completions";
import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { recursivelyFetchLinkedIssues } from "../helpers/issue-fetching";
import { formatChatHistory } from "../helpers/format-chat-history";
import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/chat-bot";
import { findGroundTruths } from "./ground-truths/find-ground-truths";
import { bubbleUpErrorComment, logger } from "../helpers/errors";
import { calculateTextScore } from "../helpers/trigram-weights";
import { StreamlinedComment } from "../types/llm";

/// Find most relevant comments based on weights and similarity to question
/// Uses the new weighted comments system that combines reactions and edit history
/// to determine relevance along with textual similarity to the question
/// Find most relevant comments based on weights and similarity to question
/// Uses the new weighted comments system that combines reactions and edit history
/// to determine relevance along with textual similarity to the question
async function findRelevantComments(question: string, comments: StreamlinedComment[], threshold: number, maxResults: number = 5): Promise<string[]> {
  /// Rank every comment that has a body by a combined score:
  /// its reaction/edit-history weight plus its trigram similarity to the question
  const ranked = comments
    .filter((candidate) => candidate.body)
    .map((candidate) => {
      const weight = candidate.weight || 0;
      const similarity = calculateTextScore(question, [candidate]);
      return { candidate, score: weight + similarity };
    })
    .sort((left, right) => right.score - left.score);

  /// Keep only entries at or above the threshold, capped at maxResults,
  /// and return their bodies (dropping any empty ones)
  const passing = ranked.filter((entry) => entry.score >= threshold).slice(0, maxResults);
  return passing.map((entry) => entry.candidate.body || "").filter(Boolean);
}

export async function askQuestion(context: Context, question: string) {
if (!question) {
throw logger.error("No question provided");
}
// using any links in comments or issue/pr bodies to fetch more context

/// Using any links in comments or issue/pr bodies to fetch more context
const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
context,
owner: context.payload.repository.owner.login,
repo: context.payload.repository.name,
});
// build a nicely structured system message containing a streamlined chat history
// includes the current issue, any linked issues, and any linked PRs

/// Get all comments as a flat array for processing
const allComments = Object.values(streamlinedComments).flat();

/// Find relevant comments based on weights and question similarity
/// This replaces the previous Supabase similarity search with our new weighted system
const relevantComments = await findRelevantComments(question, allComments, context.config.similarityThreshold);

/// Build a nicely structured system message containing a streamlined chat history
/// Includes the current issue, any linked issues, and any linked PRs
const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
logger.info(`${formattedChat.join("")}`);
return await askLlm(context, question, formattedChat);

return await askLlm(context, question, formattedChat, relevantComments, allComments);
}

export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise<CompletionsType> {
export async function askLlm(
context: Context,
question: string,
formattedChat: string[],
relevantComments: string[],
weightedComments: StreamlinedComment[]
): Promise<CompletionsType> {
const {
env: { UBIQUITY_OS_APP_NAME },
config: { model, similarityThreshold, maxTokens },
config: { model, maxTokens },
adapters: {
supabase: { comment, issue },
voyage: { reranker },
openai: { completions },
},
} = context;

try {
// using db functions to find similar comments and issues
const [similarComments, similarIssues] = await Promise.all([
comment.findSimilarComments(question, 1 - similarityThreshold, ""),
issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
]);

// combine the similar comments and issues into a single array
const similarText = [
...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
];

// filter out any empty strings
/// Filter out any empty strings from the chat history
formattedChat = formattedChat.filter((text) => text);

// rerank the similar text using voyageai
const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
// gather structural data about the payload repository
/// Gather structural data about the payload repository
const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

let groundTruths: string[] = [];
Expand All @@ -72,11 +99,36 @@ export async function askLlm(context: Context, question: string, formattedChat:
}

if (groundTruths.length === 3) {
return await completions.createCompletionWithHF(10, question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
return await completions.createCompletionWithHF(
10,
question,
model,
relevantComments,
formattedChat,
groundTruths,
UBIQUITY_OS_APP_NAME,
maxTokens,
weightedComments
);
}

groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
return await completions.createCompletionWithHF(10, question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
groundTruths = await findGroundTruths(context, "chat-bot", {
languages,
dependencies,
devDependencies,
});

return await completions.createCompletionWithHF(
10,
question,
model,
relevantComments,
formattedChat,
groundTruths,
UBIQUITY_OS_APP_NAME,
maxTokens,
weightedComments
);
} catch (error) {
throw bubbleUpErrorComment(context, error, false);
}
Expand Down
25 changes: 20 additions & 5 deletions src/handlers/comments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { logger } from "../helpers/errors";
import { splitKey } from "../helpers/issue";
import { LinkedIssues, SimplifiedComment } from "../types/github-types";
import { StreamlinedComment } from "../types/llm";
import { processCommentsWithWeights } from "../helpers/weights";
import { Context } from "../types/context";

/**
* Get all streamlined comments from linked issues
Expand All @@ -15,7 +17,7 @@ export async function getAllStreamlinedComments(linkedIssues: LinkedIssues[]) {
const linkedIssueComments = issue.comments || [];
if (linkedIssueComments.length === 0) continue;

const linkedStreamlinedComments = streamlineComments(linkedIssueComments);
const linkedStreamlinedComments = await streamlineComments(linkedIssueComments, issue.context);
if (!linkedStreamlinedComments) continue;

for (const [key, value] of Object.entries(linkedStreamlinedComments)) {
Expand Down Expand Up @@ -71,13 +73,16 @@ export function createKey(issueUrl: string, issue?: number) {
}

/**
* Streamline comments by filtering out bot comments and organizing them by issue key
* Streamline comments by filtering out bot comments, organizing them by issue key,
* and calculating weights based on reactions and edits
* @param comments - The comments to streamline
* @param context - The context object containing octokit client
* @returns The streamlined comments grouped by issue key
*/
export function streamlineComments(comments: SimplifiedComment[]) {
export async function streamlineComments(comments: SimplifiedComment[], context: Context) {
const streamlined: Record<string, StreamlinedComment[]> = {};

// First pass: organize comments by key
for (const comment of comments) {
const { user, issueUrl: url, body } = comment;
if (user?.type === "Bot") continue;
Expand All @@ -88,14 +93,24 @@ export function streamlineComments(comments: SimplifiedComment[]) {

if (user && body) {
streamlined[key].push({
user: user.login,
user,
body,
id: parseInt(comment.id, 10),
id: comment.id,
org: owner,
repo,
issueUrl: url,
});
}
}

// Second pass: process weights for each group of comments
for (const [key, groupComments] of Object.entries(streamlined)) {
const weightedComments = await processCommentsWithWeights(context, groupComments);
streamlined[key] = weightedComments.map((comment) => ({
...comment,
id: comment.id.toString(),
}));
}

return streamlined;
}
Loading

0 comments on commit ca066a0

Please sign in to comment.