From 4b7b7a8d45a8d799d474357ae91aa7200b508641 Mon Sep 17 00:00:00 2001 From: zugdev Date: Tue, 10 Dec 2024 16:44:37 -0300 Subject: [PATCH] chore: cleanup search --- src/home/issues-search.ts | 162 ------------------- src/home/search/search-scorer.ts | 164 -------------------- src/home/search/string-similarity.ts | 27 ---- src/home/sorting/filter-issues-by-search.ts | 12 -- src/home/types/search-types.ts | 30 ---- 5 files changed, 395 deletions(-) delete mode 100644 src/home/issues-search.ts delete mode 100644 src/home/search/search-scorer.ts delete mode 100644 src/home/search/string-similarity.ts delete mode 100644 src/home/sorting/filter-issues-by-search.ts delete mode 100644 src/home/types/search-types.ts diff --git a/src/home/issues-search.ts b/src/home/issues-search.ts deleted file mode 100644 index a80280c..0000000 --- a/src/home/issues-search.ts +++ /dev/null @@ -1,162 +0,0 @@ -import { GitHubNotifications } from "./github-types"; -import { SearchResult, SearchWeights, SearchConfig } from "./types/search-types"; -import { SearchScorer } from "./search/search-scorer"; - -export class IssueSearch { - private readonly _weights: SearchWeights = { - title: 0.375, - body: 0.25, - fuzzy: 0.25, - meta: 0.125, - repo: 0.1, - }; - - private readonly _config: SearchConfig = { - fuzzySearchThreshold: 0.7, - exactMatchBonus: 1.0, - fuzzyMatchWeight: 0.7, - }; - - private readonly _searchScorer: SearchScorer; - private _searchableIssues: Map = new Map(); - - constructor(private _taskManager: TaskManager) { - this._searchScorer = new SearchScorer(this._config); - } - - public async initializeIssues(issues: GitHubNotifications) { - this._searchableIssues.clear(); - issues.forEach((issue) => { - const searchableContent = this._getSearchableContent(issue); - this._searchableIssues.set(issue.id, searchableContent); - }); - } - - public search(searchText: string): Map { - let filterText = searchText.toLowerCase().trim(); - const results = new Map(); - const isFuzzySearchEnabled = filterText.startsWith("?"); - - if (isFuzzySearchEnabled) { - filterText = filterText.slice(1).trim(); - } - - if (!filterText) { - for (const id of this._searchableIssues.keys()) { - results.set(id, this._createEmptyResult()); - } - return results; - } - - const searchTerms = this._preprocessSearchTerms(filterText); - - for (const issueId of this._searchableIssues.keys()) { - const issue = this._taskManager.getGitHubIssueById(issueId); - if (!issue) { - results.set(issueId, this._createEmptyResult(false)); - continue; - } - - const result = this._calculateIssueRelevance(issue, searchTerms, isFuzzySearchEnabled); - results.set(issueId, result); - } - - this._calculateNDCGScore(results); - return results; - } - - private _calculateIssueRelevance(issue: GitHubNotifications, searchTerms: string[], enableFuzzy: boolean): SearchResult { - const matchDetails = { - titleMatches: [] as string[], - bodyMatches: [] as string[], - labelMatches: [] as string[], - numberMatch: false, - repoMatch: false, - fuzzyMatches: [] as Array<{ - original: string; - matched: string; - score: number; - }>, - }; - - const searchableContent = this._searchableIssues.get(issue.id) || this._getSearchableContent(issue); - - // Calculate individual scores - const scores = { - title: this._searchScorer.calculateTitleScore(issue, searchTerms, matchDetails), - body: this._searchScorer.calculateBodyScore(issue, searchTerms, matchDetails), - fuzzy: enableFuzzy ? this._searchScorer.calculateFuzzyScore(searchableContent, searchTerms, matchDetails) : 0, - meta: this._searchScorer.calculateMetaScore(issue, searchTerms, matchDetails), - repo: this._searchScorer.calculateRepoScore(issue, searchTerms, matchDetails), - }; - - // Calculate weighted total score - const totalScore = Object.entries(scores).reduce((total, [key, score]) => { - return total + score * this._weights[key as keyof SearchWeights]; - }, 0); - - const isVisible = totalScore > 0 || matchDetails.numberMatch; - - return { - visible: isVisible, - score: isVisible ? totalScore : 0, - matchDetails, - }; - } - - private _calculateNDCGScore(results: Map): number { - const scores = Array.from(results.values()) - .filter((r) => r.visible) - .map((r) => r.score) - .sort((a, b) => b - a); - - if (scores.length === 0) return 0; - - const dcg = scores.reduce((sum, score, index) => { - return sum + (Math.pow(2, score) - 1) / Math.log2(index + 2); - }, 0); - - const idcg = [...scores] - .sort((a, b) => b - a) - .reduce((sum, score, index) => { - return sum + (Math.pow(2, score) - 1) / Math.log2(index + 2); - }, 0); - - return idcg === 0 ? 0 : dcg / idcg; - } - - private _preprocessSearchTerms(searchText: string): string[] { - return searchText - .split(/\s+/) - .filter(Boolean) - .map((term) => term.toLowerCase()); - } - - private _getSearchableContent(issue: GitHubNotifications): string { - // Remove URLs from the content - const removeUrls = (text: string): string => { - return text.replace(/(?:https?:\/\/|http?:\/\/|www\.)[^\s]+/g, ""); - }; - - const title = issue.title; - const body = removeUrls(issue.body || ""); - const labels = issue.labels?.map((l) => (typeof l === "object" && l.name ? l.name : "")).join(" ") || ""; - - return `${title} ${body} ${labels}`.toLowerCase(); - } - - private _createEmptyResult(visible: boolean = true): SearchResult { - return { - visible, - score: visible ? 1 : 0, - matchDetails: { - titleMatches: [], - bodyMatches: [], - labelMatches: [], - numberMatch: false, - fuzzyMatches: [], - repoMatch: false, - }, - }; - } -} diff --git a/src/home/search/search-scorer.ts b/src/home/search/search-scorer.ts deleted file mode 100644 index 0db203c..0000000 --- a/src/home/search/search-scorer.ts +++ /dev/null @@ -1,164 +0,0 @@ -import { GitHubNotifications } from "../github-types"; -import { SearchConfig, SearchResult } from "../types/search-types"; -import { StringSimilarity } from "./string-similarity"; - -export class SearchScorer { - constructor(private _config: SearchConfig) {} - - public calculateTitleScore(issue: GitHubNotifications, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number { - let score = 0; - const title = issue.title.toLowerCase(); - const words = title.split(/\s+/); - - searchTerms.forEach((term) => { - if (title.includes(term)) { - matchDetails.titleMatches.push(term); - score += this._config.exactMatchBonus; - - // Apply exponential boost for word beginnings - words.forEach((word) => { - if (word.startsWith(term)) { - // e^(-x) where x is the position of the match relative to word length - const positionBoost = Math.exp(-term.length / word.length); - score += positionBoost; - } - }); - } - }); - - if (searchTerms.length > 1 && title.includes(searchTerms.join(" "))) { - score += 1; - } - return Math.min(score, 3); - } - - public calculateBodyScore(issue: GitHubNotifications, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number { - let score = 0; - const body = (issue.body || "").toLowerCase(); - const words = body.split(/\s+/); - - searchTerms.forEach((term) => { - let termScore = 0; - words.forEach((word) => { - if (word.startsWith(term)) { - // Apply exponential boost for word beginnings - const positionBoost = Math.exp(-term.length / word.length); - termScore += positionBoost; - } - }); - - if (termScore > 0) { - matchDetails.bodyMatches.push(term); - score += Math.min(termScore, 1); - } - - const codeBlockMatches = body.match(/```[\s\S]*?```/g) || []; - codeBlockMatches.forEach((block) => { - if (block.toLowerCase().includes(term)) { - score += 0.5; - } - }); - }); - return Math.min(score, 2); - } - - public calculateMetaScore(issue: GitHubNotifications, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number { - let score = 0; - const numberTerm = searchTerms.find((term) => /^\d+$/.test(term)); - if (numberTerm && issue.number.toString() === numberTerm) { - matchDetails.numberMatch = true; - score += 2; - } - if (issue.labels) { - searchTerms.forEach((term) => { - issue.labels?.forEach((label) => { - if (typeof label === "object" && label.name) { - const labelName = label.name.toLowerCase(); - if (labelName.includes(term)) { - matchDetails.labelMatches.push(label.name); - // Apply exponential boost for label matches at word start - if (labelName.startsWith(term)) { - score += 0.8; - } else { - score += 0.5; - } - } - } - }); - }); - } - - return score; - } - - public calculateRepoScore(issue: GitHubNotifications, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number { - let score = 0; - if (issue.repository_url) { - const repoName = issue.repository_url.split("/").pop()?.toLowerCase() || ""; - const orgName = issue.repository_url.split("/").slice(-2)[0].toLowerCase() || ""; - searchTerms.forEach((term) => { - if (repoName.startsWith(term.toLowerCase())) { - matchDetails.repoMatch = true; - score += term.length / repoName.length; - } - if (orgName.startsWith(term.toLowerCase())) { - score += term.length / orgName.length; - } - }); - } - return score; - } - - public calculateFuzzyScore(content: string, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number { - let score = 0; - const contentWords = this._tokenizeContent(content); - - searchTerms.forEach((searchTerm) => { - let bestMatch = { - word: "", - score: 0, - isWordStart: false, - }; - - contentWords.forEach((word) => { - const similarity = StringSimilarity.calculate(searchTerm, word); - const isWordStart = word.startsWith(searchTerm); - - // Calculate position-based boost - const positionBoost = isWordStart ? Math.exp(-searchTerm.length / word.length) : 0; - const adjustedScore = similarity + positionBoost; - - if (adjustedScore > this._config.fuzzySearchThreshold && adjustedScore > bestMatch.score) { - bestMatch = { - word, - score: adjustedScore, - isWordStart, - }; - } - }); - - if (bestMatch.score > 0) { - matchDetails.fuzzyMatches.push({ - original: searchTerm, - matched: bestMatch.word, - score: bestMatch.score, - }); - - // Apply exponential weight for word-start matches - const finalScore = bestMatch.isWordStart ? bestMatch.score * Math.exp(this._config.fuzzyMatchWeight) : bestMatch.score * this._config.fuzzyMatchWeight; - - score += finalScore; - } - }); - - return Math.min(score, 2); - } - - private _tokenizeContent(content: string): string[] { - return content - .toLowerCase() - .replace(/[^\w\s]/g, " ") - .split(/\s+/) - .filter((word) => word.length > 2); - } -} diff --git a/src/home/search/string-similarity.ts b/src/home/search/string-similarity.ts deleted file mode 100644 index d38743b..0000000 --- a/src/home/search/string-similarity.ts +++ /dev/null @@ -1,27 +0,0 @@ -export class StringSimilarity { - public static calculate(str1: string, str2: string): number { - const maxLen = Math.max(str1.length, str2.length); - if (maxLen === 0) return 1.0; - - const distance = this._calculateLevenshteinDistance(str1, str2); - return 1 - distance / maxLen; - } - - private static _calculateLevenshteinDistance(str1: string, str2: string): number { - const matrix: number[][] = Array(str2.length + 1) - .fill(null) - .map(() => Array(str1.length + 1).fill(null)); - - for (let i = 0; i <= str1.length; i++) matrix[0][i] = i; - for (let j = 0; j <= str2.length; j++) matrix[j][0] = j; - - for (let j = 1; j <= str2.length; j++) { - for (let i = 1; i <= str1.length; i++) { - const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1; - matrix[j][i] = Math.min(matrix[j][i - 1] + 1, matrix[j - 1][i] + 1, matrix[j - 1][i - 1] + indicator); - } - } - - return matrix[str2.length][str1.length]; - } -} diff --git a/src/home/sorting/filter-issues-by-search.ts b/src/home/sorting/filter-issues-by-search.ts deleted file mode 100644 index 2d7cbab..0000000 --- a/src/home/sorting/filter-issues-by-search.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { GitHubNotifications } from "../github-types"; - -export function filterIssuesBySearch(filterText: string) { - const searchResults = taskManager.issueSearcher.search(filterText); - //Create the new GithubIssue[] array based on the ranking in the searchResults - const sortedIssues = Array.from(searchResults.entries()) - .filter(([, result]) => result.score > 0) - .sort((a, b) => b[1].score - a[1].score) - .map(([id]) => taskManager.getGitHubIssueById(id)) - .filter((issue): issue is GitHubNotifications => issue !== undefined); - return sortedIssues; -} diff --git a/src/home/types/search-types.ts b/src/home/types/search-types.ts deleted file mode 100644 index bafa5fb..0000000 --- a/src/home/types/search-types.ts +++ /dev/null @@ -1,30 +0,0 @@ -export interface SearchResult { - visible: boolean; - score: number; - matchDetails: { - titleMatches: string[]; - bodyMatches: string[]; - labelMatches: string[]; - numberMatch: boolean; - fuzzyMatches: Array<{ - original: string; - matched: string; - score: number; - }>; - repoMatch: boolean; - }; -} - -export interface SearchWeights { - title: number; - body: number; - fuzzy: number; - meta: number; - repo: number; -} - -export interface SearchConfig { - fuzzySearchThreshold: number; - exactMatchBonus: number; - fuzzyMatchWeight: number; -}