diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 51732f5..26660eb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,8 @@ jobs: yarn yarn build env: # Set environment variables for the build - SUPABASE_URL: "https://wfzpewmlyiozupulbuur.supabase.co" - SUPABASE_ANON_KEY: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6IndmenBld21seWlvenVwdWxidXVyIiwicm9sZSI6ImFub24iLCJpYXQiOjE2OTU2NzQzMzksImV4cCI6MjAxMTI1MDMzOX0.SKIL3Q0NOBaMehH0ekFspwgcu3afp3Dl9EDzPqs1nKs" + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }} - name: Upload build artifact uses: actions/upload-artifact@v4 diff --git a/bun.lockb b/bun.lockb old mode 100755 new mode 100644 index a5cb0c5..8b8774f Binary files a/bun.lockb and b/bun.lockb differ diff --git a/functions/issue-scraper.ts b/functions/issue-scraper.ts index 2f28866..e9f79a8 100644 --- a/functions/issue-scraper.ts +++ b/functions/issue-scraper.ts @@ -1,41 +1,349 @@ import { Context } from "./types"; +import { SupabaseClient } from "@supabase/supabase-js"; +import { VoyageAIClient } from "voyageai"; +import { Octokit } from "@octokit/rest"; +import markdownit from "markdown-it"; +import plainTextPlugin from "markdown-it-plain-text"; +import { validatePOST } from "./validators"; + +interface MarkdownItWithPlainText extends markdownit { + plainText: string; + } + +interface IssueMetadata { + nodeId: string; + number: number; + title: string; + body: string; + state: string; + repositoryName: string; + repositoryId: number; + assignees: string[]; + authorId: number; + createdAt: string; + closedAt: string | null; + stateReason: string | null; + updatedAt: string; + } + + interface IssueNode { + id: string; + number: number; + title: string; + body: string; + state: string; + stateReason: string | null; + createdAt: string; + updatedAt: string; + closedAt: string | null; + author: { + login: string; + } | null; + assignees: { + nodes: Array<{ + login: string; + }>; + }; + repository: { + id: string; + name: string; + owner: { + login: string; + }; + }; + } + + interface GraphQlSearchResponse { + search: { + pageInfo: { + hasNextPage: boolean; + endCursor: string | null; + }; + nodes: Array; + }; + } export const corsHeaders = { - "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "GET", - "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET", + "Access-Control-Allow-Headers": "Content-Type", }; export async function onRequest(ctx: Context): Promise { - const { request, env } = ctx; - const url = new URL(request.url); + const { request, env } = ctx; - try { - switch (request.method) { - case "GET": - if (url.searchParams.has("key")) { - const key = url.searchParams.get("key") as string; - return new Response("GET request with key: " + key + JSON.stringify(env.SUPABASE_ANON_KEY), { + try { + switch (request.method) { + case "POST": { + const result = await validatePOST(request); + if (!result.isValid || !result.gitHubUserId) { + return new Response("Unauthorized", { + headers: corsHeaders, + status: 400, + }); + } + try { + const supabase = new SupabaseClient(env.SUPABASE_URL, env.SUPABASE_ANON_KEY); + const response = await issueScraper(result.gitHubUserId, supabase, env.VOYAGEAI_API_KEY, result.authToken); + return new Response(response, { + headers: corsHeaders, + status: 200, + }); + } catch (error) { + console.error("Error processing request:", error); + return new Response("Internal Server Error", { + headers: corsHeaders, + status: 500, + }); + } + } + + default: + return new Response("Method Not Allowed", { + headers: corsHeaders, + status: 405, + }); + } + } catch (error) { + console.error("Error processing request:", error); + return new Response("Internal Server Error", { headers: corsHeaders, - status: 200, - }); + status: 500, + }); + } +} + + +function markdownToPlainText(markdown: string | null): string | null { + if (!markdown) return markdown; + const md = markdownit() as MarkdownItWithPlainText; + md.use(plainTextPlugin); + md.render(markdown); + return md.plainText; +} + + +const SEARCH_ISSUES_QUERY = ` + query SearchIssues($searchText: String!, $after: String) { + search( + query: $searchText, + type: ISSUE, + first: 100, + after: $after + ) { + pageInfo { + hasNextPage + endCursor + } + nodes { + ... on Issue { + id + number + title + body + state + stateReason + createdAt + updatedAt + closedAt + author { + login + } + assignees(first: 10) { + nodes { + login + } + } + repository { + id + name + owner { + login + } + } + } + } + } + } +`; + +async function fetchAuthorId(octokit: InstanceType, login: string): Promise { + try { + const response = await octokit.rest.users.getByUsername({ username: login }); + return response.data.id; + } catch (error) { + console.error(`Error fetching author ID for ${login}:`, error); + return -1; + } +} + +async function fetchUserIssues(octokit: InstanceType, username: string): Promise { + const allIssues: IssueNode[] = []; + let hasNextPage = true; + let cursor: string | null = null; + + const searchText = `assignee:${username} is:issue is:closed`; + + while (hasNextPage) { + const variables: { searchText: string; after?: string } = { + searchText, + }; + if (cursor) { + variables.after = cursor; + } + + const response: GraphQlSearchResponse = await octokit.graphql(SEARCH_ISSUES_QUERY, variables); + + const completedIssues = response.search.nodes.filter((issue) => issue.stateReason === "COMPLETED"); + allIssues.push(...completedIssues); + + hasNextPage = response.search.pageInfo.hasNextPage; + cursor = response.search.pageInfo.endCursor; + + if (!cursor) break; + } + + return allIssues; +} + +// Pulls issues from GitHub and stores them in Supabase +async function issueScraper(username: string, supabase: SupabaseClient, voyageApiKey: string, token?: string): Promise { + try { + + if (!username) { + throw new Error("Username is required"); + } + + const context = { + adapters: {}, + logger: { + info: (message: string, data: Record) => console.log("INFO:", message + ":", data), + error: (message: string, data: Record) => console.error("ERROR:", message + ":", data), + }, + octokit: new Octokit(token ? { auth: token } : {}), + }; + + const voyageClient = new VoyageAIClient({ apiKey: voyageApiKey }); + const issues = await fetchUserIssues(context.octokit, username); + const processedIssues: Array<{ issue: IssueMetadata; error?: string }> = []; + + for (const issue of issues) { + try { + const authorId = issue.author?.login ? await fetchAuthorId(context.octokit, issue.author.login) : -1; + const repoOwner = issue.repository.owner.login; + + const metadata: IssueMetadata = { + nodeId: issue.id, + number: issue.number, + title: issue.title || "", + body: issue.body || "", + state: issue.state, + stateReason: issue.stateReason, + repositoryName: issue.repository.name, + repositoryId: parseInt(issue.repository.id), + assignees: (issue.assignees?.nodes || []).map((assignee) => assignee.login), + authorId, + createdAt: issue.createdAt, + closedAt: issue.closedAt, + updatedAt: issue.updatedAt, + }; + const markdown = metadata.body + " " + metadata.title; + const plaintext = markdownToPlainText(markdown); + if (!plaintext || plaintext === null) { + throw new Error("Error converting markdown to plaintext"); + } + const embeddingObject = await voyageClient.embed({ + input: markdown, + model: "voyage-large-2-instruct", + inputType: "document", + }); + const embedding = (embeddingObject.data && embeddingObject.data[0]?.embedding) || {}; + const payload = { + issue: metadata, + action: "created", + sender: { + login: username, + }, + repository: { + id: parseInt(issue.repository.id), + node_id: issue.repository.id, + name: issue.repository.name, + full_name: `${repoOwner}/${issue.repository.name}`, + owner: { + login: repoOwner, + id: authorId, + type: "User", + site_admin: false, + }, + }, + }; + //Check if the user is authenticated + if (!supabase.auth.getUser()) { + throw new Error("User is not authenticated"); } - return new Response("GET request without key", { - headers: corsHeaders, - status: 200, + + const { error } = await supabase.from("issues").upsert({ + id: metadata.nodeId, + markdown, + plaintext, + embedding: JSON.stringify(embedding), + author_id: metadata.authorId, + modified_at: metadata.updatedAt, + payload: payload, }); - default: - return new Response("Method Not Allowed", { - headers: corsHeaders, - status: 405, + processedIssues.push({ + issue: metadata, + error: error ? `Error storing issue: ${error.message}` : undefined, + }); + } catch (error) { + processedIssues.push({ + issue: { + nodeId: issue.id, + number: issue.number, + title: issue.title || "", + body: issue.body || "", + state: issue.state, + stateReason: issue.stateReason, + repositoryName: issue.repository.name, + repositoryId: parseInt(issue.repository.id), + assignees: [], + authorId: -1, + createdAt: issue.createdAt, + closedAt: issue.closedAt, + updatedAt: issue.updatedAt, + }, + error: `Error processing issue: ${error instanceof Error ? error.message : "Unknown error"}`, }); + } } + + return JSON.stringify( + { + success: true, + stats: { + storageSuccessful: processedIssues.filter((p) => !p.error).length, + storageFailed: processedIssues.filter((p) => p.error).length, + }, + errors: processedIssues + .filter((p) => p.error) + .map((p) => ({ + type: "storage", + name: `${p.issue.repositoryName}#${p.issue.number}`, + error: p.error, + })), + issues: processedIssues.map((p) => ({ + number: p.issue.number, + title: p.issue.title, + repo: p.issue.repositoryName, + error: p.error, + })), + }, + null, + 2 + ); } catch (error) { - console.error("Error processing request:", error); - return new Response("Internal Server Error", { - headers: corsHeaders, - status: 500, - }); + console.error("Error in issueScraper:", error); + throw error; } } diff --git a/functions/types.ts b/functions/types.ts index 47b54d0..5b06f3c 100644 --- a/functions/types.ts +++ b/functions/types.ts @@ -16,6 +16,7 @@ export interface ValidationResult { isValid: boolean; gitHubUserId?: string; referralCode?: string; + authToken?: string; } export type Context = EventContext>; diff --git a/src/home/authentication.ts b/src/home/authentication.ts index aea7883..b15f00e 100644 --- a/src/home/authentication.ts +++ b/src/home/authentication.ts @@ -4,6 +4,7 @@ import { GitHubUser } from "./github-types"; import { trackReferralCode } from "./register-referral"; import { displayGitHubUserInformation } from "./rendering/display-github-user-information"; import { renderGitHubLoginButton } from "./rendering/render-github-login-button"; +import { startIssueScraper } from "./scraper/issue-scraper"; // import { issueScraper } from "./scraper/issue-scraper"; export async function authentication() { @@ -23,7 +24,7 @@ export async function authentication() { await displayGitHubUserInformation(gitHubUser); // <-- Issue Scraper here --> // const supabase = getSupabase(); - // const githubUserName = gitHubUser.login; - //await issueScraper(githubUserName, supabase, accessToken || undefined); + const githubUserName = gitHubUser.login; + await startIssueScraper(githubUserName); } } diff --git a/src/home/scraper/issue-scraper.ts b/src/home/scraper/issue-scraper.ts index 9310cde..9c969a5 100644 --- a/src/home/scraper/issue-scraper.ts +++ b/src/home/scraper/issue-scraper.ts @@ -1,315 +1,40 @@ -import { SupabaseClient } from "@supabase/supabase-js"; -import { VoyageAIClient } from "voyageai"; -import { Octokit } from "@octokit/rest"; -import markdownit from "markdown-it"; -import plainTextPlugin from "markdown-it-plain-text"; +import { checkSupabaseSession } from "../rendering/render-github-login-button"; -declare const VOYAGEAI_API_KEY: string; // @DEV: passed in at build time check build/esbuild-build.ts +export async function startIssueScraper(username: string) { + const supabaseAuth = await checkSupabaseSession(); -interface MarkdownItWithPlainText extends markdownit { - plainText: string; -} + // Check if 24 hours have passed since last fetch + const lastFetchKey = `lastFetch_${username}`; + const lastFetch = localStorage.getItem(lastFetchKey); + const now = Date.now(); -function markdownToPlainText(markdown: string | null): string | null { - if (!markdown) return markdown; - const md = markdownit() as MarkdownItWithPlainText; - md.use(plainTextPlugin); - md.render(markdown); - return md.plainText; -} - -interface IssueMetadata { - nodeId: string; - number: number; - title: string; - body: string; - state: string; - repositoryName: string; - repositoryId: number; - assignees: string[]; - authorId: number; - createdAt: string; - closedAt: string | null; - stateReason: string | null; - updatedAt: string; -} - -interface IssueNode { - id: string; - number: number; - title: string; - body: string; - state: string; - stateReason: string | null; - createdAt: string; - updatedAt: string; - closedAt: string | null; - author: { - login: string; - } | null; - assignees: { - nodes: Array<{ - login: string; - }>; - }; - repository: { - id: string; - name: string; - owner: { - login: string; - }; - }; -} - -interface GraphQlSearchResponse { - search: { - pageInfo: { - hasNextPage: boolean; - endCursor: string | null; - }; - nodes: Array; - }; -} - -const SEARCH_ISSUES_QUERY = ` - query SearchIssues($searchText: String!, $after: String) { - search( - query: $searchText, - type: ISSUE, - first: 100, - after: $after - ) { - pageInfo { - hasNextPage - endCursor - } - nodes { - ... on Issue { - id - number - title - body - state - stateReason - createdAt - updatedAt - closedAt - author { - login - } - assignees(first: 10) { - nodes { - login - } - } - repository { - id - name - owner { - login - } - } - } - } - } - } -`; - -async function fetchAuthorId(octokit: InstanceType, login: string): Promise { - try { - const response = await octokit.rest.users.getByUsername({ username: login }); - return response.data.id; - } catch (error) { - console.error(`Error fetching author ID for ${login}:`, error); - return -1; - } -} - -async function fetchUserIssues(octokit: InstanceType, username: string): Promise { - const allIssues: IssueNode[] = []; - let hasNextPage = true; - let cursor: string | null = null; - - const searchText = `assignee:${username} is:issue is:closed`; - - while (hasNextPage) { - const variables: { searchText: string; after?: string } = { - searchText, - }; - if (cursor) { - variables.after = cursor; - } - - const response: GraphQlSearchResponse = await octokit.graphql(SEARCH_ISSUES_QUERY, variables); - - const completedIssues = response.search.nodes.filter((issue) => issue.stateReason === "COMPLETED"); - allIssues.push(...completedIssues); - - hasNextPage = response.search.pageInfo.hasNextPage; - cursor = response.search.pageInfo.endCursor; - - if (!cursor) break; + if (lastFetch && now - Number(lastFetch) < 24 * 60 * 60 * 1000) { + return JSON.stringify({ + success: true, + message: "Skipping fetch - last fetch was less than 24 hours ago", + }); } - - return allIssues; -} - -// Pulls issues from GitHub and stores them in Supabase -export async function issueScraper(username: string, supabase: SupabaseClient, token?: string): Promise { - try { - // Check if 24 hours have passed since last fetch - const lastFetchKey = `lastFetch_${username}`; - const lastFetch = localStorage.getItem(lastFetchKey); - const now = Date.now(); - - if (lastFetch && now - Number(lastFetch) < 24 * 60 * 60 * 1000) { - return JSON.stringify({ - success: true, - message: "Skipping fetch - last fetch was less than 24 hours ago", - }); - } - - if (!username) { - throw new Error("Username is required"); - } - - if (VOYAGEAI_API_KEY === undefined) { - throw new Error("Required environment `VOYAGEAI_API_KEY` is missing"); - } - - const context = { - adapters: {}, - logger: { - info: (message: string, data: Record) => console.log("INFO:", message + ":", data), - error: (message: string, data: Record) => console.error("ERROR:", message + ":", data), - }, - octokit: new Octokit(token ? { auth: token } : {}), - }; - - const voyageClient = new VoyageAIClient({ apiKey: VOYAGEAI_API_KEY }); - const issues = await fetchUserIssues(context.octokit, username); - const processedIssues: Array<{ issue: IssueMetadata; error?: string }> = []; - - for (const issue of issues) { - try { - const authorId = issue.author?.login ? await fetchAuthorId(context.octokit, issue.author.login) : -1; - const repoOwner = issue.repository.owner.login; - - const metadata: IssueMetadata = { - nodeId: issue.id, - number: issue.number, - title: issue.title || "", - body: issue.body || "", - state: issue.state, - stateReason: issue.stateReason, - repositoryName: issue.repository.name, - repositoryId: parseInt(issue.repository.id), - assignees: (issue.assignees?.nodes || []).map((assignee) => assignee.login), - authorId, - createdAt: issue.createdAt, - closedAt: issue.closedAt, - updatedAt: issue.updatedAt, - }; - const markdown = metadata.body + " " + metadata.title; - const plaintext = markdownToPlainText(markdown); - if (!plaintext || plaintext === null) { - throw new Error("Error converting markdown to plaintext"); - } - const embeddingObject = await voyageClient.embed({ - input: markdown, - model: "voyage-large-2-instruct", - inputType: "document", - }); - const embedding = (embeddingObject.data && embeddingObject.data[0]?.embedding) || {}; - const payload = { - issue: metadata, - action: "created", - sender: { - login: username, - }, - repository: { - id: parseInt(issue.repository.id), - node_id: issue.repository.id, - name: issue.repository.name, - full_name: `${repoOwner}/${issue.repository.name}`, - owner: { - login: repoOwner, - id: authorId, - type: "User", - site_admin: false, - }, - }, - }; - //Check if the user is authenticated - if (!supabase.auth.getUser()) { - throw new Error("User is not authenticated"); - } - - const { error } = await supabase.from("issues").upsert({ - id: metadata.nodeId, - markdown, - plaintext, - embedding: JSON.stringify(embedding), - author_id: metadata.authorId, - modified_at: metadata.updatedAt, - payload: payload, - }); - - processedIssues.push({ - issue: metadata, - error: error ? `Error storing issue: ${error.message}` : undefined, - }); - } catch (error) { - processedIssues.push({ - issue: { - nodeId: issue.id, - number: issue.number, - title: issue.title || "", - body: issue.body || "", - state: issue.state, - stateReason: issue.stateReason, - repositoryName: issue.repository.name, - repositoryId: parseInt(issue.repository.id), - assignees: [], - authorId: -1, - createdAt: issue.createdAt, - closedAt: issue.closedAt, - updatedAt: issue.updatedAt, - }, - error: `Error processing issue: ${error instanceof Error ? error.message : "Unknown error"}`, - }); - } - } - - // Update last fetch timestamp + + const response = await fetch("/issue-scraper", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + authToken: supabaseAuth.provider_token, + }), + }); + + if (response.status === 200) { localStorage.setItem(lastFetchKey, now.toString()); - - return JSON.stringify( - { - success: true, - stats: { - storageSuccessful: processedIssues.filter((p) => !p.error).length, - storageFailed: processedIssues.filter((p) => p.error).length, - }, - errors: processedIssues - .filter((p) => p.error) - .map((p) => ({ - type: "storage", - name: `${p.issue.repositoryName}#${p.issue.number}`, - error: p.error, - })), - issues: processedIssues.map((p) => ({ - number: p.issue.number, - title: p.issue.title, - repo: p.issue.repositoryName, - error: p.error, - })), - }, - null, - 2 - ); - } catch (error) { - console.error("Error in issueScraper:", error); - throw error; + return JSON.stringify({ + success: true, + message: "Successfully fetched issues", + }); + } else { + return JSON.stringify({ + success: false, + message: `Failed to fetch issues. Status: ${response.status}`, + }); } -} +} \ No newline at end of file