diff --git a/.github/sync-env-vars.sh b/.github/sync-env-vars.sh index db504a60..95d8bc96 100644 --- a/.github/sync-env-vars.sh +++ b/.github/sync-env-vars.sh @@ -2,7 +2,7 @@ # Check if required environment variables are set if [ -z "$CLOUDFLARE_ACCOUNT_ID" ] || [ -z "$CLOUDFLARE_API_TOKEN" ] || [ -z "$GITHUB_REPOSITORY" ] || \ - [ -z "$VOYAGEAI_API_KEY" ] || [ -z "$SUPABASE_URL" ] || [ -z "$SUPABASE_ANON_KEY" ]; then + [ -z "$VOYAGEAI_API_KEY" ] || [ -z "$SUPABASE_URL" ] || [ -z "$SUPABASE_ANON_KEY" ] || [ -z "$SUPABASE_KEY" ]; then echo "Error: Required environment variables are not set" exit 1 fi @@ -35,6 +35,10 @@ curl -X PATCH \ "SUPABASE_ANON_KEY": { "value": "'"${SUPABASE_ANON_KEY}"'", "type": "secret_text" + }, + "SUPABASE_KEY": { + "value": "'"${SUPABASE_KEY}"'", + "type": "secret_text" } } } diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 26660eb5..51732f58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,8 @@ jobs: yarn yarn build env: # Set environment variables for the build - SUPABASE_URL: ${{ secrets.SUPABASE_URL }} - SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }} + SUPABASE_URL: "https://wfzpewmlyiozupulbuur.supabase.co" + SUPABASE_ANON_KEY: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6IndmenBld21seWlvenVwdWxidXVyIiwicm9sZSI6ImFub24iLCJpYXQiOjE2OTU2NzQzMzksImV4cCI6MjAxMTI1MDMzOX0.SKIL3Q0NOBaMehH0ekFspwgcu3afp3Dl9EDzPqs1nKs" - name: Upload build artifact uses: actions/upload-artifact@v4 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c52741c5..0dd4d40c 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,6 +13,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Deploy to Cloudflare + if: ${{ github.event.workflow_run.conclusion == 'success' }} uses: ubiquity/cloudflare-deploy-action@main with: repository: ${{ github.repository }} @@ -44,5 +45,6 @@ jobs: VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }} + SUPABASE_KEY: ${{ secrets.SUPABASE_KEY }} run: bash .github/sync-env-vars.sh diff --git a/functions/issue-scraper.ts b/functions/issue-scraper.ts index e8b04a2b..afaf6d47 100644 --- a/functions/issue-scraper.ts +++ b/functions/issue-scraper.ts @@ -9,140 +9,135 @@ import { validatePOST } from "./validators"; const VECTOR_SIZE = 1024; interface MarkdownItWithPlainText extends markdownit { - plainText: string; + plainText: string; } interface PayloadType { - issue: { - nodeId: string; - number: number; - title: string; - body: string; - state: string; - stateReason: string | null; - repositoryName: string; - repositoryId: number; - assignees: string[]; - createdAt: string; - closedAt: string | null; - updatedAt: string; - }; - action: string; - sender: { - login: string; - }; - repository: { - id: number; - node_id: string; - name: string; - full_name: string; - owner: { - login: string; - id: number; - type: string; - site_admin: boolean; - }; - }; -} - -interface IssueNode { - id: string; + issue: { + nodeId: string; number: number; title: string; body: string; state: string; stateReason: string | null; + repositoryName: string; + repositoryId: number; + assignees: string[]; createdAt: string; - updatedAt: string; closedAt: string | null; - author: { - login: string; - } | null; - assignees: { - nodes: Array<{ - login: string; - }>; + updatedAt: string; + }; + action: string; + sender: { + login: string; + }; + repository: { + id: number; + node_id: string; + name: string; + full_name: string; + owner: { + login: string; + id: number; + type: string; + site_admin: boolean; }; - repository: { - id: string; - name: string; - owner: { - login: string; - }; + }; +} + +interface IssueNode { + id: string; + number: number; + title: string; + body: string; + state: string; + stateReason: string | null; + createdAt: string; + updatedAt: string; + closedAt: string | null; + author: { + login: string; + } | null; + assignees: { + nodes: Array<{ + login: string; + }>; + }; + repository: { + id: string; + name: string; + owner: { + login: string; }; + }; } interface GraphQlSearchResponse { - search: { - pageInfo: { - hasNextPage: boolean; - endCursor: string | null; - }; - nodes: Array; + search: { + pageInfo: { + hasNextPage: boolean; + endCursor: string | null; }; + nodes: Array; + }; } export const corsHeaders = { - "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "GET", - "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET", + "Access-Control-Allow-Headers": "Content-Type", }; export async function onRequest(ctx: Context): Promise { - const { request, env } = ctx; - try { - switch (request.method) { - case "POST": { - const result = await validatePOST(request); - if (!result.isValid || !result.gitHubUser) { - return new Response("Unauthorized", { - headers: corsHeaders, - status: 400, - }); - } - const githubUserName = result.gitHubUser.login; - try { - const supabase = new SupabaseClient(env.SUPABASE_URL, env.SUPABASE_KEY); - const response = await issueScraper( - githubUserName, - supabase, - env.VOYAGEAI_API_KEY, - result.authToken - ); - return new Response(response, { - headers: corsHeaders, - status: 200, - }); - } catch (error) { - console.error("Error processing request:", error); - return new Response("Internal Server Error", { - headers: corsHeaders, - status: 500, - }); - } - } - - default: - return new Response("Method Not Allowed", { - headers: corsHeaders, - status: 405, - }); + const { request, env } = ctx; + try { + switch (request.method) { + case "POST": { + const result = await validatePOST(request); + if (!result.isValid || !result.gitHubUser) { + return new Response("Unauthorized", { + headers: corsHeaders, + status: 400, + }); } - } catch (error) { - console.error("Error processing request:", error); - return new Response("Internal Server Error", { + const githubUserName = result.gitHubUser.login; + try { + const supabase = new SupabaseClient(env.SUPABASE_URL, env.SUPABASE_KEY); + const response = await issueScraper(githubUserName, supabase, env.VOYAGEAI_API_KEY, result.authToken); + return new Response(response, { + headers: corsHeaders, + status: 200, + }); + } catch (error) { + console.error("Error processing request:", error); + return new Response("Internal Server Error", { headers: corsHeaders, status: 500, + }); + } + } + + default: + return new Response("Method Not Allowed", { + headers: corsHeaders, + status: 405, }); } + } catch (error) { + console.error("Error processing request:", error); + return new Response("Internal Server Error", { + headers: corsHeaders, + status: 500, + }); + } } function markdownToPlainText(markdown: string | null): string | null { - if (!markdown) return markdown; - const md = markdownit() as MarkdownItWithPlainText; - md.use(plainTextPlugin); - md.render(markdown); - return md.plainText; + if (!markdown) return markdown; + const md = markdownit() as MarkdownItWithPlainText; + md.use(plainTextPlugin); + md.render(markdown); + return md.plainText; } const SEARCH_ISSUES_QUERY = ` @@ -189,181 +184,168 @@ const SEARCH_ISSUES_QUERY = ` } `; -async function fetchUserIssuesBatch( - octokit: InstanceType, - username: string -): Promise { - const allIssues: IssueNode[] = []; - let hasNextPage = true; - let cursor: string | null = null; +async function fetchUserIssuesBatch(octokit: InstanceType, username: string): Promise { + const allIssues: IssueNode[] = []; + let hasNextPage = true; + let cursor: string | null = null; - const searchText = `assignee:${username} is:issue is:closed`; + const searchText = `assignee:${username} is:issue is:closed`; - while (hasNextPage) { - const variables: { searchText: string; after?: string } = { searchText }; - if (cursor) { - variables.after = cursor; - } + while (hasNextPage) { + const variables: { searchText: string; after?: string } = { searchText }; + if (cursor) { + variables.after = cursor; + } - const response: GraphQlSearchResponse = await octokit.graphql(SEARCH_ISSUES_QUERY, variables); + const response: GraphQlSearchResponse = await octokit.graphql(SEARCH_ISSUES_QUERY, variables); - const completedIssues = response.search.nodes.filter((issue) => issue.stateReason === "COMPLETED"); - allIssues.push(...completedIssues); + const completedIssues = response.search.nodes.filter((issue) => issue.stateReason === "COMPLETED"); + allIssues.push(...completedIssues); - hasNextPage = response.search.pageInfo.hasNextPage; - cursor = response.search.pageInfo.endCursor; - } + hasNextPage = response.search.pageInfo.hasNextPage; + cursor = response.search.pageInfo.endCursor; + } - return allIssues; + return allIssues; } async function batchEmbeddings(voyageClient: VoyageAIClient, texts: string[]): Promise<(number[] | undefined)[]> { - try { - const embeddingResponse = await voyageClient.embed({ - input: texts, - model: "voyage-large-2-instruct", - inputType: "document", - }); - return embeddingResponse.data?.map((item) => item.embedding) || [] - } catch (error) { - console.error("Error batching embeddings:", error); - throw error; - } + try { + const embeddingResponse = await voyageClient.embed({ + input: texts, + model: "voyage-large-2-instruct", + inputType: "document", + }); + return embeddingResponse.data?.map((item) => item.embedding) || []; + } catch (error) { + console.error("Error batching embeddings:", error); + throw error; + } } async function batchUpsertIssues( - supabase: SupabaseClient, - issues: Array<{ - id: string; - markdown: string; - plaintext: string; - embedding: string; - author_id: number; - payload: PayloadType; - }> + supabase: SupabaseClient, + issues: Array<{ + id: string; + markdown: string; + plaintext: string; + embedding: string; + author_id: number; + payload: PayloadType; + }> ): Promise { - const { error } = await supabase.from("issues").upsert(issues); - if (error) { - throw new Error(`Error during batch upsert: ${error.message}`); - } + const { error } = await supabase.from("issues").upsert(issues); + if (error) { + throw new Error(`Error during batch upsert: ${error.message}`); + } } -async function batchFetchAuthorIds( - octokit: InstanceType, - logins: string[] -): Promise> { - const authorIdMap: Record = {}; - const BATCH_SIZE = 20; - for (let i = 0; i < logins.length; i += BATCH_SIZE) { - const batch = logins.slice(i, i + BATCH_SIZE); - const promises = batch.map(async (login) => { - try { - const response = await octokit.rest.users.getByUsername({ username: login }); - return { login, id: response.data.id }; - } catch (error) { - console.error(`Error fetching author ID for ${login}:`, error); - return { login, id: -1 }; - } - }); - const results = await Promise.all(promises); - results.forEach(({ login, id }) => { - authorIdMap[login] = id; - }); - } - return authorIdMap; +async function batchFetchAuthorIds(octokit: InstanceType, logins: string[]): Promise> { + const authorIdMap: Record = {}; + const BATCH_SIZE = 20; + for (let i = 0; i < logins.length; i += BATCH_SIZE) { + const batch = logins.slice(i, i + BATCH_SIZE); + const promises = batch.map(async (login) => { + try { + const response = await octokit.rest.users.getByUsername({ username: login }); + return { login, id: response.data.id }; + } catch (error) { + console.error(`Error fetching author ID for ${login}:`, error); + return { login, id: -1 }; + } + }); + const results = await Promise.all(promises); + results.forEach(({ login, id }) => { + authorIdMap[login] = id; + }); + } + return authorIdMap; } -async function issueScraper( - username: string, - supabase: SupabaseClient, - voyageApiKey: string, - token?: string -): Promise { - try { - if (!username) { - throw new Error("Username is required"); - } +async function issueScraper(username: string, supabase: SupabaseClient, voyageApiKey: string, token?: string): Promise { + try { + if (!username) { + throw new Error("Username is required"); + } - const octokit = new Octokit(token ? { auth: token } : {}); - const voyageClient = new VoyageAIClient({ apiKey: voyageApiKey }); + const octokit = new Octokit(token ? { auth: token } : {}); + const voyageClient = new VoyageAIClient({ apiKey: voyageApiKey }); - const issues = await fetchUserIssuesBatch(octokit, username); + const issues = await fetchUserIssuesBatch(octokit, username); - // Extract unique author logins - const uniqueAuthors = Array.from( - new Set(issues.map((issue) => issue.author?.login).filter((login): login is string => !!login)) - ); + // Extract unique author logins + const uniqueAuthors = Array.from(new Set(issues.map((issue) => issue.author?.login).filter((login): login is string => !!login))); - // Fetch author IDs in batches - const authorIdMap = await batchFetchAuthorIds(octokit, uniqueAuthors); + // Fetch author IDs in batches + const authorIdMap = await batchFetchAuthorIds(octokit, uniqueAuthors); - const markdowns = issues.map((issue) => `${issue.body || ""} ${issue.title || ""}`); - const plainTexts = markdowns.map(markdownToPlainText); - const embeddings = await batchEmbeddings(voyageClient, markdowns); + const markdowns = issues.map((issue) => `${issue.body || ""} ${issue.title || ""}`); + const plainTexts = markdowns.map(markdownToPlainText); + const embeddings = await batchEmbeddings(voyageClient, markdowns); - const upsertData = issues.map((issue, index) => { - const authorId = issue.author?.login ? authorIdMap[issue.author.login] || -1 : -1; - const repoOwner = issue.repository.owner.login; + const upsertData = issues.map((issue, index) => { + const authorId = issue.author?.login ? authorIdMap[issue.author.login] || -1 : -1; + const repoOwner = issue.repository.owner.login; - return { - id: issue.id, - markdown: markdowns[index], - plaintext: plainTexts[index] ?? '', - embedding: JSON.stringify(embeddings[index] || Array(VECTOR_SIZE).fill(0)), - author_id: authorId, - payload: { - issue: { - nodeId: issue.id, - number: issue.number, - title: issue.title || "", - body: issue.body || "", - state: issue.state, - stateReason: issue.stateReason, - repositoryName: issue.repository.name, - repositoryId: parseInt(issue.repository.id), - assignees: (issue.assignees?.nodes || []).map((a) => a.login), - createdAt: issue.createdAt, - closedAt: issue.closedAt, - updatedAt: issue.updatedAt, - }, - action: "created", - sender: { login: username }, - repository: { - id: parseInt(issue.repository.id), - node_id: issue.repository.id, - name: issue.repository.name, - full_name: `${repoOwner}/${issue.repository.name}`, - owner: { - login: repoOwner, - id: authorId, - type: "User", - site_admin: false, - }, - }, - }, - }; - }); + return { + id: issue.id, + markdown: markdowns[index], + plaintext: plainTexts[index] ?? "", + embedding: JSON.stringify(embeddings[index] || Array(VECTOR_SIZE).fill(0)), + author_id: authorId, + payload: { + issue: { + nodeId: issue.id, + number: issue.number, + title: issue.title || "", + body: issue.body || "", + state: issue.state, + stateReason: issue.stateReason, + repositoryName: issue.repository.name, + repositoryId: parseInt(issue.repository.id), + assignees: (issue.assignees?.nodes || []).map((a) => a.login), + createdAt: issue.createdAt, + closedAt: issue.closedAt, + updatedAt: issue.updatedAt, + }, + action: "created", + sender: { login: username }, + repository: { + id: parseInt(issue.repository.id), + node_id: issue.repository.id, + name: issue.repository.name, + full_name: `${repoOwner}/${issue.repository.name}`, + owner: { + login: repoOwner, + id: authorId, + type: "User", + site_admin: false, + }, + }, + }, + }; + }); - await batchUpsertIssues(supabase, upsertData); + await batchUpsertIssues(supabase, upsertData); - return JSON.stringify( - { - success: true, - stats: { - storageSuccessful: upsertData.length, - storageFailed: 0, - }, - issues: upsertData.map((issue) => ({ - id: issue.id, - markdown: issue.markdown, - plaintext: issue.plaintext, - })), - }, - null, - 2 - ); - } catch (error) { - console.error("Error in issueScraper:", error); - throw error; - } + return JSON.stringify( + { + success: true, + stats: { + storageSuccessful: upsertData.length, + storageFailed: 0, + }, + issues: upsertData.map((issue) => ({ + id: issue.id, + markdown: issue.markdown, + plaintext: issue.plaintext, + })), + }, + null, + 2 + ); + } catch (error) { + console.error("Error in issueScraper:", error); + throw error; + } } diff --git a/src/home/getters/get-github-access-token.ts b/src/home/getters/get-github-access-token.ts index c0c686dc..c5303f81 100644 --- a/src/home/getters/get-github-access-token.ts +++ b/src/home/getters/get-github-access-token.ts @@ -9,9 +9,9 @@ import { getLocalStore } from "./get-local-store"; export async function isOrgMemberWithoutScope() { const octokit = new Octokit({ auth: await getGitHubAccessToken() }); try { - // await octokit.orgs.getMembershipForAuthenticatedUser({ - // org: "ubiquity", - // }); + await octokit.orgs.getMembershipForAuthenticatedUser({ + org: "ubiquity", + }); } catch (e) { if (e && typeof e === "object" && "status" in e && e.status === 404) { return false;