From fa8934f26b77990f881f27a1a076367c13d2f3af Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Mon, 9 Dec 2024 22:59:40 -0500 Subject: [PATCH] fix: evals workflow --- .github/workflows/evals-testing.yml | 10 ++++------ evals/data/eval-gold-responses.json | 12 ++++++------ evals/llm.eval.ts | 4 +--- src/adapters/openai/helpers/completions.ts | 5 +++++ 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/evals-testing.yml b/.github/workflows/evals-testing.yml index a19763d..c2162b0 100644 --- a/.github/workflows/evals-testing.yml +++ b/.github/workflows/evals-testing.yml @@ -1,10 +1,8 @@ name: Run Braintrust Evals on: - workflow_run: - workflows: ["Knip"] - types: - - completed + workflow_dispatch: + pull_request: permissions: write-all @@ -19,7 +17,7 @@ jobs: VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} - UBIQUITY_OS_APP_NAME: "ubiquity-agent" # Hardcoded value + UBIQUITY_OS_APP_NAME: "ubiquity-agent" steps: - uses: actions/setup-node@v4 @@ -43,7 +41,7 @@ jobs: VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} - UBIQUITY_OS_APP_NAME: "ubiquity-agent" # Hardcoded value + UBIQUITY_OS_APP_NAME: "ubiquity-agent" - name: Add Evals Report to Github Comment if: always() diff --git a/evals/data/eval-gold-responses.json b/evals/data/eval-gold-responses.json index d947c52..e834c80 100644 --- a/evals/data/eval-gold-responses.json +++ b/evals/data/eval-gold-responses.json @@ -1,14 +1,14 @@ { "issueResponses": [ { - "scenario": "UI Improvements Issue", + "scenario": "manifest.name mismatch ", "issue": { - "body": "Depends on #13\n\nWhat should be done:\n\n1. The \"select configuration\" step should be removed. Always use the \"production\" config.\n2. Show plugin readme on installing a new plugin (original comment)\n3. Show indicator if plugin is already installed (original comment)\n4. Load current config values in the editor (if config is present)\n5. Add \"Reset to defaults\" button which resets plugin options to default values (original comment)\n6. Add github redirect to the same page (example)", - "number": 19, - "html_url": "https://github.com/ubiquity-os/ubiquity-os-plugin-installer/issues/19/", - "question": "@ubosshivaditya could you please provide a summary of the UI improvements needed based on the issue discussion and linked issues?" + "body": "Manifests need to be updated so the name matches the intended name, which is the name of the repo it lives in.\n\nAny mismatch in manifest.name and the plugin repo, and we will not be able to install those plugins. The config will look like this:\n\nThis is because the worker URL contains the repo name, and we use that to match against manifest.name.", + "number": 27, + "html_url": "https://github.com/ubiquity-os/ubiquity-os-plugin-installer/issues/27/", + "question": "@ubosshivaditya could you please provide a summary of the issue ?" }, - "expectedResponse": "Based on the issue discussion and linked issues, here's a summary of the UI improvements needed:\n\n1. Configuration Simplification:\n- Remove the \"select configuration\" step\n- Always use \"production\" config as this is for partners\n- Development configs can be created manually when needed\n\n2. Plugin Information Display:\n- Show plugin readme when installing new plugins\n- Add indicator to show if a plugin is already installed\n- Load and display current config values in the editor when available\n\n3. Configuration Management:\n- Add \"Reset to defaults\" button for plugin options\n- Ensure proper handling of config values\n\n4. GitHub Integration:\n- Add GitHub redirect functionality to the same page\n- Reference implementation available in work.ubq.fi repository\n\nThis implementation depends on the config installer feature (#13) being completed first. The changes focus on improving user experience for partners while maintaining development flexibility through manual configuration options.", + "expectedResponse": "The manifest.name should match the name of the repo it lives in. This is because the worker URL contains the repo name, and we use that to match against manifest.name.", "sender": { "login": "sshivaditya2019", "type": "User" diff --git a/evals/llm.eval.ts b/evals/llm.eval.ts index 7b64831..c696954 100644 --- a/evals/llm.eval.ts +++ b/evals/llm.eval.ts @@ -7,13 +7,11 @@ import { createClient } from "@supabase/supabase-js"; import { createAdapters } from "../src/adapters"; import { Context } from "../src/types/context"; import { logger } from "../src/helpers/errors"; -import { Octokit } from "@octokit/rest"; +import { customOctokit as Octokit } from "@ubiquity-os/plugin-sdk/octokit"; import issueTemplate from "../tests/__mocks__/issue-template"; import { writeFileSync } from "fs"; import { fetchContext, formattedHistory, initAdapters } from "./handlers/setup-context"; -console.log(process.env); - // Required environment variables with type assertion const requiredEnvVars = { OPENAI_API_KEY: process.env.OPENAI_API_KEY as string, diff --git a/src/adapters/openai/helpers/completions.ts b/src/adapters/openai/helpers/completions.ts index 36989cf..1329da3 100644 --- a/src/adapters/openai/helpers/completions.ts +++ b/src/adapters/openai/helpers/completions.ts @@ -118,6 +118,11 @@ export class Completions extends SuperOpenAi { }, }); + if (!res.choices || !res.choices.length) { + logger.debug(`No completion found for query: ${query} Response: ${JSON.stringify(res)}`, { res }); + return { answer: "", tokenUsage: { input: 0, output: 0, total: 0 }, groundTruths }; + } + const answer = res.choices[0].message; if (answer && answer.content && res.usage) { return {