diff --git a/packages/core/src/http-client.ts b/packages/core/src/http-client.ts
index 4253dfc5..901378c1 100644
--- a/packages/core/src/http-client.ts
+++ b/packages/core/src/http-client.ts
@@ -1,4 +1,4 @@
-import { removeLeadingSlashes } from '@sap-cloud-sdk/util';
+import { mergeIgnoreCase, removeLeadingSlashes } from '@sap-cloud-sdk/util';
 import {
   executeHttpRequest,
   HttpRequestConfig,
@@ -102,13 +102,7 @@ function mergeWithDefaultRequestConfig(
   return {
     ...defaultConfig,
     ...requestConfig,
-    headers: {
-      ...defaultConfig.headers,
-      ...requestConfig?.headers
-    },
-    params: {
-      ...defaultConfig.params,
-      ...requestConfig?.params
-    }
+    headers: mergeIgnoreCase(defaultConfig.headers, requestConfig?.headers),
+    params: mergeIgnoreCase(defaultConfig.params, requestConfig?.params)
   };
 }
diff --git a/packages/gen-ai-hub/src/client/openai/openai-client.test.ts b/packages/gen-ai-hub/src/client/openai/openai-client.test.ts
index e11053fa..0db33ae2 100644
--- a/packages/gen-ai-hub/src/client/openai/openai-client.test.ts
+++ b/packages/gen-ai-hub/src/client/openai/openai-client.test.ts
@@ -59,11 +59,9 @@ describe('openai client', () => {
       chatCompletionEndpoint
     );

-    const response = await client.chatCompletion(
-      'gpt-35-turbo',
-      prompt,
-      '1234'
-    );
+    const response = await client.chatCompletion(prompt, {
+      deploymentId: '1234'
+    });
     expect(response).toEqual(mockResponse);
   });

@@ -86,7 +84,7 @@ describe('openai client', () => {
     );

     await expect(
-      client.chatCompletion('gpt-4', prompt, '1234')
+      client.chatCompletion(prompt, { deploymentId: '1234' })
     ).rejects.toThrow('status code 400');
   });
 });
@@ -111,11 +109,9 @@ describe('openai client', () => {
       },
       embeddingsEndpoint
     );
-    const response = await client.embeddings(
-      'text-embedding-ada-002',
-      prompt,
-      '1234'
-    );
+    const response = await client.embeddings(prompt, {
+      deploymentId: '1234'
+    });
     expect(response).toEqual(mockResponse);
   });

@@ -138,7 +134,7 @@ describe('openai client', () => {
    );

     await expect(
-      client.embeddings('text-embedding-3-large', prompt, '1234')
+      client.embeddings(prompt, { deploymentId: '1234' })
     ).rejects.toThrow('status code 400');
   });
 });
diff --git a/packages/gen-ai-hub/src/client/openai/openai-client.ts b/packages/gen-ai-hub/src/client/openai/openai-client.ts
index 443c8ad4..9ab170a7 100644
--- a/packages/gen-ai-hub/src/client/openai/openai-client.ts
+++ b/packages/gen-ai-hub/src/client/openai/openai-client.ts
@@ -1,10 +1,9 @@
-import { HttpRequestConfig } from '@sap-cloud-sdk/http-client';
-import { CustomRequestConfig, executeRequest } from '@sap-ai-sdk/core';
+import { type CustomRequestConfig, executeRequest } from '@sap-ai-sdk/core';
 import {
-  DeploymentResolver,
-  resolveDeployment
+  getDeploymentId,
+  type ModelDeployment
 } from '../../utils/deployment-resolver.js';
-import {
+import type {
   OpenAiChatCompletionParameters,
   OpenAiEmbeddingParameters,
   OpenAiEmbeddingOutput,
@@ -21,21 +20,20 @@ const apiVersion = '2024-02-01';
 export class OpenAiClient {
   /**
    * Creates a completion for the chat messages.
-   * @param model - The model to use for the chat completion.
    * @param data - The input parameters for the chat completion.
-   * @param deploymentResolver - A deployment id or a function to retrieve it.
+   * @param modelDeployment - A deployment ID, or a model name and optional version used to look up a matching deployment.
    * @param requestConfig - The request configuration.
    * @returns The completion result.
   */
  async chatCompletion(
-    model: OpenAiChatModel | { name: OpenAiChatModel; version: string },
     data: OpenAiChatCompletionParameters,
-    deploymentResolver?: DeploymentResolver,
+    modelDeployment: ModelDeployment<OpenAiChatModel>,
     requestConfig?: CustomRequestConfig
   ): Promise<OpenAiChatCompletionOutput> {
-    const deploymentId = await resolveOpenAiDeployment(
-      model,
-      deploymentResolver
+    const deploymentId = await getDeploymentId(
+      modelDeployment,
+      'azure-openai',
+      requestConfig
     );
     const response = await executeRequest(
       {
@@ -43,65 +41,33 @@ export class OpenAiClient {
         apiVersion
       },
       data,
-      mergeRequestConfig(requestConfig)
+      requestConfig
     );
     return response.data;
   }
+
   /**
    * Creates an embedding vector representing the given text.
-   * @param model - The model to use for the embedding computation.
    * @param data - The text to embed.
-   * @param deploymentResolver - A deployment id or a function to retrieve it.
+   * @param modelDeployment - A deployment ID, or a model name and optional version used to look up a matching deployment.
    * @param requestConfig - The request configuration.
    * @returns The completion result.
    */
   async embeddings(
-    model:
-      | OpenAiEmbeddingModel
-      | { name: OpenAiEmbeddingModel; version: string },
     data: OpenAiEmbeddingParameters,
-    deploymentResolver?: DeploymentResolver,
+    modelDeployment: ModelDeployment<OpenAiEmbeddingModel>,
     requestConfig?: CustomRequestConfig
   ): Promise<OpenAiEmbeddingOutput> {
-    const deploymentId = await resolveOpenAiDeployment(
-      model,
-      deploymentResolver
+    const deploymentId = await getDeploymentId(
+      modelDeployment,
+      'azure-openai',
+      requestConfig
     );
     const response = await executeRequest(
       {
         url: `/inference/deployments/${deploymentId}/embeddings`,
         apiVersion
       },
       data,
-      mergeRequestConfig(requestConfig)
+      requestConfig
     );
     return response.data;
   }
 }
-
-async function resolveOpenAiDeployment(
-  model: string | { name: string; version: string },
-  resolver?: DeploymentResolver
-) {
-  if (typeof resolver === 'string') {
-    return resolver;
-  }
-  const llm =
-    typeof model === 'string' ? { name: model, version: 'latest' } : model;
-  const deployment = await resolveDeployment({
-    scenarioId: 'foundation-models',
-    executableId: 'azure-openai',
-    model: llm
-  });
-  return deployment.id;
-}
-
-function mergeRequestConfig(
-  requestConfig?: CustomRequestConfig
-): HttpRequestConfig {
-  return {
-    method: 'POST',
-    headers: {
-      'content-type': 'application/json'
-    },
-    params: { 'api-version': apiVersion },
-    ...requestConfig
-  };
-}
diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-client.ts b/packages/gen-ai-hub/src/orchestration/orchestration-client.ts
index 203b1088..87fccce4 100644
--- a/packages/gen-ai-hub/src/orchestration/orchestration-client.ts
+++ b/packages/gen-ai-hub/src/orchestration/orchestration-client.ts
@@ -1,8 +1,6 @@
 import { executeRequest, CustomRequestConfig } from '@sap-ai-sdk/core';
-import {
-  DeploymentResolver,
-  resolveDeployment
-} from '../utils/deployment-resolver.js';
+import { pickValueIgnoreCase } from '@sap-cloud-sdk/util';
+import { resolveDeployment } from '../utils/deployment-resolver.js';
 import {
   CompletionPostRequest,
   CompletionPostResponse
@@ -16,25 +14,35 @@ export class OrchestrationClient {
   /**
    * Creates a completion for the chat messages.
    * @param data - The input parameters for the chat completion.
-   * @param deploymentResolver - A deployment ID or a function to retrieve it.
+   * @param deploymentId - A deployment ID; if omitted, a deployment is resolved based on the model given in the request data.
    * @param requestConfig - Request configuration.
    * @returns The completion result.
    */
   async chatCompletion(
     data: OrchestrationCompletionParameters,
-    deploymentResolver: DeploymentResolver = () =>
-      resolveDeployment({ scenarioId: 'orchestration' }),
+    deploymentId?: string,
     requestConfig?: CustomRequestConfig
   ): Promise<CompletionPostResponse> {
     const body = constructCompletionPostRequest(data);
-    const deployment =
-      typeof deploymentResolver === 'function'
-        ? (await deploymentResolver()).id
-        : deploymentResolver;
+    deploymentId =
+      deploymentId ??
+      (
+        await resolveDeployment({
+          scenarioId: 'orchestration',
+          model: {
+            name: data.llmConfig.model_name,
+            version: data.llmConfig.model_version
+          },
+          resourceGroup: pickValueIgnoreCase(
+            requestConfig?.headers,
+            'ai-resource-group'
+          )
+        })
+      ).id;
     const response = await executeRequest(
       {
-        url: `/inference/deployments/${deployment}/completion`
+        url: `/inference/deployments/${deploymentId}/completion`
       },
       body,
       requestConfig
diff --git a/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts b/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts
index d1228845..12f79daa 100644
--- a/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts
+++ b/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts
@@ -5,7 +5,7 @@ import {
 } from '../../../../test-util/mock-http.js';
 import { resolveDeployment } from './deployment-resolver.js';

-describe('Deployment resolver', () => {
+describe('deployment resolver', () => {
   beforeEach(() => {
     mockClientCredentialsGrantCall();
   });
@@ -18,29 +18,30 @@
   beforeEach(() => {
     mockResponse();
   });
+
   it('should return the first deployment, if multiple are given', async () => {
-    const { id, configurationId } = await resolveDeployment({
+    const { id } = await resolveDeployment({
       scenarioId: 'foundation-models'
     });
     expect(id).toBe('1');
-    expect(configurationId).toBe('c1');
   });
-  it('should return the deployment with the correct model name', async () => {
-    const { id, configurationId } = await resolveDeployment({
+
+  it('should return the first deployment with the correct model name', async () => {
+    const { id } = await resolveDeployment({
       scenarioId: 'foundation-models',
       model: { name: 'gpt-4o' }
     });
-    expect(id).toBe('2');
-    expect(configurationId).toBe('c2');
+    expect(id).toBe('1');
   });
-  it('should return the deployment with the correct model name', async () => {
-    const { id, configurationId } = await resolveDeployment({
+
+  it('should return the deployment with the correct model name and version', async () => {
+    const { id } = await resolveDeployment({
       scenarioId: 'foundation-models',
       model: { name: 'gpt-4o', version: '0613' }
     });
     expect(id).toBe('2');
-    expect(configurationId).toBe('c2');
   });
+
   it('should throw in case no deployment with the given model name is found', async () => {
     await expect(
       resolveDeployment({
@@ -49,7 +50,8 @@
       })
     ).rejects.toThrow('No deployment matched the given criteria');
   });
-  it('should throw in case no deployment with the given model version is found', async () => {
+
+  it('should throw in case no deployment with the given model and version is found', async () => {
     await expect(
       resolveDeployment({
         scenarioId: 'foundation-models',
@@ -73,9 +75,47 @@
     });

     await expect(
-      resolveDeployment({ scenarioId: 'foundation-models' })
+      resolveDeployment({
+        scenarioId: 'foundation-models',
+        model: { name: 'gpt-4o', version: '0613' }
+      })
     ).rejects.toThrow('No deployment matched the given criteria');
   });
+
+  it('should consider custom resource group', async () => {
+    nock(aiCoreDestination.url, {
+      reqheaders: {
+        'ai-resource-group': 'otherId'
+      }
+    })
+      .get('/v2/lm/deployments')
+      .query({ scenarioId: 'foundation-models', status: 'RUNNING' })
+      .reply(200, {
+        resources: [
+          {
+            id: '5',
+            details: {
+              resources: {
+                backend_details: {
+                  model: {
+                    name: 'gpt-4o',
+                    version: 'latest'
+                  }
+                }
+              }
+            }
+          }
+        ]
+      });
+
+    const { id } = await resolveDeployment({
+      scenarioId: 'foundation-models',
+      model: { name: 'gpt-4o' },
+      resourceGroup: 'otherId'
+    });
+
+    expect(id).toBe('5');
+  });
 });

 function mockResponse() {
@@ -87,32 +127,22 @@
     .get('/v2/lm/deployments')
     .query({ scenarioId: 'foundation-models', status: 'RUNNING' })
     .reply(200, {
-      count: 1,
       resources: [
         {
-          configurationId: 'c1',
           id: '1',
-          deploymentUrl: 'https://foo.com/v2/inference/deployments/1',
           details: {
             resources: {
               backend_details: {
                 model: {
-                  name: 'gpt-4-32k',
+                  name: 'gpt-4o',
                   version: 'latest'
                 }
               }
-            },
-            scaling: {
-              backend_details: {}
             }
-          },
-          lastOperation: 'CREATE',
-          status: 'RUNNING'
+          }
         },
         {
-          configurationId: 'c2',
           id: '2',
-          deploymentUrl: 'https://foo.com/v2/inference/deployments/2',
           details: {
             resources: {
               backend_details: {
                 model: {
                   name: 'gpt-4o',
                   version: '0613'
                 }
               }
             }
-          },
-          status: 'RUNNING'
+          }
         }
       ]
     });
diff --git a/packages/gen-ai-hub/src/utils/deployment-resolver.ts b/packages/gen-ai-hub/src/utils/deployment-resolver.ts
index a2c74551..e6a55515 100644
--- a/packages/gen-ai-hub/src/utils/deployment-resolver.ts
+++ b/packages/gen-ai-hub/src/utils/deployment-resolver.ts
@@ -1,19 +1,57 @@
-import {
-  DeploymentApi,
-  AiDeployment,
-  AiDeploymentStatus
-} from '@sap-ai-sdk/ai-core';
+import { DeploymentApi, AiDeployment } from '@sap-ai-sdk/ai-core';
+import { CustomRequestConfig } from '@sap-ai-sdk/core';
+import { pickValueIgnoreCase } from '@sap-cloud-sdk/util';

 /**
- * A deployment resolver can be either a deployment ID or a function that returns a full deployment object.
+ * The model deployment configuration when using a model. It can be either the name of the model or an object containing the name and version of the model.
+ * @typeParam ModelNameT - String literal type representing the name of the model.
  */
-export type DeploymentResolver = DeploymentId | (() => Promise<AiDeployment>);
+export type ModelConfiguration<ModelNameT = string> =
+  | ModelNameT
+  | {
+      /**
+       * The name of the model.
+       */
+      modelName: ModelNameT;
+      /**
+       * The version of the model.
+       */
+      modelVersion?: string;
+    };
+
+/**
+ * The deployment configuration when using a deployment ID.
+ */
+export interface DeploymentIdConfiguration {
+  /**
+   * The deployment ID.
+   */
+  deploymentId: string;
+}
+
 /**
- * A deployment ID is a string that uniquely identifies a deployment.
+ * The deployment configuration can be either a model configuration or a deployment ID configuration.
+ * @typeParam ModelNameT - String literal type representing the name of the model.
  */
-export type DeploymentId = string;
+export type ModelDeployment<ModelNameT = string> =
+  | ModelConfiguration<ModelNameT>
+  | DeploymentIdConfiguration;
+
 /**
- * A foundation model is identifier by its name and potentially a version.
+ * Type guard to check if the given deployment configuration is a deployment ID configuration.
+ * @param modelDeployment - Configuration to check.
+ * @returns `true` if the configuration is a deployment ID configuration, `false` otherwise.
+ */
+export function isDeploymentIdConfiguration(
+  modelDeployment: ModelDeployment
+): modelDeployment is DeploymentIdConfiguration {
+  return (
+    typeof modelDeployment === 'object' && 'deploymentId' in modelDeployment
+  );
+}
+
+/**
+ * A foundation model is identified by its name and potentially a version.
  */
 export interface FoundationModel {
   /**
@@ -27,56 +65,124 @@ export interface FoundationModel {
   name: string;
   /**
    * The version of the model.
    */
   version?: string;
 }

 /**
- * Query the AI Core service for a deployment that matches the given criteria. If more than one deployment matches the criteria, the first one is returned.
- * @param opts - The options for the deployment resolution.
- * @param opts.scenarioId - The scenario ID of the deployment.
- * @param opts.executableId - The executable of the deployment.
- * @param opts.model - The name and potentially version of the model to look for.
- * @returns An AiDeployment, if a deployment was found, fails otherwise.
+ * The options for the deployment resolution.
  */
-export async function resolveDeployment(opts: {
+interface DeploymentResolutionOptions {
+  /**
+   * The scenario ID of the deployment.
+   */
   scenarioId: string;
-  executableId?: string;
+  /**
+   * The name and potentially version of the model to look for.
+   */
   model?: FoundationModel;
-}): Promise<AiDeployment> {
-  const query = {
-    scenarioId: opts.scenarioId,
-    status: 'RUNNING' as AiDeploymentStatus,
-    ...(opts.executableId && { executableIds: [opts.executableId] })
-  };
+  /**
+   * The executable ID of the deployment.
+   */
+  executableId?: string;
+  /**
+   * The resource group of the deployment.
+   */
+  resourceGroup?: string;
+}

-  // TODO: add a cache: https://github.tools.sap/AI/gen-ai-hub-sdk-js-backlog/issues/78
-  let deploymentList: AiDeployment[];
-  const { deploymentQuery } = DeploymentApi;
-  const resourceGroup = { 'AI-Resource-Group': 'default' };
-  try {
-    deploymentList = (await deploymentQuery(query, resourceGroup).execute())
-      .resources;
-  } catch (error) {
-    throw new Error('Failed to fetch the list of deployments: ' + error);
-  }
+/**
+ * Query the AI Core service for a deployment that matches the given criteria. If more than one deployment matches the criteria, the first one is returned.
+ * @param opts - The options for the deployment resolution.
+ * @returns A promise of a deployment, if one was found; fails otherwise.
+ */
+export async function resolveDeployment(
+  opts: DeploymentResolutionOptions
+): Promise<AiDeployment> {
+  const { model } = opts;

-  if (opts.model) {
-    const modelName = opts.model.name;
-    deploymentList = deploymentList.filter(
-      deployment => extractModel(deployment)?.name === modelName
+  let deployments = await getAllDeployments(opts);
+
+  if (model) {
+    deployments = deployments.filter(
+      deployment => extractModel(deployment)?.name === model.name
     );
-    if (opts.model.version) {
-      const modelVersion = opts.model.version;
-      // feature idea: smart handling of 'latest' version: treat 'latest' and the highest version number as the same
-      deploymentList = deploymentList.filter(
-        deployment => extractModel(deployment)?.version === modelVersion
+
+    if (model.version) {
+      deployments = deployments.filter(
+        deployment => extractModel(deployment)?.version === model.version
       );
     }
   }
-  if (!deploymentList.length) {
+  if (!deployments.length) {
     throw new Error(
       'No deployment matched the given criteria: ' + JSON.stringify(opts)
     );
   }
-  return deploymentList[0];
+  return deployments[0];
+}
+
+async function getAllDeployments(
+  opts: DeploymentResolutionOptions
+): Promise<AiDeployment[]> {
+  const { scenarioId, executableId, resourceGroup = 'default' } = opts;
+  // TODO: add a cache: https://github.tools.sap/AI/gen-ai-hub-sdk-js-backlog/issues/78
+  try {
+    return (
+      await DeploymentApi.deploymentQuery(
+        {
+          scenarioId,
+          status: 'RUNNING',
+          ...(executableId && { executableIds: [executableId] })
+        },
+        { 'AI-Resource-Group': resourceGroup }
+      ).execute()
+    ).resources;
+  } catch (error) {
+    throw new Error('Failed to fetch the list of deployments: ' + error);
+  }
+}
+
+function extractModel(
+  deployment: AiDeployment
+): Partial<FoundationModel> | undefined {
+  return deployment.details?.resources?.backend_details?.model;
+}
+
+/**
+ * Get the deployment ID for a given model deployment configuration and executable ID using the 'foundation-models' scenario.
+ * @param modelDeployment - The model deployment configuration.
+ * @param executableId - The executable ID.
+ * @param requestConfig - The request configuration.
+ * @returns The ID of the deployment, if found.
+ */
+export async function getDeploymentId(
+  modelDeployment: ModelDeployment,
+  executableId: string,
+  requestConfig?: CustomRequestConfig
+): Promise<string> {
+  if (isDeploymentIdConfiguration(modelDeployment)) {
+    return modelDeployment.deploymentId;
+  }
+
+  return (
+    await resolveDeployment({
+      scenarioId: 'foundation-models',
+      executableId,
+      model: translateToFoundationModel(modelDeployment),
+      resourceGroup: pickValueIgnoreCase(
+        requestConfig?.headers,
+        'ai-resource-group'
+      )
+    })
+  ).id;
 }

-const extractModel = (deployment: AiDeployment) =>
-  deployment.details?.resources?.backend_details?.model;
+function translateToFoundationModel(
+  modelConfig: ModelConfiguration
+): FoundationModel {
+  if (typeof modelConfig === 'string') {
+    return { name: modelConfig };
+  }
+
+  return {
+    name: modelConfig.modelName,
+    ...(modelConfig.modelVersion && { version: modelConfig.modelVersion })
+  };
+}
diff --git a/sample-code/src/aiservice.ts b/sample-code/src/aiservice.ts
index b161682f..29939099 100644
--- a/sample-code/src/aiservice.ts
+++ b/sample-code/src/aiservice.ts
@@ -10,9 +10,12 @@ const openAiClient = new OpenAiClient();
  * @returns The answer from GPT.
  */
 export async function chatCompletion(): Promise<string> {
-  const response = await openAiClient.chatCompletion('gpt-35-turbo', {
-    messages: [{ role: 'user', content: 'What is the capital of France?' }]
-  });
+  const response = await openAiClient.chatCompletion(
+    {
+      messages: [{ role: 'user', content: 'What is the capital of France?' }]
+    },
+    'gpt-35-turbo'
+  );
   const assistantMessage = response.choices[0]
     .message as OpenAiChatAssistantMessage;
   return assistantMessage.content!;
@@ -23,8 +26,12 @@ export async function chatCompletion(): Promise<string> {
  * @returns An embedding vector.
  */
 export async function computeEmbedding(): Promise<number[]> {
-  const response = await openAiClient.embeddings('text-embedding-ada-002', {
-    input: 'Hello, world!'
-  });
+  const response = await openAiClient.embeddings(
+    {
+      input: 'Hello, world!'
+    },
+    'text-embedding-ada-002'
+  );
+
   return response.data[0].embedding;
 }
diff --git a/tests/type-tests/test/openai.test-d.ts b/tests/type-tests/test/openai.test-d.ts
index e39c30da..65047641 100644
--- a/tests/type-tests/test/openai.test-d.ts
+++ b/tests/type-tests/test/openai.test-d.ts
@@ -12,18 +12,24 @@ expectType<OpenAiClient>(client);
 /**
  * Chat Completion.
  */
 expectType<Promise<OpenAiChatCompletionOutput>>(
-  client.chatCompletion('gpt-4', {
-    messages: [{ role: 'user', content: 'test prompt' }]
-  })
+  client.chatCompletion(
+    {
+      messages: [{ role: 'user', content: 'test prompt' }]
+    },
+    'gpt-4'
+  )
 );

 /**
  * Embeddings.
  */
 expectType<Promise<OpenAiEmbeddingOutput>>(
-  client.embeddings('text-embedding-ada-002', {
-    input: 'test input'
-  })
+  client.embeddings(
+    {
+      input: 'test input'
+    },
+    'text-embedding-ada-002'
+  )
 );

-expectError(client.embeddings('gpt-35-turbo', { input: 'test input' }));
+expectError(client.embeddings({ input: 'test input' }, 'gpt-35-turbo'));
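
Usage notes (reviewer sketch, not part of the patch): the snippet below illustrates the reworked call order — request data first, then the model or deployment configuration. It assumes the package entry point exports OpenAiClient and OpenAiChatAssistantMessage as used in sample-code/src/aiservice.ts; the import path '@sap-ai-sdk/gen-ai-hub', the resource group name, and the deployment ID are illustrative placeholders.

// Sketch of the new client API surface; paths and IDs are assumptions.
import {
  OpenAiClient,
  OpenAiChatAssistantMessage
} from '@sap-ai-sdk/gen-ai-hub';

const client = new OpenAiClient();

export async function demo(): Promise<void> {
  // 1) Look up a RUNNING 'azure-openai' deployment by model name and version.
  const byModel = await client.chatCompletion(
    { messages: [{ role: 'user', content: 'What is the capital of France?' }] },
    { modelName: 'gpt-35-turbo', modelVersion: 'latest' }
  );
  const message = byModel.choices[0].message as OpenAiChatAssistantMessage;

  // 2) Address a known deployment directly and skip resolution entirely.
  const byId = await client.chatCompletion(
    { messages: [{ role: 'user', content: 'Hello!' }] },
    { deploymentId: '1234' }
  );

  // 3) Resolve within a non-default resource group; the resolver picks the
  //    'ai-resource-group' header from the request config case-insensitively.
  const embedding = await client.embeddings(
    { input: 'Hello, world!' },
    'text-embedding-ada-002',
    { headers: { 'AI-Resource-Group': 'my-resource-group' } }
  );

  console.log(
    message.content,
    byId.choices.length,
    embedding.data[0].embedding.length
  );
}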
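A second sketch, for reviewers unfamiliar with the new union: how ModelDeployment narrows at the type level. The type is paraphrased from deployment-resolver.ts, and describeDeployment is a hypothetical stand-in for the dispatch performed by getDeploymentId.

// Paraphrased from deployment-resolver.ts; describeDeployment is hypothetical.
type ModelDeployment<ModelNameT = string> =
  | ModelNameT
  | { modelName: ModelNameT; modelVersion?: string }
  | { deploymentId: string };

function describeDeployment(
  modelDeployment: ModelDeployment<'gpt-4o' | 'gpt-35-turbo'>
): string {
  // Same check as isDeploymentIdConfiguration(): strings are model names,
  // so only object configurations can carry a deployment ID.
  if (
    typeof modelDeployment === 'object' &&
    'deploymentId' in modelDeployment
  ) {
    return `use deployment ${modelDeployment.deploymentId} directly`;
  }
  const model: { modelName: string; modelVersion?: string } =
    typeof modelDeployment === 'string'
      ? { modelName: modelDeployment }
      : modelDeployment;
  return `resolve a RUNNING deployment for ${model.modelName} (version ${
    model.modelVersion ?? 'any'
  })`;
}

// All three configuration styles are accepted:
describeDeployment('gpt-4o');
describeDeployment({ modelName: 'gpt-35-turbo', modelVersion: 'latest' });
describeDeployment({ deploymentId: '1234' });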