From 4f55cd59f9a7cc756c7ee8047ddb695b7a648a39 Mon Sep 17 00:00:00 2001 From: Valery Bugakov Date: Mon, 21 Oct 2024 15:36:02 +0800 Subject: [PATCH] Autocomplete: add git metadata to autocomplete requests --- lib/shared/src/inferenceClient/misc.ts | 14 ++ lib/shared/src/misc/observableOperation.ts | 1 + .../get-inline-completions-tests/helpers.ts | 12 +- .../completions/providers/shared/provider.ts | 31 ++++- .../remote-urls-from-parent-dirs.test.ts | 25 ++-- .../remote-urls-from-parent-dirs.ts | 52 ++++++-- .../src/repository/repo-name-resolver.test.ts | 15 ++- vscode/src/repository/repo-name-resolver.ts | 124 ++++++++++++------ 8 files changed, 202 insertions(+), 72 deletions(-) diff --git a/lib/shared/src/inferenceClient/misc.ts b/lib/shared/src/inferenceClient/misc.ts index 7b436b5f5a31..708f049e1a7e 100644 --- a/lib/shared/src/inferenceClient/misc.ts +++ b/lib/shared/src/inferenceClient/misc.ts @@ -28,6 +28,20 @@ export type CodeCompletionsParams = Omit & { timeoutMs: number // TODO: apply the same type to the underlying `CompletionParameters` model?: LegacyModelRefStr | ModelRefStr + metadata?: { + /** + * File path relative to the git root. + */ + gitFilePath?: string + /** + * Repository names resolved using Sourcegraph API based on remote URLs (such as `github.com/foo/bar`). + */ + gitRepoNames?: string[] + /** + * git rev-parse HEAD + */ + gitCommitHash?: string + } } export type SerializedCodeCompletionsParams = Omit diff --git a/lib/shared/src/misc/observableOperation.ts b/lib/shared/src/misc/observableOperation.ts index 9961a3d2586f..b42ea2f7677f 100644 --- a/lib/shared/src/misc/observableOperation.ts +++ b/lib/shared/src/misc/observableOperation.ts @@ -4,6 +4,7 @@ import { catchError, filter, firstValueFrom, shareReplay, startWith, switchMap } /** A sentinel value to indicate that the value has not yet been emitted. */ export const pendingOperation = Symbol.for('@@pendingOperation') +export type MaybePendingObservable = Observable /** * Run an operation with the outer observable as input. The result is replayed to all subscribers diff --git a/vscode/src/completions/get-inline-completions-tests/helpers.ts b/vscode/src/completions/get-inline-completions-tests/helpers.ts index 562943989516..c95e4d8de309 100644 --- a/vscode/src/completions/get-inline-completions-tests/helpers.ts +++ b/vscode/src/completions/get-inline-completions-tests/helpers.ts @@ -1,7 +1,7 @@ import dedent from 'dedent' import { Observable } from 'observable-fns' import { vi } from 'vitest' -import type { URI } from 'vscode-uri' +import { URI } from 'vscode-uri' import { AUTH_STATUS_FIXTURE_AUTHED, @@ -28,7 +28,7 @@ import type { CodeCompletionsParams, CompletionResponseWithMetaData, } from '@sourcegraph/cody-shared/src/inferenceClient/misc' - +import { repoNameResolver } from '../../repository/repo-name-resolver' import { DEFAULT_VSCODE_SETTINGS } from '../../testutils/mocks' import type { SupportedLanguage } from '../../tree-sitter/grammars' import { updateParseTreeCache } from '../../tree-sitter/parse-tree-cache' @@ -411,6 +411,14 @@ export function initCompletionProviderConfig({ authStatus, }: Partial>): void { setEditorWindowIsFocused(() => true) + + vi.spyOn(repoNameResolver, 'getRepoInfoContainingUri').mockReturnValue( + Observable.of({ + rootUri: URI.file('/repoName'), + repoNames: ['sourcegraph/repoName'], + }) + ) + vi.spyOn(featureFlagProvider, 'evaluateFeatureFlagEphemerally').mockResolvedValue(false) vi.spyOn(featureFlagProvider, 'evaluatedFeatureFlag').mockReturnValue(Observable.of(false)) vi.spyOn(ClientConfigSingleton.getInstance(), 'getConfig').mockResolvedValue({ diff --git a/vscode/src/completions/providers/shared/provider.ts b/vscode/src/completions/providers/shared/provider.ts index 5395a8a5cc31..95ad14135034 100644 --- a/vscode/src/completions/providers/shared/provider.ts +++ b/vscode/src/completions/providers/shared/provider.ts @@ -13,10 +13,15 @@ import { type GitContext, type LegacyModelRefStr, type Model, + currentAuthStatus, + firstResultFromOperation, + isDotCom, + pathFunctionsForURI, toLegacyModel, tokensToChars, } from '@sourcegraph/cody-shared' +import { repoNameResolver } from '../../../repository/repo-name-resolver' import type * as CompletionAnalyticsLogger from '../../analytics-logger' import { defaultCodeCompletionsClient } from '../../default-client' import type { TriggerKind } from '../../get-inline-completions' @@ -232,14 +237,38 @@ export abstract class Provider { return Promise.resolve(this.client.complete(requestParams, abortController)) } + protected async getRequestParamsMetadata( + document: TextDocument, + abortSignal: AbortSignal + ): Promise { + const repoInfo = await firstResultFromOperation( + repoNameResolver.getRepoInfoContainingUri(document.uri), + abortSignal + ) + + const gitFilePath = repoInfo?.rootUri + ? pathFunctionsForURI(document.uri).relative(repoInfo?.rootUri.path, document.uri.path) + : undefined + + // TODO: add git commit hash + return { + gitFilePath, + gitRepoNames: repoInfo?.repoNames, + } + } + public async generateCompletions( generateOptions: GenerateCompletionsOptions, abortSignal: AbortSignal, tracer?: CompletionProviderTracer ): Promise> { - const { docContext, numberOfCompletionsToGenerate } = generateOptions + const { docContext, numberOfCompletionsToGenerate, document } = generateOptions const requestParams = this.getRequestParams(generateOptions) as CodeCompletionsParams + // Add git metadata for enterprise users. + if (!('metadata' in requestParams) && !isDotCom(currentAuthStatus())) { + requestParams.metadata = await this.getRequestParamsMetadata(document, abortSignal) + } tracer?.params(requestParams) const completionsGenerators = Array.from({ length: numberOfCompletionsToGenerate }).map( diff --git a/vscode/src/repository/remote-urls-from-parent-dirs.test.ts b/vscode/src/repository/remote-urls-from-parent-dirs.test.ts index e861e655e666..afc41f4ed305 100644 --- a/vscode/src/repository/remote-urls-from-parent-dirs.test.ts +++ b/vscode/src/repository/remote-urls-from-parent-dirs.test.ts @@ -3,10 +3,15 @@ import * as vscode from 'vscode' import { URI } from 'vscode-uri' import { vscodeGitAPI } from './git-extension-api' -import { gitRemoteUrlsForUri } from './remote-urls-from-parent-dirs' +import { gitRemoteUrlsInfoForUri } from './remote-urls-from-parent-dirs' import { mockFsCalls } from './test-helpers' -describe('gitRemoteUrlsForUri', () => { +async function gitRemoteUrlsForUri(uri: vscode.Uri): Promise { + const repoInfo = await gitRemoteUrlsInfoForUri(uri) + return repoInfo?.remoteUrls +} + +describe('gitRemoteUrlsInfoForUri', () => { beforeAll(() => { // Ensure that the `vscodeGitAPI` is not somehow set, because these tests were written to // test the behavior that is the fallback for when it is not set. @@ -75,9 +80,9 @@ describe('gitRemoteUrlsForUri', () => { const remoteUrls = await gitRemoteUrlsForUri(fileUri) expect(remoteUrls).toEqual([ - 'https://github.com/username/yourproject.git', - 'https://github.com/originalauthor/yourproject.git', 'git@backupserver:repositories/yourproject.git', + 'https://github.com/originalauthor/yourproject.git', + 'https://github.com/username/yourproject.git', ]) }) @@ -109,7 +114,7 @@ describe('gitRemoteUrlsForUri', () => { .mockRejectedValue(new vscode.FileSystemError('file does not exist')) const uri = URI.file('repo/src/dir/foo.ts') - const remoteUrls = await gitRemoteUrlsForUri(uri) + const remoteUrls = await gitRemoteUrlsInfoForUri(uri) expect(statMock).toBeCalledTimes(5) expect(remoteUrls).toBe(undefined) @@ -209,10 +214,12 @@ describe('gitRemoteUrlsForUri', () => { gitConfig: 'a', }) - expect(await gitRemoteUrlsForUri(URI.parse('https://example.com/foo/bar'))).toBe(undefined) - expect(await gitRemoteUrlsForUri(URI.parse('https://gitlab.com/foo/bar'))).toBe(undefined) - expect(await gitRemoteUrlsForUri(URI.parse('https://github.com/foo/bar'))).toBe(undefined) - expect(await gitRemoteUrlsForUri(URI.parse('ssh://git@github.com:foo/bar.git'))).toBe(undefined) + expect(await gitRemoteUrlsInfoForUri(URI.parse('https://example.com/foo/bar'))).toBe(undefined) + expect(await gitRemoteUrlsInfoForUri(URI.parse('https://gitlab.com/foo/bar'))).toBe(undefined) + expect(await gitRemoteUrlsInfoForUri(URI.parse('https://github.com/foo/bar'))).toBe(undefined) + expect(await gitRemoteUrlsInfoForUri(URI.parse('ssh://git@github.com:foo/bar.git'))).toBe( + undefined + ) expect(statMock).toBeCalledTimes(0) expect(readFileMock).toBeCalledTimes(0) }) diff --git a/vscode/src/repository/remote-urls-from-parent-dirs.ts b/vscode/src/repository/remote-urls-from-parent-dirs.ts index be57c46ba870..d4e86e4330cf 100644 --- a/vscode/src/repository/remote-urls-from-parent-dirs.ts +++ b/vscode/src/repository/remote-urls-from-parent-dirs.ts @@ -6,6 +6,11 @@ import { vscodeGitAPI } from './git-extension-api' const textDecoder = new TextDecoder('utf-8') +export interface GitRemoteUrlsInfo { + rootUri: vscode.Uri + remoteUrls: string[] +} + /** * Get the Git remote URLs for a given URI, which is assumed to be a file or path in a Git * repository. If it's not in a Git repository or there are no remote URLs, it returns `undefined`. @@ -13,25 +18,32 @@ const textDecoder = new TextDecoder('utf-8') * This function tries 2 different ways to get the remote URLs: (1) by using the Git extension API * available in VS Code only, and (2) by crawling the file system for the `.git/config` file. */ -export async function gitRemoteUrlsForUri( +export async function gitRemoteUrlsInfoForUri( uri: vscode.Uri, signal?: AbortSignal -): Promise { - const fromGitExtension = gitRemoteUrlsFromGitExtension(uri) - if (fromGitExtension && fromGitExtension.length > 0) { - return fromGitExtension +): Promise { + let remoteUrlsInfo = gitRemoteUrlsInfoFromGitExtension(uri) + + if (!remoteUrlsInfo || remoteUrlsInfo.remoteUrls.length === 0) { + remoteUrlsInfo = await gitRemoteUrlsInfoFromParentDirs(uri, signal) } - return await gitRemoteUrlsFromParentDirs(uri, signal) + + if (remoteUrlsInfo && remoteUrlsInfo.remoteUrls.length > 0) { + remoteUrlsInfo.remoteUrls = Array.from(new Set(remoteUrlsInfo.remoteUrls)).sort() + return remoteUrlsInfo + } + + return undefined } /** * Walks the tree from the current directory to find the `.git` folder and * extracts remote URLs. */ -async function gitRemoteUrlsFromParentDirs( +async function gitRemoteUrlsInfoFromParentDirs( uri: vscode.Uri, signal?: AbortSignal -): Promise { +): Promise { if (!isFileURI(uri)) { return undefined } @@ -40,15 +52,25 @@ async function gitRemoteUrlsFromParentDirs( const dirUri = isFile ? vscode.Uri.joinPath(uri, '..') : uri const gitRepoURIs = await gitRepoURIsFromParentDirs(dirUri, signal) - return gitRepoURIs - ? await gitRemoteUrlsFromGitConfigUri(gitRepoURIs.gitConfigUri, signal) - : undefined + + if (gitRepoURIs) { + const remoteUrls = await gitRemoteUrlsFromGitConfigUri(gitRepoURIs.gitConfigUri, signal) + + if (remoteUrls && remoteUrls.length > 0) { + return { + rootUri: gitRepoURIs.rootUri, + remoteUrls: remoteUrls || [], + } + } + } + + return undefined } /** * ❗️ The Git extension API instance is only available in the VS Code extension. ️️❗️ */ -function gitRemoteUrlsFromGitExtension(uri: vscode.Uri): string[] | undefined { +function gitRemoteUrlsInfoFromGitExtension(uri: vscode.Uri): GitRemoteUrlsInfo | undefined { const repository = vscodeGitAPI?.getRepository(uri) if (!repository) { return undefined @@ -63,7 +85,11 @@ function gitRemoteUrlsFromGitExtension(uri: vscode.Uri): string[] | undefined { remoteUrls.add(remote.pushUrl) } } - return remoteUrls.size ? Array.from(remoteUrls) : undefined + repository.state.HEAD?.commit + + return remoteUrls.size + ? { rootUri: repository.rootUri, remoteUrls: Array.from(remoteUrls) } + : undefined } interface GitRepoURIs { diff --git a/vscode/src/repository/repo-name-resolver.test.ts b/vscode/src/repository/repo-name-resolver.test.ts index 9249b92a9ac2..126c10926e5f 100644 --- a/vscode/src/repository/repo-name-resolver.test.ts +++ b/vscode/src/repository/repo-name-resolver.test.ts @@ -11,6 +11,7 @@ import { mockResolvedConfig, } from '@sourcegraph/cody-shared' +import { URI } from 'vscode-uri' import * as remoteUrlsFromParentDirs from './remote-urls-from-parent-dirs' import { RepoNameResolver } from './repo-name-resolver' import { mockFsCalls } from './test-helpers' @@ -24,9 +25,10 @@ describe('getRepoNamesContainingUri', () => { mockResolvedConfig({ auth: {} }) mockClientCapabilities(CLIENT_CAPABILITIES_FIXTURE) - vi.spyOn(remoteUrlsFromParentDirs, 'gitRemoteUrlsForUri').mockResolvedValue([ - 'git@github.com:sourcegraph/cody.git', - ]) + vi.spyOn(remoteUrlsFromParentDirs, 'gitRemoteUrlsInfoForUri').mockResolvedValue({ + rootUri: URI.file('/repo'), + remoteUrls: ['git@github.com:sourcegraph/cody.git'], + }) const { fileUri } = mockFsCalls({ filePath: '/repo/submodule/foo.ts', @@ -55,9 +57,10 @@ describe('getRepoNamesContainingUri', () => { const repoNameResolver = new RepoNameResolver() mockAuthStatus(AUTH_STATUS_FIXTURE_AUTHED_DOTCOM) - vi.spyOn(remoteUrlsFromParentDirs, 'gitRemoteUrlsForUri').mockResolvedValue([ - 'git@github.com:sourcegraph/cody.git', - ]) + vi.spyOn(remoteUrlsFromParentDirs, 'gitRemoteUrlsInfoForUri').mockResolvedValue({ + rootUri: URI.file('/repo'), + remoteUrls: ['git@github.com:sourcegraph/cody.git'], + }) const { fileUri } = mockFsCalls({ filePath: '/repo/submodule/foo.ts', diff --git a/vscode/src/repository/repo-name-resolver.ts b/vscode/src/repository/repo-name-resolver.ts index 0f42093a402f..678647d6e765 100644 --- a/vscode/src/repository/repo-name-resolver.ts +++ b/vscode/src/repository/repo-name-resolver.ts @@ -1,7 +1,10 @@ +import { LRUCache } from 'lru-cache' +import { Observable, map } from 'observable-fns' import type * as vscode from 'vscode' import { ContextFiltersProvider, + type MaybePendingObservable, authStatus, combineLatest, convertGitCloneURLToCodebaseName, @@ -19,9 +22,18 @@ import { switchMapReplayOperation, } from '@sourcegraph/cody-shared' -import { Observable, map } from 'observable-fns' import { logDebug } from '../output-channel-logger' -import { gitRemoteUrlsForUri } from './remote-urls-from-parent-dirs' + +import { type GitRemoteUrlsInfo, gitRemoteUrlsInfoForUri } from './remote-urls-from-parent-dirs' + +type RemoteUrl = string +type RepoName = string +type UriFsPath = string + +type GitRepoInfo = { + repoNames: string[] + rootUri?: vscode.Uri +} export class RepoNameResolver { /** @@ -31,74 +43,104 @@ export class RepoNameResolver { * ❗️ For enterprise, this uses the Sourcegraph API to resolve repo names instead of the local * conversion function. ❗️ */ - public getRepoNamesContainingUri(uri: vscode.Uri): Observable { + public getRepoNamesContainingUri(uri: vscode.Uri): MaybePendingObservable { + return this.getRepoInfoContainingUri(uri).map(repoInfo => { + if (repoInfo && repoInfo !== pendingOperation) { + return repoInfo.repoNames + } + return repoInfo || [] + }) + } + + /** + * Get repo root URI and the names of repositories (such as `github.com/foo/bar`) + * that contain the given file URI. The file URI can also be a folder within + * a workspace or a workspace root folder. + * + * ❗️ For enterprise, this uses the Sourcegraph API to resolve repo names instead of the local + * conversion function. ❗️ + */ + public getRepoInfoContainingUri(uri: vscode.Uri): MaybePendingObservable { return combineLatest( - promiseFactoryToObservable(signal => this.getUniqueRemoteUrlsCached(uri, signal)), + promiseFactoryToObservable(signal => this.getRemoteUrlsInfoCached(uri, signal)), authStatus ).pipe( switchMapReplayOperation( - ([uniqueRemoteUrls, authStatus]): Observable => { + ([repoInfo, authStatus]): MaybePendingObservable => { + const remoteUrls = repoInfo?.remoteUrls || [] + // Use local conversion function for non-enterprise accounts. if (isDotCom(authStatus)) { + const repoNames = remoteUrls + .map(convertGitCloneURLToCodebaseName) + .filter(isDefined) + return Observable.of( - uniqueRemoteUrls.map(convertGitCloneURLToCodebaseName).filter(isDefined) + repoNames.length ? { rootUri: repoInfo?.rootUri, repoNames } : null ) } return combineLatest( - ...uniqueRemoteUrls.map(remoteUrl => this.getRepoNameCached(remoteUrl)) + ...remoteUrls.map(remoteUrl => this.getRepoNameCached(remoteUrl)) ).pipe( - map(repoNames => - repoNames.includes(pendingOperation) - ? pendingOperation - : ( - repoNames as Exclude< - (typeof repoNames)[number], - typeof pendingOperation - >[] - ).filter(isDefined) - ) + map(maybeRepoNames => { + if (maybeRepoNames.includes(pendingOperation)) { + return pendingOperation + } + + const repoNames = ( + maybeRepoNames as Exclude< + (typeof maybeRepoNames)[number], + typeof pendingOperation + >[] + ).filter(isDefined) + + return repoNames.length ? { rootUri: repoInfo?.rootUri, repoNames } : null + }) ) } ), map(value => { if (isError(value)) { logDebug('RepoNameResolver:getRepoNamesContainingUri', 'error', { verbose: value }) - return [] + return null } return value }) ) } - private getUniqueRemoteUrlsCache: Partial>> = {} - private async getUniqueRemoteUrlsCached(uri: vscode.Uri, signal?: AbortSignal): Promise { + private fsPathToRemoteUrlsInfo = new LRUCache>( + { max: 1000 } + ) + + private async getRemoteUrlsInfoCached( + uri: vscode.Uri, + signal?: AbortSignal + ): Promise { const key = uri.toString() - let uniqueRemoteUrls: Promise | undefined = this.getUniqueRemoteUrlsCache[key] - if (!uniqueRemoteUrls) { - uniqueRemoteUrls = gitRemoteUrlsForUri(uri, signal) - .then(remoteUrls => { - const uniqueRemoteUrls = Array.from(new Set(remoteUrls ?? [])).sort() - return uniqueRemoteUrls - }) - .catch(error => { - logError('RepoNameResolver:getUniqueRemoteUrlsCached', 'error', { - verbose: error, - }) - return [] + let remoteUrlsInfo = this.fsPathToRemoteUrlsInfo.get(key) + + if (!remoteUrlsInfo) { + remoteUrlsInfo = gitRemoteUrlsInfoForUri(uri, signal).catch(error => { + logError('RepoNameResolver:getRemoteUrlsInfoCached', 'error', { + verbose: error, }) - this.getUniqueRemoteUrlsCache[key] = uniqueRemoteUrls + return undefined + }) + this.fsPathToRemoteUrlsInfo.set(key, remoteUrlsInfo) } - return uniqueRemoteUrls + return remoteUrlsInfo } - private getRepoNameCache: Partial< - Record> - > = {} - private getRepoNameCached(remoteUrl: string): Observable { + private remoteUrlToRepoName = new LRUCache>({ + max: 1000, + }) + + private getRepoNameCached(remoteUrl: string): MaybePendingObservable { const key = remoteUrl - let observable: ReturnType | undefined = - this.getRepoNameCache[key] + let observable = this.remoteUrlToRepoName.get(key) + if (!observable) { observable = resolvedConfig.pipe( pluck('auth'), @@ -114,7 +156,7 @@ export class RepoNameResolver { return value }) ) - this.getRepoNameCache[key] = observable + this.remoteUrlToRepoName.set(key, observable) } return observable }