From 05a78ce7f215934157f899e0cfac40449ac95e0d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 15 Apr 2026 10:37:57 +0100 Subject: [PATCH] fix(github-copilot): harden memory embeddings provider --- CHANGELOG.md | 1 + docs/concepts/memory-search.md | 12 +- docs/reference/memory-config.md | 12 +- extensions/github-copilot/auth.test.ts | 96 +++ extensions/github-copilot/auth.ts | 33 +- extensions/github-copilot/embeddings.test.ts | 628 ++++++------------ extensions/github-copilot/embeddings.ts | 153 ++--- extensions/github-copilot/index.test.ts | 1 - extensions/github-copilot/index.ts | 5 +- src/memory-host-sdk/engine-embeddings.ts | 4 + .../host/embeddings-github-copilot.test.ts | 178 +++++ .../host/embeddings-github-copilot.ts | 151 +++++ 12 files changed, 718 insertions(+), 556 deletions(-) create mode 100644 extensions/github-copilot/auth.test.ts create mode 100644 src/memory-host-sdk/host/embeddings-github-copilot.test.ts create mode 100644 src/memory-host-sdk/host/embeddings-github-copilot.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index d81ac302f2d..d3865e9b4d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai - Control UI/Overview: add a Model Auth status card showing OAuth token health and provider rate-limit pressure at a glance, with attention callouts when OAuth tokens are expiring or expired. Backed by a new `models.authStatus` gateway method that strips credentials and caches for 60s. (#66211) Thanks @omarshahine. - docs-i18n: add behavior baseline fixtures (#64073). Thanks @hxy91819 - docs-i18n: harden behavior fixture path reads (#67046). Thanks @hxy91819 +- GitHub Copilot/memory search: add a GitHub Copilot embedding provider for memory search, and expose a dedicated Copilot embedding host helper so plugins can reuse the transport while honoring remote overrides, token refresh, and safer payload validation. (#61718) Thanks @feiskyer and @vincentkoc. ### Fixes diff --git a/docs/concepts/memory-search.md b/docs/concepts/memory-search.md index dbaf05c20a1..ff444e6cbd8 100644 --- a/docs/concepts/memory-search.md +++ b/docs/concepts/memory-search.md @@ -38,14 +38,14 @@ node-llama-cpp). | Provider | ID | Needs API key | Notes | | -------------- | ---------------- | ------------- | ---------------------------------------------------- | -| GitHub Copilot | `github-copilot` | No | Auto-detected, uses Copilot subscription | -| OpenAI | `openai` | Yes | Auto-detected, fast | -| Gemini | `gemini` | Yes | Supports image/audio indexing | -| Voyage | `voyage` | Yes | Auto-detected | -| Mistral | `mistral` | Yes | Auto-detected | | Bedrock | `bedrock` | No | Auto-detected when the AWS credential chain resolves | -| Ollama | `ollama` | No | Local, must set explicitly | +| Gemini | `gemini` | Yes | Supports image/audio indexing | +| GitHub Copilot | `github-copilot` | No | Auto-detected, uses Copilot subscription | | Local | `local` | No | GGUF model, ~0.6 GB download | +| Mistral | `mistral` | Yes | Auto-detected | +| Ollama | `ollama` | No | Local, must set explicitly | +| OpenAI | `openai` | Yes | Auto-detected, fast | +| Voyage | `voyage` | Yes | Auto-detected | ## How search works diff --git a/docs/reference/memory-config.md b/docs/reference/memory-config.md index 7e7f1eb7b40..2cae17b9ed0 100644 --- a/docs/reference/memory-config.md +++ b/docs/reference/memory-config.md @@ -39,7 +39,7 @@ plugin-owned config, transcript persistence, and safe rollout pattern. | Key | Type | Default | Description | | ---------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | -| `provider` | `string` | auto-detected | Embedding adapter ID: `github-copilot`, `openai`, `gemini`, `voyage`, `mistral`, `bedrock`, `ollama`, `local` | +| `provider` | `string` | auto-detected | Embedding adapter ID: `bedrock`, `gemini`, `github-copilot`, `local`, `mistral`, `ollama`, `openai`, `voyage` | | `model` | `string` | provider default | Embedding model name | | `fallback` | `string` | `"none"` | Fallback adapter ID when the primary fails | | `enabled` | `boolean` | `true` | Enable or disable memory search | @@ -65,13 +65,13 @@ credential chain instead (instance roles, SSO, access keys). | Provider | Env var | Config key | | -------------- | -------------------------------------------------- | --------------------------------- | -| GitHub Copilot | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | Auth profile via device login | -| OpenAI | `OPENAI_API_KEY` | `models.providers.openai.apiKey` | -| Gemini | `GEMINI_API_KEY` | `models.providers.google.apiKey` | -| Voyage | `VOYAGE_API_KEY` | `models.providers.voyage.apiKey` | -| Mistral | `MISTRAL_API_KEY` | `models.providers.mistral.apiKey` | | Bedrock | AWS credential chain | No API key needed | +| Gemini | `GEMINI_API_KEY` | `models.providers.google.apiKey` | +| GitHub Copilot | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, `GITHUB_TOKEN` | Auth profile via device login | +| Mistral | `MISTRAL_API_KEY` | `models.providers.mistral.apiKey` | | Ollama | `OLLAMA_API_KEY` (placeholder) | -- | +| OpenAI | `OPENAI_API_KEY` | `models.providers.openai.apiKey` | +| Voyage | `VOYAGE_API_KEY` | `models.providers.voyage.apiKey` | Codex OAuth covers chat/completions only and does not satisfy embedding requests. diff --git a/extensions/github-copilot/auth.test.ts b/extensions/github-copilot/auth.test.ts new file mode 100644 index 00000000000..74dd3d759e8 --- /dev/null +++ b/extensions/github-copilot/auth.test.ts @@ -0,0 +1,96 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const ensureAuthProfileStoreMock = vi.hoisted(() => vi.fn()); +const listProfilesForProviderMock = vi.hoisted(() => vi.fn()); +const coerceSecretRefMock = vi.hoisted(() => vi.fn()); +const resolveRequiredConfiguredSecretRefInputStringMock = vi.hoisted(() => vi.fn()); + +vi.mock("openclaw/plugin-sdk/provider-auth", () => ({ + coerceSecretRef: coerceSecretRefMock, + ensureAuthProfileStore: ensureAuthProfileStoreMock, + listProfilesForProvider: listProfilesForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/config-runtime", () => ({ + resolveRequiredConfiguredSecretRefInputString: resolveRequiredConfiguredSecretRefInputStringMock, +})); + +import { resolveFirstGithubToken } from "./auth.js"; + +describe("resolveFirstGithubToken", () => { + beforeEach(() => { + ensureAuthProfileStoreMock.mockReturnValue({ + profiles: { + "github-copilot:github": { + type: "token", + tokenRef: { source: "file", provider: "default", id: "/providers/github-copilot/token" }, + }, + }, + }); + listProfilesForProviderMock.mockReturnValue(["github-copilot:github"]); + coerceSecretRefMock.mockReturnValue({ + source: "file", + provider: "default", + id: "/providers/github-copilot/token", + }); + resolveRequiredConfiguredSecretRefInputStringMock.mockResolvedValue("resolved-profile-token"); + }); + + afterEach(() => { + vi.restoreAllMocks(); + ensureAuthProfileStoreMock.mockReset(); + listProfilesForProviderMock.mockReset(); + coerceSecretRefMock.mockReset(); + resolveRequiredConfiguredSecretRefInputStringMock.mockReset(); + }); + + it("prefers env tokens when available", async () => { + const result = await resolveFirstGithubToken({ + env: { GH_TOKEN: "env-token" } as NodeJS.ProcessEnv, + }); + + expect(result).toEqual({ + githubToken: "env-token", + hasProfile: true, + }); + expect(resolveRequiredConfiguredSecretRefInputStringMock).not.toHaveBeenCalled(); + }); + + it("returns direct profile tokens before resolving SecretRefs", async () => { + ensureAuthProfileStoreMock.mockReturnValue({ + profiles: { + "github-copilot:github": { + type: "token", + token: "profile-token", + }, + }, + }); + coerceSecretRefMock.mockReturnValue(null); + + const result = await resolveFirstGithubToken({ + env: {} as NodeJS.ProcessEnv, + }); + + expect(result).toEqual({ + githubToken: "profile-token", + hasProfile: true, + }); + }); + + it("resolves non-env SecretRefs when config is available", async () => { + const result = await resolveFirstGithubToken({ + config: { secrets: { defaults: { provider: "default" } } } as never, + env: {} as NodeJS.ProcessEnv, + }); + + expect(result).toEqual({ + githubToken: "resolved-profile-token", + hasProfile: true, + }); + expect(resolveRequiredConfiguredSecretRefInputStringMock).toHaveBeenCalledWith( + expect.objectContaining({ + path: "providers.github-copilot.authProfiles.github-copilot:github.tokenRef", + }), + ); + }); +}); diff --git a/extensions/github-copilot/auth.ts b/extensions/github-copilot/auth.ts index 45114802a8e..1e524f8048d 100644 --- a/extensions/github-copilot/auth.ts +++ b/extensions/github-copilot/auth.ts @@ -1,3 +1,5 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import { resolveRequiredConfiguredSecretRefInputString } from "openclaw/plugin-sdk/config-runtime"; import { coerceSecretRef, ensureAuthProfileStore, @@ -5,14 +7,19 @@ import { } from "openclaw/plugin-sdk/provider-auth"; import { PROVIDER_ID } from "./models.js"; -export function resolveFirstGithubToken(params: { agentDir?: string; env: NodeJS.ProcessEnv }): { +export async function resolveFirstGithubToken(params: { + agentDir?: string; + config?: OpenClawConfig; + env: NodeJS.ProcessEnv; +}): Promise<{ githubToken: string; hasProfile: boolean; -} { +}> { const authStore = ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false, }); - const hasProfile = listProfilesForProvider(authStore, PROVIDER_ID).length > 0; + const profileIds = listProfilesForProvider(authStore, PROVIDER_ID); + const hasProfile = profileIds.length > 0; const envToken = params.env.COPILOT_GITHUB_TOKEN ?? params.env.GH_TOKEN ?? params.env.GITHUB_TOKEN ?? ""; const githubToken = envToken.trim(); @@ -20,7 +27,7 @@ export function resolveFirstGithubToken(params: { agentDir?: string; env: NodeJS return { githubToken, hasProfile }; } - const profileId = listProfilesForProvider(authStore, PROVIDER_ID)[0]; + const profileId = profileIds[0]; const profile = profileId ? authStore.profiles[profileId] : undefined; if (profile?.type !== "token") { return { githubToken: "", hasProfile }; @@ -36,5 +43,23 @@ export function resolveFirstGithubToken(params: { agentDir?: string; env: NodeJS hasProfile, }; } + + if (tokenRef && params.config) { + try { + const resolved = await resolveRequiredConfiguredSecretRefInputString({ + config: params.config, + env: params.env, + value: profile.tokenRef, + path: `providers.github-copilot.authProfiles.${profileId ?? "default"}.tokenRef`, + }); + return { + githubToken: resolved?.trim() ?? "", + hasProfile, + }; + } catch { + return { githubToken: "", hasProfile }; + } + } + return { githubToken: "", hasProfile }; } diff --git a/extensions/github-copilot/embeddings.test.ts b/extensions/github-copilot/embeddings.test.ts index a48dda8b1fb..73a656d564c 100644 --- a/extensions/github-copilot/embeddings.test.ts +++ b/extensions/github-copilot/embeddings.test.ts @@ -2,59 +2,54 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; const resolveFirstGithubTokenMock = vi.hoisted(() => vi.fn()); const resolveCopilotApiTokenMock = vi.hoisted(() => vi.fn()); +const resolveConfiguredSecretInputStringMock = vi.hoisted(() => vi.fn()); const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn()); +const createGitHubCopilotEmbeddingProviderMock = vi.hoisted(() => vi.fn()); vi.mock("./auth.js", () => ({ resolveFirstGithubToken: resolveFirstGithubTokenMock, })); +vi.mock("openclaw/plugin-sdk/config-runtime", () => ({ + resolveConfiguredSecretInputString: resolveConfiguredSecretInputStringMock, +})); + vi.mock("openclaw/plugin-sdk/github-copilot-token", () => ({ DEFAULT_COPILOT_API_BASE_URL: "https://api.githubcopilot.test", resolveCopilotApiToken: resolveCopilotApiTokenMock, })); +vi.mock("openclaw/plugin-sdk/memory-core-host-engine-embeddings", () => ({ + createGitHubCopilotEmbeddingProvider: createGitHubCopilotEmbeddingProviderMock, +})); + vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ fetchWithSsrFGuard: fetchWithSsrFGuardMock, })); -vi.mock("openclaw/plugin-sdk/provider-auth", () => ({ - coerceSecretRef: vi.fn(), - ensureAuthProfileStore: vi.fn(() => ({ profiles: {} })), - listProfilesForProvider: vi.fn(() => []), -})); - import { githubCopilotMemoryEmbeddingProviderAdapter } from "./embeddings.js"; -const TEST_COPILOT_TOKEN = "copilot_test_token_abc"; const TEST_BASE_URL = "https://api.githubcopilot.test"; -function buildModelsResponse(models: Array<{ id: string; supported_endpoints?: string[] }>) { +function buildModelsResponse(models: Array<{ id: string; supported_endpoints?: unknown }>) { return { data: models }; } -function buildEmbeddingResponse(embeddings: Array<{ embedding: number[]; index: number }>) { - return { data: embeddings }; -} - -function mockFetchSequence( - responses: Array<{ ok: boolean; status?: number; json?: unknown; text?: string }>, -) { - let callIndex = 0; - fetchWithSsrFGuardMock.mockImplementation(async () => { - const spec = responses[callIndex++]; - if (!spec) { - throw new Error(`Unexpected fetchWithSsrFGuard call #${callIndex}`); - } - return { - response: { - ok: spec.ok, - status: spec.status ?? (spec.ok ? 200 : 500), - json: async () => spec.json, - text: async () => spec.text ?? "", - }, - release: vi.fn(async () => {}), - }; - }); +function mockDiscoveryResponse(spec: { + ok: boolean; + status?: number; + json?: unknown; + text?: string; +}) { + fetchWithSsrFGuardMock.mockImplementationOnce(async () => ({ + response: { + ok: spec.ok, + status: spec.status ?? (spec.ok ? 200 : 500), + json: async () => spec.json, + text: async () => spec.text ?? "", + }, + release: vi.fn(async () => {}), + })); } function defaultCreateOptions() { @@ -67,453 +62,218 @@ function defaultCreateOptions() { describe("githubCopilotMemoryEmbeddingProviderAdapter", () => { beforeEach(() => { - resolveFirstGithubTokenMock.mockReturnValue({ + resolveConfiguredSecretInputStringMock.mockResolvedValue({}); + resolveFirstGithubTokenMock.mockResolvedValue({ githubToken: "gh_test_token_123", hasProfile: false, }); resolveCopilotApiTokenMock.mockResolvedValue({ - token: TEST_COPILOT_TOKEN, + token: "copilot_test_token_abc", expiresAt: Date.now() + 3_600_000, source: "test", baseUrl: TEST_BASE_URL, }); + createGitHubCopilotEmbeddingProviderMock.mockImplementation(async (client) => ({ + provider: { + id: "github-copilot", + model: client.model, + embedQuery: async () => [0.1, 0.2, 0.3], + embedBatch: async (texts: string[]) => texts.map(() => [0.1, 0.2, 0.3]), + }, + client, + })); }); afterEach(() => { vi.restoreAllMocks(); + resolveConfiguredSecretInputStringMock.mockReset(); resolveFirstGithubTokenMock.mockReset(); resolveCopilotApiTokenMock.mockReset(); + createGitHubCopilotEmbeddingProviderMock.mockReset(); fetchWithSsrFGuardMock.mockReset(); }); - describe("adapter properties", () => { - it("has correct id", () => { - expect(githubCopilotMemoryEmbeddingProviderAdapter.id).toBe("github-copilot"); - }); - - it("has transport set to remote", () => { - expect(githubCopilotMemoryEmbeddingProviderAdapter.transport).toBe("remote"); - }); - - it("has autoSelectPriority of 15", () => { - expect(githubCopilotMemoryEmbeddingProviderAdapter.autoSelectPriority).toBe(15); - }); - - it("allows explicit override when configured auto", () => { - expect(githubCopilotMemoryEmbeddingProviderAdapter.allowExplicitWhenConfiguredAuto).toBe( - true, - ); - }); + it("registers the expected adapter metadata", () => { + expect(githubCopilotMemoryEmbeddingProviderAdapter.id).toBe("github-copilot"); + expect(githubCopilotMemoryEmbeddingProviderAdapter.transport).toBe("remote"); + expect(githubCopilotMemoryEmbeddingProviderAdapter.autoSelectPriority).toBe(15); + expect(githubCopilotMemoryEmbeddingProviderAdapter.allowExplicitWhenConfiguredAuto).toBe(true); }); - describe("model discovery", () => { - it("picks text-embedding-3-small when available", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-large", supported_endpoints: ["/v1/embeddings"] }, - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - { id: "text-embedding-ada-002", supported_endpoints: ["/v1/embeddings"] }, - { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.provider?.model).toBe("text-embedding-3-small"); + it("picks text-embedding-3-small when available", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "text-embedding-3-large", supported_endpoints: ["/v1/embeddings"] }, + { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, + { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, + ]), }); - it("falls back to text-embedding-3-large when small is unavailable", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-large", supported_endpoints: ["/v1/embeddings"] }, - { id: "text-embedding-ada-002", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); + const result = await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.provider?.model).toBe("text-embedding-3-large"); - }); - - it("filters models by embedding endpoint support", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.provider?.model).toBe("text-embedding-3-small"); - }); - - it("discovers models by ID when supported_endpoints is empty", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, - { id: "text-embedding-3-small", supported_endpoints: [] }, - { id: "text-embedding-ada-002" }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.provider?.model).toBe("text-embedding-3-small"); - }); - - it("picks first available model when no preferred model is available", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "custom-embedding-v1", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.provider?.model).toBe("custom-embedding-v1"); - }); + expect(result.provider?.model).toBe("text-embedding-3-small"); + expect(createGitHubCopilotEmbeddingProviderMock).toHaveBeenCalledWith( + expect.objectContaining({ + baseUrl: TEST_BASE_URL, + githubToken: "gh_test_token_123", + model: "text-embedding-3-small", + }), + ); }); - describe("user-configured model", () => { - it("uses user-configured model override", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - { id: "custom-model", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); + it("matches embedding-capable models when supported_endpoints is missing or malformed", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "gpt-4o", supported_endpoints: { broken: true } }, + { id: "text-embedding-3-small", supported_endpoints: [] }, + { id: "text-embedding-ada-002" }, + ]), + }); - const result = await githubCopilotMemoryEmbeddingProviderAdapter.create({ + const result = await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); + + expect(result.provider?.model).toBe("text-embedding-3-small"); + }); + + it("strips the provider prefix from a user-selected model", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, + ]), + }); + + const result = await githubCopilotMemoryEmbeddingProviderAdapter.create({ + ...defaultCreateOptions(), + model: "github-copilot/text-embedding-3-small", + } as never); + + expect(result.provider?.model).toBe("text-embedding-3-small"); + }); + + it("throws when the user-selected model is unavailable", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, + ]), + }); + + await expect( + githubCopilotMemoryEmbeddingProviderAdapter.create({ ...defaultCreateOptions(), - model: "custom-model", - } as never); - - expect(result.provider?.model).toBe("custom-model"); - }); - - it("strips github-copilot/ prefix from user model", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); - - const result = await githubCopilotMemoryEmbeddingProviderAdapter.create({ - ...defaultCreateOptions(), - model: "github-copilot/text-embedding-3-small", - } as never); - - expect(result.provider?.model).toBe("text-embedding-3-small"); - }); - - it("throws when user model is not in discovered list", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); - - await expect( - githubCopilotMemoryEmbeddingProviderAdapter.create({ - ...defaultCreateOptions(), - model: "gpt-4o", - } as never), - ).rejects.toThrow('GitHub Copilot embedding model "gpt-4o" is not available'); - }); - - it("throws when user model is set but no embedding models are discovered", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, - ]), - }, - ]); - - await expect( - githubCopilotMemoryEmbeddingProviderAdapter.create({ - ...defaultCreateOptions(), - model: "text-embedding-3-small", - } as never), - ).rejects.toThrow("No embedding models available from GitHub Copilot"); - }); + model: "gpt-4o", + } as never), + ).rejects.toThrow('GitHub Copilot embedding model "gpt-4o" is not available'); }); - describe("error handling", () => { - it("throws when no embedding models are available", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }, - ]), - }, - ]); - - await expect( - githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()), - ).rejects.toThrow("No embedding models available from GitHub Copilot"); + it("throws when discovery finds no embedding models", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([{ id: "gpt-4o", supported_endpoints: ["/v1/chat/completions"] }]), }); - it("throws when model discovery returns HTTP error", async () => { - mockFetchSequence([ - { - ok: false, - status: 401, - text: "Unauthorized", - }, - ]); - - await expect( - githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()), - ).rejects.toThrow("GitHub Copilot model discovery HTTP 401"); - }); - - it("throws when no GitHub token is available", async () => { - resolveFirstGithubTokenMock.mockReturnValue({ - githubToken: "", - hasProfile: false, - }); - - await expect( - githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()), - ).rejects.toThrow("No GitHub token available"); - }); - - it("throws when embeddings endpoint returns HTTP error", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - { ok: false, status: 429, text: "Rate limit exceeded" }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - await expect(result.provider!.embedQuery("hello")).rejects.toThrow( - "GitHub Copilot embeddings HTTP 429", - ); - }); - - it("throws when embeddings response is malformed", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - { ok: true, json: { model: "text-embedding-3-small" } }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - await expect(result.provider!.embedQuery("hello")).rejects.toThrow( - "GitHub Copilot embeddings response missing data[]", - ); - }); + await expect( + githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()), + ).rejects.toThrow("No embedding models available from GitHub Copilot"); }); - describe("shouldContinueAutoSelection", () => { - it("returns true for missing GitHub token errors", () => { - const err = new Error("No GitHub token available for Copilot embedding provider"); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - true, - ); - }); + it("wraps invalid discovery JSON as a setup error", async () => { + fetchWithSsrFGuardMock.mockImplementationOnce(async () => ({ + response: { + ok: true, + status: 200, + json: async () => { + throw new SyntaxError("bad json"); + }, + text: async () => "", + }, + release: vi.fn(async () => {}), + })); - it("returns true for token exchange failures", () => { - const err = new Error("Copilot token exchange failed: HTTP 401"); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - true, - ); - }); - - it("returns true for no embedding models available", () => { - const err = new Error("No embedding models available from GitHub Copilot"); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - true, - ); - }); - - it("returns true for model discovery failures", () => { - const err = new Error("GitHub Copilot model discovery HTTP 403: Forbidden"); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - true, - ); - }); - - it("returns true for user model not available", () => { - const err = new Error( - 'GitHub Copilot embedding model "gpt-4o" is not available. Available: text-embedding-3-small', - ); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - true, - ); - }); - - it("returns false for non-Copilot errors", () => { - const err = new Error("Network timeout"); - expect(githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!(err)).toBe( - false, - ); - }); - - it("returns false for non-Error values", () => { - expect( - githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!("some string"), - ).toBe(false); - }); + await expect( + githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()), + ).rejects.toThrow("GitHub Copilot model discovery returned invalid JSON"); }); - describe("embedQuery", () => { - it("calls the endpoint and returns a vector", async () => { - const embedding = [0.1, 0.2, 0.3]; - const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0)); - const normalized = embedding.map((v) => v / magnitude); - - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - { - ok: true, - json: buildEmbeddingResponse([{ embedding, index: 0 }]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - const vector = await result.provider!.embedQuery("hello world"); - - expect(vector).toEqual(normalized); - - // Verify the embeddings call used POST with correct body (second fetch call) - expect(fetchWithSsrFGuardMock).toHaveBeenCalledTimes(2); - const embeddingsCall = fetchWithSsrFGuardMock.mock.calls[1][0] as { - url: string; - init: { method: string; body: string }; - }; - expect(embeddingsCall.url).toBe(`${TEST_BASE_URL}/embeddings`); - expect(embeddingsCall.init.method).toBe("POST"); - const body = JSON.parse(embeddingsCall.init.body) as { model: string; input: string[] }; - expect(body.model).toBe("text-embedding-3-small"); - expect(body.input).toEqual(["hello world"]); + it("honors remote overrides when creating the provider", async () => { + resolveConfiguredSecretInputStringMock.mockResolvedValue({ value: "gh_remote_token" }); + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, + ]), }); + + await githubCopilotMemoryEmbeddingProviderAdapter.create({ + ...defaultCreateOptions(), + remote: { + apiKey: "ignored-at-runtime", + baseUrl: "https://proxy.example/v1", + headers: { "X-Proxy-Token": "proxy" }, + }, + } as never); + + expect(resolveFirstGithubTokenMock).toHaveBeenCalled(); + expect(createGitHubCopilotEmbeddingProviderMock).toHaveBeenCalledWith({ + baseUrl: "https://proxy.example/v1", + env: process.env, + fetchImpl: fetch, + githubToken: "gh_remote_token", + headers: { "X-Proxy-Token": "proxy" }, + model: "text-embedding-3-small", + }); + + const discoveryCall = fetchWithSsrFGuardMock.mock.calls[0]?.[0] as { + init: { headers: Record }; + url: string; + }; + expect(discoveryCall.url).toBe("https://proxy.example/v1/models"); + expect(discoveryCall.init.headers["X-Proxy-Token"]).toBe("proxy"); }); - describe("embedBatch", () => { - it("returns multiple vectors sorted by index", async () => { - const emb0 = [0.1, 0.2, 0.3]; - const emb1 = [0.4, 0.5, 0.6]; - - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - { - ok: true, - // Return in reverse index order to verify sorting - json: buildEmbeddingResponse([ - { embedding: emb1, index: 1 }, - { embedding: emb0, index: 0 }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - const vectors = await result.provider!.embedBatch(["first", "second"]); - - expect(vectors).toHaveLength(2); - // Verify order matches input order (index 0 first, index 1 second) - const mag0 = Math.sqrt(emb0.reduce((sum, v) => sum + v * v, 0)); - const mag1 = Math.sqrt(emb1.reduce((sum, v) => sum + v * v, 0)); - expect(vectors[0]).toEqual(emb0.map((v) => v / mag0)); - expect(vectors[1]).toEqual(emb1.map((v) => v / mag1)); + it("includes provider, baseUrl, and model in runtime cache data", async () => { + mockDiscoveryResponse({ + ok: true, + json: buildModelsResponse([ + { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, + ]), }); - it("returns empty array for empty input", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); + const result = await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - const vectors = await result.provider!.embedBatch([]); - - expect(vectors).toEqual([]); - // No extra fetch call for empty input - expect(fetchWithSsrFGuardMock).toHaveBeenCalledTimes(1); - }); - }); - - describe("runtime", () => { - it("includes cache key data with provider, baseUrl, and model", async () => { - mockFetchSequence([ - { - ok: true, - json: buildModelsResponse([ - { id: "text-embedding-3-small", supported_endpoints: ["/v1/embeddings"] }, - ]), - }, - ]); - - const result = - await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions()); - - expect(result.runtime).toBeDefined(); - expect(result.runtime!.id).toBe("github-copilot"); - expect(result.runtime!.cacheKeyData).toEqual({ + expect(result.runtime).toEqual({ + id: "github-copilot", + cacheKeyData: { provider: "github-copilot", baseUrl: TEST_BASE_URL, model: "text-embedding-3-small", - }); + }, }); }); + + it("treats token parsing and discovery failures as auto-fallback errors", () => { + expect( + githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!( + new Error("Copilot token response missing token"), + ), + ).toBe(true); + expect( + githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!( + new Error("Unexpected response from GitHub Copilot token endpoint"), + ), + ).toBe(true); + expect( + githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!( + new Error("GitHub Copilot model discovery returned invalid JSON"), + ), + ).toBe(true); + expect( + githubCopilotMemoryEmbeddingProviderAdapter.shouldContinueAutoSelection!( + new Error("Network timeout"), + ), + ).toBe(false); + }); }); diff --git a/extensions/github-copilot/embeddings.ts b/extensions/github-copilot/embeddings.ts index f271d85f032..d06c8a06942 100644 --- a/extensions/github-copilot/embeddings.ts +++ b/extensions/github-copilot/embeddings.ts @@ -1,10 +1,11 @@ +import { resolveConfiguredSecretInputString } from "openclaw/plugin-sdk/config-runtime"; import { DEFAULT_COPILOT_API_BASE_URL, resolveCopilotApiToken, } from "openclaw/plugin-sdk/github-copilot-token"; -import type { - MemoryEmbeddingProvider, - MemoryEmbeddingProviderAdapter, +import { + createGitHubCopilotEmbeddingProvider, + type MemoryEmbeddingProviderAdapter, } from "openclaw/plugin-sdk/memory-core-host-engine-embeddings"; import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime"; import { resolveFirstGithubToken } from "./auth.js"; @@ -39,22 +40,8 @@ function buildSsrfPolicy(baseUrl: string): SsrFPolicy | undefined { } type CopilotModelEntry = { - id: string; - supported_endpoints?: string[]; -}; - -type CopilotModelsResponse = { - data?: CopilotModelEntry[]; -}; - -type CopilotEmbeddingDataEntry = { - embedding: number[]; - index: number; -}; - -type CopilotEmbeddingResponse = { - data?: CopilotEmbeddingDataEntry[]; - model?: string; + id?: unknown; + supported_endpoints?: unknown; }; function isCopilotSetupError(err: unknown): boolean { @@ -68,15 +55,18 @@ function isCopilotSetupError(err: unknown): boolean { return ( err.message.includes("No GitHub token available") || err.message.includes("Copilot token exchange failed") || + err.message.includes("Copilot token response") || err.message.includes("No embedding models available") || err.message.includes("GitHub Copilot model discovery") || - err.message.includes("GitHub Copilot embedding model") + err.message.includes("GitHub Copilot embedding model") || + err.message.includes("Unexpected response from GitHub Copilot token endpoint") ); } async function discoverEmbeddingModels(params: { baseUrl: string; copilotToken: string; + headers?: Record; ssrfPolicy?: SsrFPolicy; }): Promise { const url = `${params.baseUrl.replace(/\/$/, "")}/models`; @@ -86,6 +76,7 @@ async function discoverEmbeddingModels(params: { method: "GET", headers: { ...COPILOT_HEADERS_STATIC, + ...params.headers, Authorization: `Bearer ${params.copilotToken}`, }, }, @@ -98,17 +89,31 @@ async function discoverEmbeddingModels(params: { `GitHub Copilot model discovery HTTP ${response.status}: ${await response.text()}`, ); } - const body = (await response.json()) as CopilotModelsResponse; - const allModels = Array.isArray(body.data) ? body.data : []; + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new Error("GitHub Copilot model discovery returned invalid JSON"); + } + const allModels = Array.isArray((payload as { data?: unknown })?.data) + ? ((payload as { data: CopilotModelEntry[] }).data ?? []) + : []; // Filter for embedding models. The Copilot API may list embedding models // with an explicit /v1/embeddings endpoint, or with an empty // supported_endpoints array. Match both: endpoint-declared embedding // models and models whose ID indicates embedding capability. - const models = allModels.filter( - (m) => - m.supported_endpoints?.some((ep) => ep.includes("embeddings")) || /\bembedding/i.test(m.id), - ); - return models.map((m) => m.id); + return allModels.flatMap((entry) => { + const id = typeof entry.id === "string" ? entry.id.trim() : ""; + if (!id) { + return []; + } + const endpoints = Array.isArray(entry.supported_endpoints) + ? entry.supported_endpoints.filter((value): value is string => typeof value === "string") + : []; + return endpoints.some((ep) => ep.includes("embeddings")) || /\bembedding/i.test(id) + ? [id] + : []; + }); } finally { await release(); } @@ -142,77 +147,6 @@ function pickBestModel(available: string[], userModel?: string): string { throw new Error("No embedding models available from GitHub Copilot"); } -function sanitizeAndNormalizeEmbedding(vec: number[]): number[] { - const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0)); - const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0)); - if (magnitude < 1e-10) { - return sanitized; - } - return sanitized.map((value) => value / magnitude); -} - -// Note: the Copilot token is captured at creation time. Copilot tokens are -// short-lived (~30 min) so long-lived sessions may hit 401s. This matches -// how other embedding providers capture API keys at creation. A token -// refresh mechanism can be added if this becomes a practical issue. -async function createCopilotEmbeddingProvider(params: { - baseUrl: string; - copilotToken: string; - model: string; - ssrfPolicy?: SsrFPolicy; -}): Promise { - const embeddingsUrl = `${params.baseUrl.replace(/\/$/, "")}/embeddings`; - const headers: Record = { - ...COPILOT_HEADERS_STATIC, - Authorization: `Bearer ${params.copilotToken}`, - }; - - const embedBatch = async (texts: string[]): Promise => { - if (texts.length === 0) { - return []; - } - const { response, release } = await fetchWithSsrFGuard({ - url: embeddingsUrl, - init: { - method: "POST", - headers, - body: JSON.stringify({ model: params.model, input: texts }), - }, - policy: params.ssrfPolicy, - auditContext: "memory-remote", - }); - try { - if (!response.ok) { - throw new Error( - `GitHub Copilot embeddings HTTP ${response.status}: ${await response.text()}`, - ); - } - const body = (await response.json()) as CopilotEmbeddingResponse; - if (!Array.isArray(body.data)) { - throw new Error("GitHub Copilot embeddings response missing data[]"); - } - return body.data - .toSorted((a, b) => a.index - b.index) - .map((entry) => sanitizeAndNormalizeEmbedding(entry.embedding)); - } finally { - await release(); - } - }; - - return { - id: COPILOT_EMBEDDING_PROVIDER_ID, - model: params.model, - embedQuery: async (text: string) => { - const [result] = await embedBatch([text]); - if (!result) { - throw new Error("GitHub Copilot embeddings returned no vectors for query"); - } - return result; - }, - embedBatch, - }; -} - export const githubCopilotMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = { id: COPILOT_EMBEDDING_PROVIDER_ID, transport: "remote", @@ -220,18 +154,28 @@ export const githubCopilotMemoryEmbeddingProviderAdapter: MemoryEmbeddingProvide allowExplicitWhenConfiguredAuto: true, shouldContinueAutoSelection: (err: unknown) => isCopilotSetupError(err), create: async (options) => { - const { githubToken } = resolveFirstGithubToken({ + const remoteGithubToken = await resolveConfiguredSecretInputString({ + config: options.config, + env: process.env, + value: options.remote?.apiKey, + path: "agents.*.memorySearch.remote.apiKey", + }); + const { githubToken: profileGithubToken } = await resolveFirstGithubToken({ agentDir: options.agentDir, + config: options.config, env: process.env, }); + const githubToken = remoteGithubToken.value || profileGithubToken; if (!githubToken) { throw new Error("No GitHub token available for Copilot embedding provider"); } const { token: copilotToken, baseUrl: resolvedBaseUrl } = await resolveCopilotApiToken({ githubToken, + env: process.env, }); - const baseUrl = resolvedBaseUrl || DEFAULT_COPILOT_API_BASE_URL; + const baseUrl = + options.remote?.baseUrl?.trim() || resolvedBaseUrl || DEFAULT_COPILOT_API_BASE_URL; const ssrfPolicy = buildSsrfPolicy(baseUrl); // Always discover models even when the user pins one: this validates @@ -240,17 +184,20 @@ export const githubCopilotMemoryEmbeddingProviderAdapter: MemoryEmbeddingProvide const availableModels = await discoverEmbeddingModels({ baseUrl, copilotToken, + headers: options.remote?.headers, ssrfPolicy, }); const userModel = options.model?.trim() || undefined; const model = pickBestModel(availableModels, userModel); - const provider = await createCopilotEmbeddingProvider({ + const { provider } = await createGitHubCopilotEmbeddingProvider({ baseUrl, - copilotToken, + env: process.env, + fetchImpl: fetch, + githubToken, + headers: options.remote?.headers, model, - ssrfPolicy, }); return { diff --git a/extensions/github-copilot/index.test.ts b/extensions/github-copilot/index.test.ts index cbbb2ca421c..3f68d8748a7 100644 --- a/extensions/github-copilot/index.test.ts +++ b/extensions/github-copilot/index.test.ts @@ -57,7 +57,6 @@ describe("github-copilot plugin", () => { expect(adapter.id).toBe("github-copilot"); }); - it("skips catalog discovery when plugin discovery is disabled", async () => { const provider = registerProviderWithPluginConfig({ discovery: { enabled: false } }); diff --git a/extensions/github-copilot/index.ts b/extensions/github-copilot/index.ts index aa4429659d2..64d97b7f8c6 100644 --- a/extensions/github-copilot/index.ts +++ b/extensions/github-copilot/index.ts @@ -2,8 +2,8 @@ import { definePluginEntry, type ProviderAuthContext } from "openclaw/plugin-sdk import { ensureAuthProfileStore } from "openclaw/plugin-sdk/provider-auth"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime"; import { resolveFirstGithubToken } from "./auth.js"; -import { PROVIDER_ID, resolveCopilotForwardCompatModel } from "./models.js"; import { githubCopilotMemoryEmbeddingProviderAdapter } from "./embeddings.js"; +import { PROVIDER_ID, resolveCopilotForwardCompatModel } from "./models.js"; import { buildGithubCopilotReplayPolicy } from "./replay-policy.js"; import { wrapCopilotProviderStream } from "./stream.js"; @@ -107,8 +107,9 @@ export default definePluginEntry({ } const { DEFAULT_COPILOT_API_BASE_URL, resolveCopilotApiToken } = await loadGithubCopilotRuntime(); - const { githubToken, hasProfile } = resolveFirstGithubToken({ + const { githubToken, hasProfile } = await resolveFirstGithubToken({ agentDir: ctx.agentDir, + config: ctx.config, env: ctx.env, }); if (!hasProfile && !githubToken) { diff --git a/src/memory-host-sdk/engine-embeddings.ts b/src/memory-host-sdk/engine-embeddings.ts index eef160b2f63..ad794fe4f1d 100644 --- a/src/memory-host-sdk/engine-embeddings.ts +++ b/src/memory-host-sdk/engine-embeddings.ts @@ -30,6 +30,10 @@ export { createMistralEmbeddingProvider, DEFAULT_MISTRAL_EMBEDDING_MODEL, } from "./host/embeddings-mistral.js"; +export { + createGitHubCopilotEmbeddingProvider, + type GitHubCopilotEmbeddingClient, +} from "./host/embeddings-github-copilot.js"; export { createOllamaEmbeddingProvider, DEFAULT_OLLAMA_EMBEDDING_MODEL, diff --git a/src/memory-host-sdk/host/embeddings-github-copilot.test.ts b/src/memory-host-sdk/host/embeddings-github-copilot.test.ts new file mode 100644 index 00000000000..0f3904b1555 --- /dev/null +++ b/src/memory-host-sdk/host/embeddings-github-copilot.test.ts @@ -0,0 +1,178 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const resolveCopilotApiTokenMock = vi.hoisted(() => vi.fn()); +const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn()); + +vi.mock("../../agents/github-copilot-token.js", () => ({ + DEFAULT_COPILOT_API_BASE_URL: "https://api.githubcopilot.test", + resolveCopilotApiToken: resolveCopilotApiTokenMock, +})); + +vi.mock("../../infra/net/fetch-guard.js", () => ({ + fetchWithSsrFGuard: fetchWithSsrFGuardMock, +})); + +import { createGitHubCopilotEmbeddingProvider } from "./embeddings-github-copilot.js"; + +function mockFetchResponse(spec: { ok: boolean; status?: number; json?: unknown; text?: string }) { + fetchWithSsrFGuardMock.mockImplementationOnce(async () => ({ + response: { + ok: spec.ok, + status: spec.status ?? (spec.ok ? 200 : 500), + json: async () => spec.json, + text: async () => spec.text ?? "", + }, + release: vi.fn(async () => {}), + })); +} + +describe("createGitHubCopilotEmbeddingProvider", () => { + beforeEach(() => { + resolveCopilotApiTokenMock.mockResolvedValue({ + token: "copilot-token-a", + expiresAt: Date.now() + 3_600_000, + source: "test", + baseUrl: "https://api.githubcopilot.test", + }); + }); + + afterEach(() => { + vi.restoreAllMocks(); + resolveCopilotApiTokenMock.mockReset(); + fetchWithSsrFGuardMock.mockReset(); + }); + + it("normalizes embeddings returned for queries", async () => { + mockFetchResponse({ + ok: true, + json: { + data: [{ index: 0, embedding: [3, 4] }], + }, + }); + + const { provider } = await createGitHubCopilotEmbeddingProvider({ + githubToken: "gh_test", + model: "text-embedding-3-small", + }); + + await expect(provider.embedQuery("hello")).resolves.toEqual([0.6, 0.8]); + expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.githubcopilot.test/embeddings", + }), + ); + }); + + it("preserves input order by explicit response index", async () => { + mockFetchResponse({ + ok: true, + json: { + data: [ + { index: 1, embedding: [0, 2] }, + { index: 0, embedding: [1, 0] }, + ], + }, + }); + + const { provider } = await createGitHubCopilotEmbeddingProvider({ + githubToken: "gh_test", + model: "text-embedding-3-small", + }); + + await expect(provider.embedBatch(["first", "second"])).resolves.toEqual([ + [1, 0], + [0, 1], + ]); + }); + + it("uses a fresh Copilot token for later requests", async () => { + resolveCopilotApiTokenMock + .mockResolvedValueOnce({ + token: "copilot-token-create", + expiresAt: Date.now() + 3_600_000, + source: "test", + baseUrl: "https://api.githubcopilot.test", + }) + .mockResolvedValueOnce({ + token: "copilot-token-first", + expiresAt: Date.now() + 3_600_000, + source: "test", + baseUrl: "https://api.githubcopilot.test", + }) + .mockResolvedValueOnce({ + token: "copilot-token-second", + expiresAt: Date.now() + 3_600_000, + source: "test", + baseUrl: "https://api.githubcopilot.test", + }); + mockFetchResponse({ + ok: true, + json: { data: [{ index: 0, embedding: [1, 0] }] }, + }); + mockFetchResponse({ + ok: true, + json: { data: [{ index: 0, embedding: [0, 1] }] }, + }); + + const { provider } = await createGitHubCopilotEmbeddingProvider({ + githubToken: "gh_test", + model: "text-embedding-3-small", + }); + + await provider.embedQuery("first"); + await provider.embedQuery("second"); + + const firstHeaders = fetchWithSsrFGuardMock.mock.calls[0]?.[0]?.init?.headers as Record< + string, + string + >; + const secondHeaders = fetchWithSsrFGuardMock.mock.calls[1]?.[0]?.init?.headers as Record< + string, + string + >; + expect(firstHeaders.Authorization).toBe("Bearer copilot-token-first"); + expect(secondHeaders.Authorization).toBe("Bearer copilot-token-second"); + }); + + it("honors custom baseUrl and header overrides", async () => { + mockFetchResponse({ + ok: true, + json: { data: [{ index: 0, embedding: [1, 0] }] }, + }); + + const { provider } = await createGitHubCopilotEmbeddingProvider({ + githubToken: "gh_test", + model: "text-embedding-3-small", + baseUrl: "https://proxy.example/v1", + headers: { "X-Proxy-Token": "proxy" }, + }); + + await provider.embedQuery("hello"); + + const call = fetchWithSsrFGuardMock.mock.calls[0]?.[0] as { + init: { headers: Record }; + url: string; + }; + expect(call.url).toBe("https://proxy.example/v1/embeddings"); + expect(call.init.headers["X-Proxy-Token"]).toBe("proxy"); + expect(call.init.headers.Authorization).toBe("Bearer copilot-token-a"); + }); + + it("fails fast on sparse or malformed embedding payloads", async () => { + mockFetchResponse({ + ok: true, + json: { + data: [{ index: 1, embedding: [1, 0] }], + }, + }); + + const { provider } = await createGitHubCopilotEmbeddingProvider({ + githubToken: "gh_test", + model: "text-embedding-3-small", + }); + + await expect(provider.embedBatch(["first", "second"])).rejects.toThrow( + "GitHub Copilot embeddings response missing vectors for some inputs", + ); + }); +}); diff --git a/src/memory-host-sdk/host/embeddings-github-copilot.ts b/src/memory-host-sdk/host/embeddings-github-copilot.ts new file mode 100644 index 00000000000..246b764abb9 --- /dev/null +++ b/src/memory-host-sdk/host/embeddings-github-copilot.ts @@ -0,0 +1,151 @@ +import { + DEFAULT_COPILOT_API_BASE_URL, + resolveCopilotApiToken, +} from "../../agents/github-copilot-token.js"; +import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js"; +import type { EmbeddingProvider } from "./embeddings.types.js"; +import { buildRemoteBaseUrlPolicy, withRemoteHttpResponse } from "./remote-http.js"; + +export type GitHubCopilotEmbeddingClient = { + githubToken: string; + model: string; + baseUrl?: string; + headers?: Record; + env?: NodeJS.ProcessEnv; + fetchImpl?: typeof fetch; +}; + +const COPILOT_EMBEDDING_PROVIDER_ID = "github-copilot"; + +const COPILOT_HEADERS_STATIC: Record = { + "Content-Type": "application/json", + "Editor-Version": "vscode/1.96.2", + "User-Agent": "GitHubCopilotChat/0.26.7", +}; + +function resolveConfiguredBaseUrl( + configuredBaseUrl: string | undefined, + tokenBaseUrl: string | undefined, +): string { + const trimmed = configuredBaseUrl?.trim(); + if (trimmed) { + return trimmed; + } + return tokenBaseUrl || DEFAULT_COPILOT_API_BASE_URL; +} + +async function resolveGitHubCopilotEmbeddingSession(client: GitHubCopilotEmbeddingClient): Promise<{ + baseUrl: string; + headers: Record; +}> { + const token = await resolveCopilotApiToken({ + githubToken: client.githubToken, + env: client.env, + fetchImpl: client.fetchImpl, + }); + const baseUrl = resolveConfiguredBaseUrl(client.baseUrl, token.baseUrl); + return { + baseUrl, + headers: { + ...COPILOT_HEADERS_STATIC, + ...client.headers, + Authorization: `Bearer ${token.token}`, + }, + }; +} + +function parseGitHubCopilotEmbeddingPayload(payload: unknown, expectedCount: number): number[][] { + if (!payload || typeof payload !== "object") { + throw new Error("GitHub Copilot embeddings response missing data[]"); + } + const data = (payload as { data?: unknown }).data; + if (!Array.isArray(data)) { + throw new Error("GitHub Copilot embeddings response missing data[]"); + } + + const vectors = Array.from({ length: expectedCount }); + for (const entry of data) { + if (!entry || typeof entry !== "object") { + throw new Error("GitHub Copilot embeddings response contains an invalid entry"); + } + const indexValue = (entry as { index?: unknown }).index; + const embedding = (entry as { embedding?: unknown }).embedding; + const index = typeof indexValue === "number" ? indexValue : Number.NaN; + if (!Number.isInteger(index)) { + throw new Error("GitHub Copilot embeddings response contains an invalid index"); + } + if (index < 0 || index >= expectedCount) { + throw new Error("GitHub Copilot embeddings response contains an out-of-range index"); + } + if (vectors[index] !== undefined) { + throw new Error("GitHub Copilot embeddings response contains duplicate indexes"); + } + if (!Array.isArray(embedding) || !embedding.every((value) => typeof value === "number")) { + throw new Error("GitHub Copilot embeddings response contains an invalid embedding"); + } + vectors[index] = sanitizeAndNormalizeEmbedding(embedding); + } + + for (let index = 0; index < expectedCount; index += 1) { + if (vectors[index] === undefined) { + throw new Error("GitHub Copilot embeddings response missing vectors for some inputs"); + } + } + return vectors as number[][]; +} + +export async function createGitHubCopilotEmbeddingProvider( + client: GitHubCopilotEmbeddingClient, +): Promise<{ provider: EmbeddingProvider; client: GitHubCopilotEmbeddingClient }> { + const initialSession = await resolveGitHubCopilotEmbeddingSession(client); + + const embed = async (input: string[]): Promise => { + if (input.length === 0) { + return []; + } + + const session = await resolveGitHubCopilotEmbeddingSession(client); + const url = `${session.baseUrl.replace(/\/$/, "")}/embeddings`; + return await withRemoteHttpResponse({ + url, + fetchImpl: client.fetchImpl, + ssrfPolicy: buildRemoteBaseUrlPolicy(session.baseUrl), + init: { + method: "POST", + headers: session.headers, + body: JSON.stringify({ model: client.model, input }), + }, + onResponse: async (response) => { + if (!response.ok) { + throw new Error( + `GitHub Copilot embeddings HTTP ${response.status}: ${await response.text()}`, + ); + } + + let payload: unknown; + try { + payload = await response.json(); + } catch { + throw new Error("GitHub Copilot embeddings returned invalid JSON"); + } + return parseGitHubCopilotEmbeddingPayload(payload, input.length); + }, + }); + }; + + return { + provider: { + id: COPILOT_EMBEDDING_PROVIDER_ID, + model: client.model, + embedQuery: async (text) => { + const [vector] = await embed([text]); + return vector ?? []; + }, + embedBatch: embed, + }, + client: { + ...client, + baseUrl: initialSession.baseUrl, + }, + }; +}