refactor: move memory embeddings into provider plugins

This commit is contained in:
Peter Steinberger
2026-04-17 01:31:39 +01:00
parent 7e9ff0f86e
commit 77e6e4cf87
94 changed files with 1039 additions and 7125 deletions

View File

@@ -4,7 +4,6 @@ const resolveFirstGithubTokenMock = vi.hoisted(() => vi.fn());
const resolveCopilotApiTokenMock = vi.hoisted(() => vi.fn());
const resolveConfiguredSecretInputStringMock = vi.hoisted(() => vi.fn());
const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());
const createGitHubCopilotEmbeddingProviderMock = vi.hoisted(() => vi.fn());
vi.mock("./auth.js", () => ({
resolveFirstGithubToken: resolveFirstGithubTokenMock,
@@ -19,10 +18,6 @@ vi.mock("openclaw/plugin-sdk/github-copilot-token", () => ({
resolveCopilotApiToken: resolveCopilotApiTokenMock,
}));
vi.mock("openclaw/plugin-sdk/memory-core-host-engine-embeddings", () => ({
createGitHubCopilotEmbeddingProvider: createGitHubCopilotEmbeddingProviderMock,
}));
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
}));
@@ -73,15 +68,6 @@ describe("githubCopilotMemoryEmbeddingProviderAdapter", () => {
source: "test",
baseUrl: TEST_BASE_URL,
});
createGitHubCopilotEmbeddingProviderMock.mockImplementation(async (client) => ({
provider: {
id: "github-copilot",
model: client.model,
embedQuery: async () => [0.1, 0.2, 0.3],
embedBatch: async (texts: string[]) => texts.map(() => [0.1, 0.2, 0.3]),
},
client,
}));
});
afterEach(() => {
@@ -89,7 +75,6 @@ describe("githubCopilotMemoryEmbeddingProviderAdapter", () => {
resolveConfiguredSecretInputStringMock.mockReset();
resolveFirstGithubTokenMock.mockReset();
resolveCopilotApiTokenMock.mockReset();
createGitHubCopilotEmbeddingProviderMock.mockReset();
fetchWithSsrFGuardMock.mockReset();
});
@@ -113,12 +98,8 @@ describe("githubCopilotMemoryEmbeddingProviderAdapter", () => {
const result = await githubCopilotMemoryEmbeddingProviderAdapter.create(defaultCreateOptions());
expect(result.provider?.model).toBe("text-embedding-3-small");
expect(createGitHubCopilotEmbeddingProviderMock).toHaveBeenCalledWith(
expect.objectContaining({
baseUrl: TEST_BASE_URL,
githubToken: "gh_test_token_123",
model: "text-embedding-3-small",
}),
expect(resolveCopilotApiTokenMock).toHaveBeenCalledWith(
expect.objectContaining({ githubToken: "gh_test_token_123" }),
);
});
@@ -217,14 +198,12 @@ describe("githubCopilotMemoryEmbeddingProviderAdapter", () => {
} as never);
expect(resolveFirstGithubTokenMock).toHaveBeenCalled();
expect(createGitHubCopilotEmbeddingProviderMock).toHaveBeenCalledWith({
baseUrl: "https://proxy.example/v1",
env: process.env,
fetchImpl: fetch,
githubToken: "gh_remote_token",
headers: { "X-Proxy-Token": "proxy" },
model: "text-embedding-3-small",
});
expect(resolveCopilotApiTokenMock).toHaveBeenCalledWith(
expect.objectContaining({
env: process.env,
githubToken: "gh_remote_token",
}),
);
const discoveryCall = fetchWithSsrFGuardMock.mock.calls[0]?.[0] as {
init: { headers: Record<string, string> };

View File

@@ -4,7 +4,10 @@ import {
resolveCopilotApiToken,
} from "openclaw/plugin-sdk/github-copilot-token";
import {
createGitHubCopilotEmbeddingProvider,
buildRemoteBaseUrlPolicy,
sanitizeAndNormalizeEmbedding,
withRemoteHttpResponse,
type MemoryEmbeddingProvider,
type MemoryEmbeddingProviderAdapter,
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
@@ -44,6 +47,15 @@ type CopilotModelEntry = {
supported_endpoints?: unknown;
};
type GitHubCopilotEmbeddingClient = {
githubToken: string;
model: string;
baseUrl?: string;
headers?: Record<string, string>;
env?: NodeJS.ProcessEnv;
fetchImpl?: typeof fetch;
};
function isCopilotSetupError(err: unknown): boolean {
if (!(err instanceof Error)) {
return false;
@@ -147,9 +159,126 @@ function pickBestModel(available: string[], userModel?: string): string {
throw new Error("No embedding models available from GitHub Copilot");
}
function parseGitHubCopilotEmbeddingPayload(payload: unknown, expectedCount: number): number[][] {
if (!payload || typeof payload !== "object") {
throw new Error("GitHub Copilot embeddings response missing data[]");
}
const data = (payload as { data?: unknown }).data;
if (!Array.isArray(data)) {
throw new Error("GitHub Copilot embeddings response missing data[]");
}
const vectors = Array.from<number[] | undefined>({ length: expectedCount });
for (const entry of data) {
if (!entry || typeof entry !== "object") {
throw new Error("GitHub Copilot embeddings response contains an invalid entry");
}
const indexValue = (entry as { index?: unknown }).index;
const embedding = (entry as { embedding?: unknown }).embedding;
const index = typeof indexValue === "number" ? indexValue : Number.NaN;
if (!Number.isInteger(index)) {
throw new Error("GitHub Copilot embeddings response contains an invalid index");
}
if (index < 0 || index >= expectedCount) {
throw new Error("GitHub Copilot embeddings response contains an out-of-range index");
}
if (vectors[index] !== undefined) {
throw new Error("GitHub Copilot embeddings response contains duplicate indexes");
}
if (!Array.isArray(embedding) || !embedding.every((value) => typeof value === "number")) {
throw new Error("GitHub Copilot embeddings response contains an invalid embedding");
}
vectors[index] = sanitizeAndNormalizeEmbedding(embedding);
}
for (let index = 0; index < expectedCount; index += 1) {
if (vectors[index] === undefined) {
throw new Error("GitHub Copilot embeddings response missing vectors for some inputs");
}
}
return vectors as number[][];
}
async function resolveGitHubCopilotEmbeddingSession(client: GitHubCopilotEmbeddingClient): Promise<{
baseUrl: string;
headers: Record<string, string>;
}> {
const token = await resolveCopilotApiToken({
githubToken: client.githubToken,
env: client.env,
fetchImpl: client.fetchImpl,
});
const baseUrl = client.baseUrl?.trim() || token.baseUrl || DEFAULT_COPILOT_API_BASE_URL;
return {
baseUrl,
headers: {
...COPILOT_HEADERS_STATIC,
...client.headers,
Authorization: `Bearer ${token.token}`,
},
};
}
async function createGitHubCopilotEmbeddingProvider(
client: GitHubCopilotEmbeddingClient,
): Promise<{ provider: MemoryEmbeddingProvider; client: GitHubCopilotEmbeddingClient }> {
const initialSession = await resolveGitHubCopilotEmbeddingSession(client);
const embed = async (input: string[]): Promise<number[][]> => {
if (input.length === 0) {
return [];
}
const session = await resolveGitHubCopilotEmbeddingSession(client);
const url = `${session.baseUrl.replace(/\/$/, "")}/embeddings`;
return await withRemoteHttpResponse({
url,
fetchImpl: client.fetchImpl,
ssrfPolicy: buildRemoteBaseUrlPolicy(session.baseUrl),
init: {
method: "POST",
headers: session.headers,
body: JSON.stringify({ model: client.model, input }),
},
onResponse: async (response) => {
if (!response.ok) {
throw new Error(
`GitHub Copilot embeddings HTTP ${response.status}: ${await response.text()}`,
);
}
let payload: unknown;
try {
payload = await response.json();
} catch {
throw new Error("GitHub Copilot embeddings returned invalid JSON");
}
return parseGitHubCopilotEmbeddingPayload(payload, input.length);
},
});
};
return {
provider: {
id: COPILOT_EMBEDDING_PROVIDER_ID,
model: client.model,
embedQuery: async (text) => {
const [vector] = await embed([text]);
return vector ?? [];
},
embedBatch: embed,
},
client: {
...client,
baseUrl: initialSession.baseUrl,
},
};
}
export const githubCopilotMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = {
id: COPILOT_EMBEDDING_PROVIDER_ID,
transport: "remote",
authProviderId: COPILOT_EMBEDDING_PROVIDER_ID,
autoSelectPriority: 15,
allowExplicitWhenConfiguredAuto: true,
shouldContinueAutoSelection: (err: unknown) => isCopilotSetupError(err),