mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-28 08:43:36 +00:00
* fix(github-copilot): bound model discovery and embeddings JSON response reads The GitHub Copilot embeddings plugin already bounds its error response bodies via readResponseTextLimited, but the success JSON reads for both model discovery and the embeddings call used unbounded response.json(). Route both through readProviderJsonResponse (16 MiB cap). Update isCopilotSetupError to recognise the new error label prefix so auto-selection still falls through on malformed discovery responses. Update tests to use proper Response objects and the new error messages. AI-assisted. Co-authored-by: Cursor <cursoragent@cursor.com> * fix(github-copilot): use memory embedding response cap Signed-off-by: sallyom <somalley@redhat.com> --------- Signed-off-by: sallyom <somalley@redhat.com> Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: sallyom <somalley@redhat.com>
341 lines
12 KiB
TypeScript
341 lines
12 KiB
TypeScript
// GitHub Copilot plugin module: implements the memory embedding provider (model discovery, model selection, and embedding requests).
|
|
import {
|
|
buildRemoteBaseUrlPolicy,
|
|
sanitizeAndNormalizeEmbedding,
|
|
withRemoteHttpResponse,
|
|
type MemoryEmbeddingProvider,
|
|
type MemoryEmbeddingProviderAdapter,
|
|
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
|
|
import { buildCopilotIdeHeaders } from "openclaw/plugin-sdk/provider-auth";
|
|
import {
|
|
readProviderJsonResponse,
|
|
readResponseTextLimited,
|
|
} from "openclaw/plugin-sdk/provider-http";
|
|
import { resolveConfiguredSecretInputString } from "openclaw/plugin-sdk/secret-input-runtime";
|
|
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
|
|
import { resolveFirstGithubToken } from "./auth.js";
|
|
import { DEFAULT_COPILOT_API_BASE_URL, resolveCopilotApiToken } from "./token.js";
|
|
|
|
/**
 * Provider identifier used for the adapter id, its auth provider id, the
 * runtime cache key, and the optional "github-copilot/" model-name prefix.
 */
const COPILOT_EMBEDDING_PROVIDER_ID = "github-copilot";

/**
 * Embedding model IDs in descending order of preference. When the user does
 * not pin a model, the first of these that the Copilot API lists is selected.
 */
const PREFERRED_MODELS = [
  "text-embedding-3-small",
  "text-embedding-3-large",
  "text-embedding-ada-002",
] as const;
|
|
|
|
/**
 * Headers attached to every Copilot API request issued by this module: a JSON
 * content type plus whatever buildCopilotIdeHeaders() returns. Because the IDE
 * headers are spread last, they win over "Content-Type" if they define it.
 */
const COPILOT_HEADERS_STATIC: Record<string, string> = {
  "Content-Type": "application/json",
  ...buildCopilotIdeHeaders(),
};

/** Cap (8 KiB) passed to readResponseTextLimited when reading the body of a non-OK response. */
const COPILOT_ERROR_BODY_LIMIT_BYTES = 8 * 1024;

/** Cap (64 MiB) passed as `maxBytes` when reading a successful embeddings JSON response. */
const COPILOT_EMBEDDINGS_RESPONSE_MAX_BYTES = 64 * 1024 * 1024;
|
|
|
|
/**
 * Builds an SSRF policy that allows only the hostname of `baseUrl`.
 *
 * NOTE(review): the embedding request path uses the SDK's
 * buildRemoteBaseUrlPolicy() instead of this helper — confirm whether the two
 * are meant to stay separate.
 *
 * @param baseUrl - Base URL of the Copilot API.
 * @returns A policy whose `allowedHostnames` contains the parsed hostname, or
 *   `undefined` when `baseUrl` cannot be parsed or is not http(s).
 */
function buildSsrfPolicy(baseUrl: string): SsrFPolicy | undefined {
  let target: URL;
  try {
    target = new URL(baseUrl);
  } catch {
    // Unparseable URL: no policy can be derived.
    return undefined;
  }

  const isHttp = target.protocol === "http:" || target.protocol === "https:";
  return isHttp ? { allowedHostnames: [target.hostname] } : undefined;
}
|
|
|
|
/**
 * Shape assumed for one element of the `data` array returned by the Copilot
 * `/models` endpoint. Both fields are `unknown` because the payload is not
 * trusted; consumers narrow them at the point of use.
 */
type CopilotModelEntry = {
  /** Model identifier; only string values are used by model discovery. */
  id?: unknown;
  /** Endpoints the model supports; only string elements of an array are considered. */
  supported_endpoints?: unknown;
};
|
|
|
|
/**
 * Everything needed to issue embedding requests against the Copilot API on
 * behalf of one GitHub identity.
 */
type GitHubCopilotEmbeddingClient = {
  /** GitHub token that is exchanged for a Copilot API token. */
  githubToken: string;
  /** Embedding model ID sent in each request body. */
  model: string;
  /** Explicit API base URL; when blank or absent the token's base URL or the default is used. */
  baseUrl?: string;
  /** Extra request headers, merged over the static Copilot headers. */
  headers?: Record<string, string>;
  /** Environment forwarded to the Copilot token resolver. */
  env?: NodeJS.ProcessEnv;
  /** Fetch implementation forwarded to the token resolver and the embeddings request. */
  fetchImpl?: typeof fetch;
};
|
|
|
|
/**
 * Message fragments that identify a Copilot-specific setup failure: missing
 * GitHub token, token exchange failures, no embedding models on the plan,
 * model discovery errors, and a user-pinned model that Copilot does not offer.
 */
const COPILOT_SETUP_ERROR_MARKERS: readonly string[] = [
  "No GitHub token available",
  "Copilot token exchange failed",
  "Copilot token response",
  "No embedding models available",
  "GitHub Copilot model discovery",
  "github-copilot.model-discovery",
  "GitHub Copilot embedding model",
  "Unexpected response from GitHub Copilot token endpoint",
];

/**
 * Reports whether `err` is a Copilot setup failure, in which case provider
 * auto-selection should fall through to the next provider (e.g. OpenAI).
 *
 * @param err - Any thrown value.
 * @returns `true` only for `Error` instances whose message contains one of the
 *   known setup-failure fragments; `false` for everything else.
 */
function isCopilotSetupError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }
  const { message } = err;
  return COPILOT_SETUP_ERROR_MARKERS.some((marker) => message.includes(marker));
}
|
|
|
|
/**
 * Queries the Copilot `/models` endpoint and returns the IDs of the models
 * that look embedding-capable.
 *
 * A model qualifies when one of its `supported_endpoints` strings contains
 * "embeddings", or when its ID contains a word starting with "embedding"
 * (case-insensitive). The second rule covers models that the API lists with
 * an empty `supported_endpoints` array.
 *
 * The response body is untrusted: a missing or non-array `data` field yields
 * an empty list, and entries that are not objects or lack a non-blank string
 * `id` are skipped rather than causing a failure.
 *
 * @param params.baseUrl - Copilot API base URL; one trailing slash is stripped.
 * @param params.copilotToken - Copilot API token sent as a Bearer credential.
 * @param params.headers - Optional extra headers; they override the static
 *   Copilot headers but never the Authorization header.
 * @param params.ssrfPolicy - Optional SSRF policy forwarded to the guarded fetch.
 * @returns Trimmed model IDs in the order the API listed them.
 * @throws Error starting with "GitHub Copilot model discovery HTTP <status>"
 *   when the response is not OK. Errors from reading/parsing the JSON body
 *   come from readProviderJsonResponse (presumably labelled
 *   "github-copilot.model-discovery" — confirm against the SDK).
 */
async function discoverEmbeddingModels(params: {
  baseUrl: string;
  copilotToken: string;
  headers?: Record<string, string>;
  ssrfPolicy?: SsrFPolicy;
}): Promise<string[]> {
  const url = `${params.baseUrl.replace(/\/$/, "")}/models`;
  const { response, release } = await fetchWithSsrFGuard({
    url,
    init: {
      method: "GET",
      headers: {
        ...COPILOT_HEADERS_STATIC,
        ...params.headers,
        // Spread last so caller-supplied headers cannot replace the credential.
        Authorization: `Bearer ${params.copilotToken}`,
      },
    },
    policy: params.ssrfPolicy,
    auditContext: "memory-remote",
  });
  try {
    if (!response.ok) {
      const detail = await readResponseTextLimited(response, COPILOT_ERROR_BODY_LIMIT_BYTES);
      throw new Error(`GitHub Copilot model discovery HTTP ${response.status}: ${detail}`);
    }
    const payload = await readProviderJsonResponse(response, "github-copilot.model-discovery");
    const data = (payload as { data?: unknown } | null | undefined)?.data;
    const entries: unknown[] = Array.isArray(data) ? data : [];

    return entries.flatMap((entry) => {
      // Skip null/primitive entries: dereferencing them would throw a raw
      // TypeError that isCopilotSetupError does not recognise, which would
      // stop provider auto-selection from falling through.
      if (entry === null || typeof entry !== "object") {
        return [];
      }
      const { id: rawId, supported_endpoints: rawEndpoints } = entry as CopilotModelEntry;
      const id = typeof rawId === "string" ? rawId.trim() : "";
      if (!id) {
        return [];
      }
      const endpoints = Array.isArray(rawEndpoints)
        ? rawEndpoints.filter((value): value is string => typeof value === "string")
        : [];
      const declaresEmbeddings = endpoints.some((endpoint) => endpoint.includes("embeddings"));
      return declaresEmbeddings || /\bembedding/i.test(id) ? [id] : [];
    });
  } finally {
    // Always release the guarded fetch, on success and on every error path.
    await release();
  }
}
|
|
|
|
/**
 * Chooses the embedding model to use.
 *
 * With a user-pinned model, an optional "github-copilot/" prefix is stripped
 * and the remaining ID must be present in `available`. Without one, the first
 * entry of PREFERRED_MODELS that is available wins, falling back to the first
 * available model.
 *
 * @param available - Model IDs reported by model discovery.
 * @param userModel - Optional user-configured model, with or without the provider prefix.
 * @returns The selected model ID.
 * @throws Error "No embedding models available from GitHub Copilot" when
 *   `available` is empty, or 'GitHub Copilot embedding model "<id>" is not
 *   available. ...' when the pinned model is not offered.
 */
function pickBestModel(available: string[], userModel?: string): string {
  // Both the pinned and the automatic path fail identically on an empty list.
  if (available.length === 0) {
    throw new Error("No embedding models available from GitHub Copilot");
  }

  if (userModel) {
    const providerPrefix = `${COPILOT_EMBEDDING_PROVIDER_ID}/`;
    const requested = userModel.trim();
    // Accept both "model-name" and "github-copilot/model-name".
    const modelId = requested.startsWith(providerPrefix)
      ? requested.slice(providerPrefix.length)
      : requested;
    if (available.includes(modelId)) {
      return modelId;
    }
    throw new Error(
      `GitHub Copilot embedding model "${modelId}" is not available. Available: ${available.join(", ")}`,
    );
  }

  const preferred = PREFERRED_MODELS.find((candidate) => available.includes(candidate));
  return preferred ?? available[0];
}
|
|
|
|
/**
 * Validates an embeddings response payload and returns one normalized vector
 * per input, ordered by the `index` field of each entry rather than by the
 * order in which entries appear.
 *
 * @param payload - Parsed JSON body of the embeddings response (untrusted).
 * @param expectedCount - Number of inputs that were sent in the request.
 * @returns `expectedCount` vectors, each passed through sanitizeAndNormalizeEmbedding.
 * @throws Error when `data` is missing or not an array, when an entry is not
 *   an object, when an index is non-integer, out of range, or duplicated,
 *   when an embedding is not an array of numbers, or when any input has no vector.
 */
function parseGitHubCopilotEmbeddingPayload(payload: unknown, expectedCount: number): number[][] {
  const data =
    !payload || typeof payload !== "object" ? undefined : (payload as { data?: unknown }).data;
  if (!Array.isArray(data)) {
    throw new Error("GitHub Copilot embeddings response missing data[]");
  }

  // One slot per input; a slot stays undefined until its entry has been seen.
  const slots = Array.from<number[] | undefined>({ length: expectedCount });

  for (const item of data) {
    if (!item || typeof item !== "object") {
      throw new Error("GitHub Copilot embeddings response contains an invalid entry");
    }
    const { index: rawIndex, embedding: rawEmbedding } = item as {
      index?: unknown;
      embedding?: unknown;
    };

    if (typeof rawIndex !== "number" || !Number.isInteger(rawIndex)) {
      throw new Error("GitHub Copilot embeddings response contains an invalid index");
    }
    if (rawIndex < 0 || rawIndex >= expectedCount) {
      throw new Error("GitHub Copilot embeddings response contains an out-of-range index");
    }
    if (slots[rawIndex] !== undefined) {
      throw new Error("GitHub Copilot embeddings response contains duplicate indexes");
    }
    if (!Array.isArray(rawEmbedding) || rawEmbedding.some((value) => typeof value !== "number")) {
      throw new Error("GitHub Copilot embeddings response contains an invalid embedding");
    }

    slots[rawIndex] = sanitizeAndNormalizeEmbedding(rawEmbedding);
  }

  // Every input must have received a vector.
  for (let slot = 0; slot < expectedCount; slot++) {
    if (slots[slot] === undefined) {
      throw new Error("GitHub Copilot embeddings response missing vectors for some inputs");
    }
  }
  return slots as number[][];
}
|
|
|
|
/**
 * Resolves a Copilot API token for `client` and derives the base URL and
 * request headers to use with it.
 *
 * Base URL precedence: the client's non-blank `baseUrl`, then the base URL
 * returned with the token, then DEFAULT_COPILOT_API_BASE_URL.
 * Header precedence: static Copilot headers, overridden by the client's
 * headers, with the Bearer Authorization header always applied last.
 *
 * @param client - Client settings (GitHub token, optional overrides).
 * @returns The base URL and the full header set for the session.
 */
async function resolveGitHubCopilotEmbeddingSession(client: GitHubCopilotEmbeddingClient): Promise<{
  baseUrl: string;
  headers: Record<string, string>;
}> {
  const { githubToken, env, fetchImpl } = client;
  const apiToken = await resolveCopilotApiToken({ githubToken, env, fetchImpl });

  const headers: Record<string, string> = {
    ...COPILOT_HEADERS_STATIC,
    ...client.headers,
    Authorization: `Bearer ${apiToken.token}`,
  };
  const baseUrl = client.baseUrl?.trim() || apiToken.baseUrl || DEFAULT_COPILOT_API_BASE_URL;

  return { baseUrl, headers };
}
|
|
|
|
/**
 * Creates a memory embedding provider backed by the Copilot `/embeddings` endpoint.
 *
 * A session is resolved once up front; its base URL is reported on the
 * returned client. Each embedding call resolves a session again before
 * sending the request, so the call uses whatever token the resolver returns
 * at that time.
 *
 * @param client - Client settings, including the model to request.
 * @returns The provider plus a copy of `client` whose `baseUrl` is the resolved one.
 */
async function createGitHubCopilotEmbeddingProvider(
  client: GitHubCopilotEmbeddingClient,
): Promise<{ provider: MemoryEmbeddingProvider; client: GitHubCopilotEmbeddingClient }> {
  const initialSession = await resolveGitHubCopilotEmbeddingSession(client);

  /** Embeds `texts` in one request; an empty batch short-circuits without any network call. */
  async function embed(texts: string[], signal?: AbortSignal): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    const { baseUrl, headers } = await resolveGitHubCopilotEmbeddingSession(client);
    return await withRemoteHttpResponse({
      url: `${baseUrl.replace(/\/$/, "")}/embeddings`,
      fetchImpl: client.fetchImpl,
      ssrfPolicy: buildRemoteBaseUrlPolicy(baseUrl),
      signal,
      init: {
        method: "POST",
        headers,
        body: JSON.stringify({ model: client.model, input: texts }),
      },
      onResponse: async (response) => {
        if (response.ok) {
          const payload = await readProviderJsonResponse(response, "github-copilot.embeddings", {
            maxBytes: COPILOT_EMBEDDINGS_RESPONSE_MAX_BYTES,
          });
          return parseGitHubCopilotEmbeddingPayload(payload, texts.length);
        }
        // Non-OK: surface a bounded excerpt of the error body.
        const detail = await readResponseTextLimited(response, COPILOT_ERROR_BODY_LIMIT_BYTES);
        throw new Error(`GitHub Copilot embeddings HTTP ${response.status}: ${detail}`);
      },
    });
  }

  const provider: MemoryEmbeddingProvider = {
    id: COPILOT_EMBEDDING_PROVIDER_ID,
    model: client.model,
    embedQuery: async (text, options) => {
      const vectors = await embed([text], options?.signal);
      return vectors[0] ?? [];
    },
    embedBatch: async (texts, options) => {
      return await embed(texts, options?.signal);
    },
  };

  return {
    provider,
    client: { ...client, baseUrl: initialSession.baseUrl },
  };
}
|
|
|
|
/**
 * Memory embedding provider adapter for GitHub Copilot.
 *
 * `create` resolves a GitHub token (the configured remote API key wins over
 * the first profile token), exchanges it for a Copilot API token, discovers
 * the available embedding models, selects one, and builds the provider.
 * Setup failures recognised by isCopilotSetupError let auto-selection
 * continue with the next provider.
 */
export const githubCopilotMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = {
  id: COPILOT_EMBEDDING_PROVIDER_ID,
  transport: "remote",
  authProviderId: COPILOT_EMBEDDING_PROVIDER_ID,
  autoSelectPriority: 15,
  allowExplicitWhenConfiguredAuto: true,
  shouldContinueAutoSelection: (cause: unknown) => isCopilotSetupError(cause),
  create: async (options) => {
    const configuredToken = await resolveConfiguredSecretInputString({
      config: options.config,
      env: process.env,
      value: options.remote?.apiKey,
      path: "agents.*.memorySearch.remote.apiKey",
    });
    const profileTokens = await resolveFirstGithubToken({
      agentDir: options.agentDir,
      config: options.config,
      env: process.env,
    });
    const githubToken = configuredToken.value || profileTokens.githubToken;
    if (!githubToken) {
      throw new Error("No GitHub token available for Copilot embedding provider");
    }

    const apiToken = await resolveCopilotApiToken({
      githubToken,
      env: process.env,
    });
    const baseUrl =
      options.remote?.baseUrl?.trim() || apiToken.baseUrl || DEFAULT_COPILOT_API_BASE_URL;

    // Discovery runs even for a pinned model: it validates the Copilot token
    // and confirms the plan supports embeddings before any embedding request.
    const availableModels = await discoverEmbeddingModels({
      baseUrl,
      copilotToken: apiToken.token,
      headers: options.remote?.headers,
      ssrfPolicy: buildSsrfPolicy(baseUrl),
    });

    // A blank configured model counts as "not pinned".
    const pinnedModel = options.model?.trim() || undefined;
    const model = pickBestModel(availableModels, pinnedModel);

    const created = await createGitHubCopilotEmbeddingProvider({
      baseUrl,
      env: process.env,
      fetchImpl: fetch,
      githubToken,
      headers: options.remote?.headers,
      model,
    });

    return {
      provider: created.provider,
      runtime: {
        id: COPILOT_EMBEDDING_PROVIDER_ID,
        cacheKeyData: {
          provider: COPILOT_EMBEDDING_PROVIDER_ID,
          baseUrl,
          model,
        },
      },
    };
  },
};
|