From c8c6df73a94a201fc1cd7b047c220ea1e1053c96 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 16 May 2026 11:09:45 +0800 Subject: [PATCH] fix(providers): harden embedding response schemas --- CHANGELOG.md | 1 + .../amazon-bedrock/embedding-provider.test.ts | 30 +++++++++ .../amazon-bedrock/embedding-provider.ts | 49 +++++++++++---- extensions/google/embedding-provider.test.ts | 52 ++++++++++++++- extensions/google/embedding-provider.ts | 63 +++++++++++++++---- .../src/host/embeddings-remote-fetch.test.ts | 60 +++++++++++++++++- .../src/host/embeddings-remote-fetch.ts | 47 ++++++++++++-- 7 files changed, 270 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e3b143a126..880905b1116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai - Providers/images: reject malformed successful OpenAI-compatible, OpenAI, Google, fal, and OpenRouter image responses with provider-owned errors instead of raw shape failures, silent invalid base64 skips, or empty image results. - Providers/videos: reject malformed successful xAI, OpenRouter, and fal video create, poll, and result responses with provider-owned errors instead of raw parser failures or long bogus polling. - Providers/audio: reject malformed successful OpenAI-compatible, ElevenLabs, and Deepgram speech responses with provider-owned errors instead of raw parser failures, wrong-shaped transcripts, or JSON/text bodies treated as audio. +- Providers/embeddings: reject malformed successful OpenAI-compatible, Google Gemini, and Amazon Bedrock embedding responses instead of silently returning empty or coerced vectors. - Trajectory export: skip and report malformed session/runtime JSONL rows in `manifest.json` instead of letting wrong-shaped session rows crash support bundle export. - Voice calls: persist rejected inbound-call replay keys so duplicate carrier webhook retries stay ignored after a Gateway restart. - Config/doctor: copy fallback-enabled channel `allowFrom` entries into explicit `groupAllowFrom` allowlists during `openclaw doctor --fix`, preserving current group access without adding runtime fallback-transition flags. diff --git a/extensions/amazon-bedrock/embedding-provider.test.ts b/extensions/amazon-bedrock/embedding-provider.test.ts index ce4cb76be74..8a99252b570 100644 --- a/extensions/amazon-bedrock/embedding-provider.test.ts +++ b/extensions/amazon-bedrock/embedding-provider.test.ts @@ -76,4 +76,34 @@ describe("bedrock embedding response parsers", () => { "Amazon Bedrock embedding response returned malformed JSON", ); }); + + it("rejects non-object embedding JSON", () => { + expect(() => __testing.parseSingle("titan-v2", "[]")).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); + + it("rejects missing single embedding vectors", () => { + expect(() => __testing.parseSingle("titan-v2", "{}")).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); + + it("rejects wrong single embedding vector element types", () => { + expect(() => __testing.parseSingle("titan-v2", '{"embedding":[1,"bad"]}')).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); + + it("rejects missing batch embedding vectors", () => { + expect(() => __testing.parseCohereBatch("cohere-v3", "{}")).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); + + it("rejects wrong batch embedding vector shapes", () => { + expect(() => + __testing.parseCohereBatch("cohere-v3", '{"embeddings":[[1],{"bad":true}]}'), + ).toThrow("Amazon Bedrock embedding response returned malformed JSON"); + }); }); diff --git a/extensions/amazon-bedrock/embedding-provider.ts b/extensions/amazon-bedrock/embedding-provider.ts index afd446d0dd0..0e85866da34 100644 --- a/extensions/amazon-bedrock/embedding-provider.ts +++ b/extensions/amazon-bedrock/embedding-provider.ts @@ -233,20 +233,42 @@ type BedrockEmbeddingResponseJson = { function parseBedrockEmbeddingResponseJson(raw: string): BedrockEmbeddingResponseJson { try { const parsed = JSON.parse(raw) as unknown; - return parsed && typeof parsed === "object" && !Array.isArray(parsed) - ? (parsed as BedrockEmbeddingResponseJson) - : {}; + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + throw new Error("Amazon Bedrock embedding response returned malformed JSON"); + } + return parsed as BedrockEmbeddingResponseJson; } catch { throw new Error("Amazon Bedrock embedding response returned malformed JSON"); } } +function malformedBedrockEmbeddingResponse(): Error { + return new Error("Amazon Bedrock embedding response returned malformed JSON"); +} + function asNumberArray(value: unknown): number[] { - return Array.isArray(value) ? (value as number[]) : []; + if (!Array.isArray(value)) { + throw malformedBedrockEmbeddingResponse(); + } + for (const entry of value) { + if (typeof entry !== "number" || !Number.isFinite(entry)) { + throw malformedBedrockEmbeddingResponse(); + } + } + return value; +} + +function asRecord(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; } function asNumberArrayBatch(value: unknown): number[][] { - return Array.isArray(value) ? (value.filter(Array.isArray) as number[][]) : []; + if (!Array.isArray(value)) { + throw malformedBedrockEmbeddingResponse(); + } + return value.map((entry) => asNumberArray(entry)); } function parseSingle(family: Family, raw: string): number[] { @@ -256,10 +278,11 @@ function parseSingle(family: Family, raw: string): number[] { return asNumberArray(Array.isArray(data.embeddings) ? data.embeddings[0]?.embedding : null); case "twelvelabs": { if (Array.isArray(data.data)) { - return asNumberArray(data.data[0]?.embedding); + return asNumberArray(asRecord(data.data[0])?.embedding); } - if (data.data && typeof data.data === "object") { - return asNumberArray((data.data as { embedding?: unknown }).embedding); + const dataRecord = asRecord(data.data); + if (dataRecord) { + return asNumberArray(dataRecord.embedding); } return asNumberArray(data.embedding); } @@ -272,12 +295,14 @@ function parseCohereBatch(family: Family, raw: string): number[][] { const data = parseBedrockEmbeddingResponseJson(raw); const embeddings = data.embeddings; if (!embeddings) { - return []; + throw malformedBedrockEmbeddingResponse(); } if (family === "cohere-v4" && !Array.isArray(embeddings)) { - return embeddings && typeof embeddings === "object" - ? asNumberArrayBatch((embeddings as { float?: unknown }).float) - : []; + const embeddingRecord = asRecord(embeddings); + if (!embeddingRecord) { + throw malformedBedrockEmbeddingResponse(); + } + return asNumberArrayBatch(embeddingRecord.float); } return asNumberArrayBatch(embeddings); } diff --git a/extensions/google/embedding-provider.test.ts b/extensions/google/embedding-provider.test.ts index cb18423996e..896329cca60 100644 --- a/extensions/google/embedding-provider.test.ts +++ b/extensions/google/embedding-provider.test.ts @@ -137,10 +137,10 @@ describe("Gemini embedding provider", () => { return url.endsWith(":batchEmbedContents") ? { embeddings: Array.from({ length: 2 }, () => ({ - values: [0, Number.POSITIVE_INFINITY, 5], + values: [0, 0, 5], })), } - : { embedding: { values: [3, 4, Number.NaN] } }; + : { embedding: { values: [3, 4, 0] } }; }); const { provider } = await createGeminiEmbeddingProvider({ @@ -213,4 +213,52 @@ describe("Gemini embedding provider", () => { ], }); }); + + it("rejects non-object successful embedding responses", async () => { + installFetchMock(() => []); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + remote: { apiKey: "test-key" }, + model: "gemini-embedding-001", + fallback: "none", + }); + + await expect(provider.embedQuery("test query")).rejects.toThrow( + "gemini embeddings failed: malformed JSON response", + ); + }); + + it("rejects wrong single embedding vector shapes", async () => { + installFetchMock(() => ({ embedding: { values: [1, "bad"] } })); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + remote: { apiKey: "test-key" }, + model: "gemini-embedding-001", + fallback: "none", + }); + + await expect(provider.embedQuery("test query")).rejects.toThrow( + "gemini embeddings failed: malformed JSON response", + ); + }); + + it("rejects batch embedding count mismatches", async () => { + installFetchMock(() => ({ embeddings: [{ values: [1, 2] }] })); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + remote: { apiKey: "test-key" }, + model: "gemini-embedding-001", + fallback: "none", + }); + + await expect(provider.embedBatch(["one", "two"])).rejects.toThrow( + "gemini embeddings failed: malformed JSON response", + ); + }); }); diff --git a/extensions/google/embedding-provider.ts b/extensions/google/embedding-provider.ts index 5831dd6c146..d0614650e1b 100644 --- a/extensions/google/embedding-provider.ts +++ b/extensions/google/embedding-provider.ts @@ -17,6 +17,7 @@ import { import { createProviderHttpError, providerOperationRetryConfig, + readProviderJsonObjectResponse, } from "openclaw/plugin-sdk/provider-http"; import type { SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime"; import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime"; @@ -90,6 +91,52 @@ type GeminiEmbeddingRequest = { }; export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest; +function asRecord(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function malformedGeminiEmbeddingResponse(): Error { + return new Error("gemini embeddings failed: malformed JSON response"); +} + +function readGeminiEmbeddingValues(value: unknown): number[] { + if (!Array.isArray(value)) { + throw malformedGeminiEmbeddingResponse(); + } + for (const entry of value) { + if (typeof entry !== "number" || !Number.isFinite(entry)) { + throw malformedGeminiEmbeddingResponse(); + } + } + return value; +} + +function readGeminiSingleEmbedding(payload: Record): number[] { + const embedding = asRecord(payload.embedding); + if (!embedding) { + throw malformedGeminiEmbeddingResponse(); + } + return readGeminiEmbeddingValues(embedding.values); +} + +function readGeminiBatchEmbeddings( + payload: Record, + expectedCount: number, +): number[][] { + if (!Array.isArray(payload.embeddings) || payload.embeddings.length !== expectedCount) { + throw malformedGeminiEmbeddingResponse(); + } + return payload.embeddings.map((entry) => { + const embedding = asRecord(entry); + if (!embedding) { + throw malformedGeminiEmbeddingResponse(); + } + return readGeminiEmbeddingValues(embedding.values); + }); +} + /** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */ export function buildGeminiTextEmbeddingRequest(params: { text: string; @@ -195,10 +242,7 @@ async function fetchGeminiEmbeddingPayload(params: { client: GeminiEmbeddingClient; endpoint: string; body: unknown; -}): Promise<{ - embedding?: { values?: number[] }; - embeddings?: Array<{ values?: number[] }>; -}> { +}): Promise> { return await executeWithApiKeyRotation({ provider: "google", apiKeys: params.client.apiKeys, @@ -221,10 +265,7 @@ async function fetchGeminiEmbeddingPayload(params: { if (!res.ok) { throw await createProviderHttpError(res, "gemini embeddings failed"); } - return (await res.json()) as { - embedding?: { values?: number[] }; - embeddings?: Array<{ values?: number[] }>; - }; + return await readProviderJsonObjectResponse(res, "gemini embeddings failed"); }, }); }, @@ -288,7 +329,7 @@ export async function createGeminiEmbeddingProvider( outputDimensionality: isV2 ? outputDimensionality : undefined, }), }); - return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []); + return sanitizeAndNormalizeEmbedding(readGeminiSingleEmbedding(payload)); }; const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise => { @@ -309,8 +350,8 @@ export async function createGeminiEmbeddingProvider( ), }, }); - const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : []; - return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? [])); + const embeddings = readGeminiBatchEmbeddings(payload, inputs.length); + return embeddings.map((values) => sanitizeAndNormalizeEmbedding(values)); }; const embedBatch = async (texts: string[]): Promise => { diff --git a/packages/memory-host-sdk/src/host/embeddings-remote-fetch.test.ts b/packages/memory-host-sdk/src/host/embeddings-remote-fetch.test.ts index 1696b61263e..90e3db9a086 100644 --- a/packages/memory-host-sdk/src/host/embeddings-remote-fetch.test.ts +++ b/packages/memory-host-sdk/src/host/embeddings-remote-fetch.test.ts @@ -32,7 +32,7 @@ describe("fetchRemoteEmbeddingVectors", () => { it("maps remote embedding response data to vectors", async () => { postJsonMock.mockImplementationOnce(async (params) => { return await params.parse({ - data: [{ embedding: [0.1, 0.2] }, {}, { embedding: [0.3] }], + data: [{ embedding: [0.1, 0.2] }, { embedding: [0.4] }, { embedding: [0.3] }], }); }); @@ -43,7 +43,7 @@ describe("fetchRemoteEmbeddingVectors", () => { errorPrefix: "embedding fetch failed", }); - expect(vectors).toEqual([[0.1, 0.2], [], [0.3]]); + expect(vectors).toEqual([[0.1, 0.2], [0.4], [0.3]]); const postJsonParams = requirePostJsonParams(); expect(postJsonParams.url).toBe("https://memory.example/v1/embeddings"); expect(postJsonParams.headers).toEqual({ Authorization: "Bearer test" }); @@ -63,4 +63,60 @@ describe("fetchRemoteEmbeddingVectors", () => { }), ).rejects.toThrow("embedding fetch failed: 403 forbidden"); }); + + it("rejects non-object embedding responses", async () => { + postJsonMock.mockImplementationOnce(async (params) => await params.parse([])); + + await expect( + fetchRemoteEmbeddingVectors({ + url: "https://memory.example/v1/embeddings", + headers: {}, + body: { input: ["one"] }, + errorPrefix: "embedding fetch failed", + }), + ).rejects.toThrow("embedding fetch failed: malformed JSON response"); + }); + + it("rejects missing embedding data arrays", async () => { + postJsonMock.mockImplementationOnce(async (params) => await params.parse({})); + + await expect( + fetchRemoteEmbeddingVectors({ + url: "https://memory.example/v1/embeddings", + headers: {}, + body: { input: ["one"] }, + errorPrefix: "embedding fetch failed", + }), + ).rejects.toThrow("embedding fetch failed: malformed JSON response"); + }); + + it("rejects embedding counts that do not match the submitted input batch", async () => { + postJsonMock.mockImplementationOnce(async (params) => { + return await params.parse({ data: [{ embedding: [0.1] }] }); + }); + + await expect( + fetchRemoteEmbeddingVectors({ + url: "https://memory.example/v1/embeddings", + headers: {}, + body: { input: ["one", "two"] }, + errorPrefix: "embedding fetch failed", + }), + ).rejects.toThrow("embedding fetch failed: malformed JSON response"); + }); + + it("rejects wrong nested embedding vector types", async () => { + postJsonMock.mockImplementationOnce(async (params) => { + return await params.parse({ data: [{ embedding: [0.1, "bad"] }] }); + }); + + await expect( + fetchRemoteEmbeddingVectors({ + url: "https://memory.example/v1/embeddings", + headers: {}, + body: { input: ["one"] }, + errorPrefix: "embedding fetch failed", + }), + ).rejects.toThrow("embedding fetch failed: malformed JSON response"); + }); }); diff --git a/packages/memory-host-sdk/src/host/embeddings-remote-fetch.ts b/packages/memory-host-sdk/src/host/embeddings-remote-fetch.ts index a93fbd80d5a..22cebf90257 100644 --- a/packages/memory-host-sdk/src/host/embeddings-remote-fetch.ts +++ b/packages/memory-host-sdk/src/host/embeddings-remote-fetch.ts @@ -1,6 +1,33 @@ import { postJson } from "./post-json.js"; import type { SsrFPolicy } from "./ssrf-policy.js"; +function asRecord(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function malformedEmbeddingResponse(errorPrefix: string): Error { + return new Error(`${errorPrefix}: malformed JSON response`); +} + +function readEmbeddingVector(value: unknown, errorPrefix: string): number[] { + if (!Array.isArray(value)) { + throw malformedEmbeddingResponse(errorPrefix); + } + for (const entry of value) { + if (typeof entry !== "number" || !Number.isFinite(entry)) { + throw malformedEmbeddingResponse(errorPrefix); + } + } + return value; +} + +function resolveExpectedEmbeddingCount(body: unknown): number | undefined { + const input = asRecord(body)?.input; + return Array.isArray(input) ? input.length : undefined; +} + export async function fetchRemoteEmbeddingVectors(params: { url: string; headers: Record; @@ -17,11 +44,21 @@ export async function fetchRemoteEmbeddingVectors(params: { body: params.body, errorPrefix: params.errorPrefix, parse: (payload) => { - const typedPayload = payload as { - data?: Array<{ embedding?: number[] }>; - }; - const data = typedPayload.data ?? []; - return data.map((entry) => entry.embedding ?? []); + const root = asRecord(payload); + if (!root || !Array.isArray(root.data)) { + throw malformedEmbeddingResponse(params.errorPrefix); + } + const expectedCount = resolveExpectedEmbeddingCount(params.body); + if (expectedCount !== undefined && root.data.length !== expectedCount) { + throw malformedEmbeddingResponse(params.errorPrefix); + } + return root.data.map((entry) => { + const record = asRecord(entry); + if (!record) { + throw malformedEmbeddingResponse(params.errorPrefix); + } + return readEmbeddingVector(record.embedding, params.errorPrefix); + }); }, }); }