fix(providers): harden embedding response schemas

This commit is contained in:
Vincent Koc
2026-05-16 11:09:45 +08:00
parent 202dd7590d
commit c8c6df73a9
7 changed files with 270 additions and 32 deletions

View File

@@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai
- Providers/images: reject malformed successful OpenAI-compatible, OpenAI, Google, fal, and OpenRouter image responses with provider-owned errors instead of raw shape failures, silent invalid base64 skips, or empty image results.
- Providers/videos: reject malformed successful xAI, OpenRouter, and fal video create, poll, and result responses with provider-owned errors instead of raw parser failures or long bogus polling.
- Providers/audio: reject malformed successful OpenAI-compatible, ElevenLabs, and Deepgram speech responses with provider-owned errors instead of raw parser failures, wrong-shaped transcripts, or JSON/text bodies treated as audio.
- Providers/embeddings: reject malformed successful OpenAI-compatible, Google Gemini, and Amazon Bedrock embedding responses instead of silently returning empty or coerced vectors.
- Trajectory export: skip and report malformed session/runtime JSONL rows in `manifest.json` instead of letting wrong-shaped session rows crash support bundle export.
- Voice calls: persist rejected inbound-call replay keys so duplicate carrier webhook retries stay ignored after a Gateway restart.
- Config/doctor: copy fallback-enabled channel `allowFrom` entries into explicit `groupAllowFrom` allowlists during `openclaw doctor --fix`, preserving current group access without adding runtime fallback-transition flags.

View File

@@ -76,4 +76,34 @@ describe("bedrock embedding response parsers", () => {
"Amazon Bedrock embedding response returned malformed JSON",
);
});
// Every structurally invalid (but parseable) payload must surface the same
// provider-owned error message, so the cases are registered from one table.
const malformedBedrockJsonMessage = "Amazon Bedrock embedding response returned malformed JSON";
const malformedBedrockJsonCases: ReadonlyArray<readonly [string, () => unknown]> = [
  ["rejects non-object embedding JSON", () => __testing.parseSingle("titan-v2", "[]")],
  ["rejects missing single embedding vectors", () => __testing.parseSingle("titan-v2", "{}")],
  [
    "rejects wrong single embedding vector element types",
    () => __testing.parseSingle("titan-v2", '{"embedding":[1,"bad"]}'),
  ],
  ["rejects missing batch embedding vectors", () => __testing.parseCohereBatch("cohere-v3", "{}")],
  [
    "rejects wrong batch embedding vector shapes",
    () => __testing.parseCohereBatch("cohere-v3", '{"embeddings":[[1],{"bad":true}]}'),
  ],
];
for (const [title, parse] of malformedBedrockJsonCases) {
  it(title, () => {
    expect(parse).toThrow(malformedBedrockJsonMessage);
  });
}
});

View File

@@ -233,20 +233,42 @@ type BedrockEmbeddingResponseJson = {
/**
 * Parses a raw Bedrock embedding response body into a JSON object.
 *
 * @param raw - The response body text to parse.
 * @returns The parsed value, which is guaranteed to be a non-null, non-array object.
 * @throws Error with the message "Amazon Bedrock embedding response returned malformed JSON"
 *   when the text is not valid JSON or the parsed value is not a JSON object.
 */
function parseBedrockEmbeddingResponseJson(raw: string): BedrockEmbeddingResponseJson {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw new Error("Amazon Bedrock embedding response returned malformed JSON");
  }
  // Arrays, primitives and null are valid JSON but not a usable response object;
  // reject them instead of coercing them to an empty object.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error("Amazon Bedrock embedding response returned malformed JSON");
  }
  return parsed as BedrockEmbeddingResponseJson;
}
/** Creates the error raised whenever a Bedrock embedding response has an unexpected shape. */
function malformedBedrockEmbeddingResponse(): Error {
  const message = "Amazon Bedrock embedding response returned malformed JSON";
  return new Error(message);
}
/**
 * Validates that a value is an embedding vector: an array whose entries are all finite numbers.
 *
 * @param value - The candidate vector taken from a parsed response.
 * @returns The same array, typed as `number[]`.
 * @throws The error produced by `malformedBedrockEmbeddingResponse()` when the value is not an
 *   array or any entry is not a finite number (strings, `NaN`, and infinities are rejected).
 */
function asNumberArray(value: unknown): number[] {
  // Non-arrays are rejected rather than coerced to an empty vector.
  if (!Array.isArray(value)) {
    throw malformedBedrockEmbeddingResponse();
  }
  for (const entry of value) {
    if (typeof entry !== "number" || !Number.isFinite(entry)) {
      throw malformedBedrockEmbeddingResponse();
    }
  }
  return value as number[];
}
/** Returns the value as a string-keyed record when it is a non-null, non-array object; otherwise `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
/**
 * Validates a batch of embedding vectors.
 *
 * @param value - The candidate batch taken from a parsed response.
 * @returns One validated vector per entry, in the original order.
 * @throws The error produced by `malformedBedrockEmbeddingResponse()` when the value is not an
 *   array; entry-level failures are raised by `asNumberArray`.
 */
function asNumberArrayBatch(value: unknown): number[][] {
  if (!Array.isArray(value)) {
    throw malformedBedrockEmbeddingResponse();
  }
  // Validate every entry instead of filtering out wrong-shaped ones, so a partially
  // malformed batch fails loudly rather than returning fewer vectors than were requested.
  return value.map((entry) => asNumberArray(entry));
}
function parseSingle(family: Family, raw: string): number[] {
@@ -256,10 +278,11 @@ function parseSingle(family: Family, raw: string): number[] {
return asNumberArray(Array.isArray(data.embeddings) ? data.embeddings[0]?.embedding : null);
case "twelvelabs": {
if (Array.isArray(data.data)) {
return asNumberArray(data.data[0]?.embedding);
return asNumberArray(asRecord(data.data[0])?.embedding);
}
if (data.data && typeof data.data === "object") {
return asNumberArray((data.data as { embedding?: unknown }).embedding);
const dataRecord = asRecord(data.data);
if (dataRecord) {
return asNumberArray(dataRecord.embedding);
}
return asNumberArray(data.embedding);
}
@@ -272,12 +295,14 @@ function parseCohereBatch(family: Family, raw: string): number[][] {
const data = parseBedrockEmbeddingResponseJson(raw);
const embeddings = data.embeddings;
if (!embeddings) {
return [];
throw malformedBedrockEmbeddingResponse();
}
if (family === "cohere-v4" && !Array.isArray(embeddings)) {
return embeddings && typeof embeddings === "object"
? asNumberArrayBatch((embeddings as { float?: unknown }).float)
: [];
const embeddingRecord = asRecord(embeddings);
if (!embeddingRecord) {
throw malformedBedrockEmbeddingResponse();
}
return asNumberArrayBatch(embeddingRecord.float);
}
return asNumberArrayBatch(embeddings);
}

View File

@@ -137,10 +137,10 @@ describe("Gemini embedding provider", () => {
return url.endsWith(":batchEmbedContents")
? {
embeddings: Array.from({ length: 2 }, () => ({
values: [0, Number.POSITIVE_INFINITY, 5],
values: [0, 0, 5],
})),
}
: { embedding: { values: [3, 4, Number.NaN] } };
: { embedding: { values: [3, 4, 0] } };
});
const { provider } = await createGeminiEmbeddingProvider({
@@ -213,4 +213,52 @@ describe("Gemini embedding provider", () => {
],
});
});
// All malformed-response cases share the same provider configuration and expected message.
const malformedGeminiResponseMessage = "gemini embeddings failed: malformed JSON response";
const createMalformedResponseProvider = async () => {
  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    remote: { apiKey: "test-key" },
    model: "gemini-embedding-001",
    fallback: "none",
  });
  return created.provider;
};

it("rejects non-object successful embedding responses", async () => {
  installFetchMock(() => []);
  const provider = await createMalformedResponseProvider();

  await expect(provider.embedQuery("test query")).rejects.toThrow(malformedGeminiResponseMessage);
});

it("rejects wrong single embedding vector shapes", async () => {
  installFetchMock(() => ({ embedding: { values: [1, "bad"] } }));
  const provider = await createMalformedResponseProvider();

  await expect(provider.embedQuery("test query")).rejects.toThrow(malformedGeminiResponseMessage);
});

it("rejects batch embedding count mismatches", async () => {
  // One embedding is returned for two inputs.
  installFetchMock(() => ({ embeddings: [{ values: [1, 2] }] }));
  const provider = await createMalformedResponseProvider();

  await expect(provider.embedBatch(["one", "two"])).rejects.toThrow(
    malformedGeminiResponseMessage,
  );
});
});

View File

@@ -17,6 +17,7 @@ import {
import {
createProviderHttpError,
providerOperationRetryConfig,
readProviderJsonObjectResponse,
} from "openclaw/plugin-sdk/provider-http";
import type { SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
@@ -90,6 +91,52 @@ type GeminiEmbeddingRequest = {
};
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
/** Narrows an unknown value to a string-keyed record; arrays, `null`, and non-objects yield `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  const isObject = typeof value === "object" && value !== null;
  if (!isObject || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
/** Creates the error raised when a successful Gemini embedding response has an unexpected shape. */
function malformedGeminiEmbeddingResponse(): Error {
  const message = "gemini embeddings failed: malformed JSON response";
  return new Error(message);
}
/**
 * Validates a Gemini embedding `values` field.
 *
 * @param value - The candidate vector.
 * @returns The same array when every entry is a finite number.
 * @throws The error from `malformedGeminiEmbeddingResponse()` when the value is not an array
 *   or contains an entry that is not a finite number.
 */
function readGeminiEmbeddingValues(value: unknown): number[] {
  if (!Array.isArray(value)) {
    throw malformedGeminiEmbeddingResponse();
  }
  for (let index = 0; index < value.length; index += 1) {
    const entry: unknown = value[index];
    const isFiniteNumber = typeof entry === "number" && Number.isFinite(entry);
    if (!isFiniteNumber) {
      throw malformedGeminiEmbeddingResponse();
    }
  }
  return value;
}
/**
 * Extracts the vector from a single-embedding Gemini payload (`payload.embedding.values`).
 *
 * @param payload - The parsed response object.
 * @returns The validated vector returned by `readGeminiEmbeddingValues`.
 * @throws The error from `malformedGeminiEmbeddingResponse()` when `payload.embedding` is not
 *   an object; vector-level failures are raised by `readGeminiEmbeddingValues`.
 */
function readGeminiSingleEmbedding(payload: Record<string, unknown>): number[] {
  const embeddingRecord = asRecord(payload.embedding);
  if (embeddingRecord === undefined) {
    throw malformedGeminiEmbeddingResponse();
  }
  const { values } = embeddingRecord;
  return readGeminiEmbeddingValues(values);
}
/**
 * Extracts the vectors from a batch Gemini payload (`payload.embeddings[i].values`).
 *
 * @param payload - The parsed response object.
 * @param expectedCount - The number of embeddings the response must contain.
 * @returns One validated vector per entry of `payload.embeddings`, in order.
 * @throws The error from `malformedGeminiEmbeddingResponse()` when `payload.embeddings` is not
 *   an array, its length differs from `expectedCount`, or an entry is not an object;
 *   vector-level failures are raised by `readGeminiEmbeddingValues`.
 */
function readGeminiBatchEmbeddings(
  payload: Record<string, unknown>,
  expectedCount: number,
): number[][] {
  const { embeddings } = payload;
  if (!Array.isArray(embeddings)) {
    throw malformedGeminiEmbeddingResponse();
  }
  if (embeddings.length !== expectedCount) {
    throw malformedGeminiEmbeddingResponse();
  }
  const readEntry = (entry: unknown): number[] => {
    const entryRecord = asRecord(entry);
    if (entryRecord === undefined) {
      throw malformedGeminiEmbeddingResponse();
    }
    return readGeminiEmbeddingValues(entryRecord.values);
  };
  return embeddings.map((entry) => readEntry(entry));
}
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
export function buildGeminiTextEmbeddingRequest(params: {
text: string;
@@ -195,10 +242,7 @@ async function fetchGeminiEmbeddingPayload(params: {
client: GeminiEmbeddingClient;
endpoint: string;
body: unknown;
}): Promise<{
embedding?: { values?: number[] };
embeddings?: Array<{ values?: number[] }>;
}> {
}): Promise<Record<string, unknown>> {
return await executeWithApiKeyRotation({
provider: "google",
apiKeys: params.client.apiKeys,
@@ -221,10 +265,7 @@ async function fetchGeminiEmbeddingPayload(params: {
if (!res.ok) {
throw await createProviderHttpError(res, "gemini embeddings failed");
}
return (await res.json()) as {
embedding?: { values?: number[] };
embeddings?: Array<{ values?: number[] }>;
};
return await readProviderJsonObjectResponse(res, "gemini embeddings failed");
},
});
},
@@ -288,7 +329,7 @@ export async function createGeminiEmbeddingProvider(
outputDimensionality: isV2 ? outputDimensionality : undefined,
}),
});
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
return sanitizeAndNormalizeEmbedding(readGeminiSingleEmbedding(payload));
};
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
@@ -309,8 +350,8 @@ export async function createGeminiEmbeddingProvider(
),
},
});
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
const embeddings = readGeminiBatchEmbeddings(payload, inputs.length);
return embeddings.map((values) => sanitizeAndNormalizeEmbedding(values));
};
const embedBatch = async (texts: string[]): Promise<number[][]> => {

View File

@@ -32,7 +32,7 @@ describe("fetchRemoteEmbeddingVectors", () => {
it("maps remote embedding response data to vectors", async () => {
postJsonMock.mockImplementationOnce(async (params) => {
return await params.parse({
data: [{ embedding: [0.1, 0.2] }, {}, { embedding: [0.3] }],
data: [{ embedding: [0.1, 0.2] }, { embedding: [0.4] }, { embedding: [0.3] }],
});
});
@@ -43,7 +43,7 @@ describe("fetchRemoteEmbeddingVectors", () => {
errorPrefix: "embedding fetch failed",
});
expect(vectors).toEqual([[0.1, 0.2], [], [0.3]]);
expect(vectors).toEqual([[0.1, 0.2], [0.4], [0.3]]);
const postJsonParams = requirePostJsonParams();
expect(postJsonParams.url).toBe("https://memory.example/v1/embeddings");
expect(postJsonParams.headers).toEqual({ Authorization: "Bearer test" });
@@ -63,4 +63,60 @@ describe("fetchRemoteEmbeddingVectors", () => {
}),
).rejects.toThrow("embedding fetch failed: 403 forbidden");
});
// Each case feeds one malformed payload through the `parse` callback handed to postJson
// and expects the prefixed malformed-response error.
const malformedRemoteEmbeddingCases: ReadonlyArray<{
  title: string;
  payload: unknown;
  input: string[];
}> = [
  { title: "rejects non-object embedding responses", payload: [], input: ["one"] },
  { title: "rejects missing embedding data arrays", payload: {}, input: ["one"] },
  {
    title: "rejects embedding counts that do not match the submitted input batch",
    payload: { data: [{ embedding: [0.1] }] },
    input: ["one", "two"],
  },
  {
    title: "rejects wrong nested embedding vector types",
    payload: { data: [{ embedding: [0.1, "bad"] }] },
    input: ["one"],
  },
];

for (const { title, payload, input } of malformedRemoteEmbeddingCases) {
  it(title, async () => {
    postJsonMock.mockImplementationOnce(async (params) => await params.parse(payload));

    const request = fetchRemoteEmbeddingVectors({
      url: "https://memory.example/v1/embeddings",
      headers: {},
      body: { input },
      errorPrefix: "embedding fetch failed",
    });

    await expect(request).rejects.toThrow("embedding fetch failed: malformed JSON response");
  });
}
});

View File

@@ -1,6 +1,33 @@
import { postJson } from "./post-json.js";
import type { SsrFPolicy } from "./ssrf-policy.js";
/** Returns the value typed as a string-keyed record, or `undefined` for `null`, arrays, and non-objects. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object") {
    return undefined;
  }
  if (value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
/**
 * Creates the error raised when a remote embedding response has an unexpected shape.
 *
 * @param errorPrefix - Caller-supplied prefix placed before ": malformed JSON response".
 */
function malformedEmbeddingResponse(errorPrefix: string): Error {
  const message = errorPrefix + ": malformed JSON response";
  return new Error(message);
}
/**
 * Validates one embedding vector from a remote response.
 *
 * @param value - The candidate vector.
 * @param errorPrefix - Prefix forwarded to `malformedEmbeddingResponse` for the error message.
 * @returns The same array when every entry is a finite number.
 * @throws The error from `malformedEmbeddingResponse(errorPrefix)` when the value is not an
 *   array or contains an entry that is not a finite number.
 */
function readEmbeddingVector(value: unknown, errorPrefix: string): number[] {
  if (!Array.isArray(value)) {
    throw malformedEmbeddingResponse(errorPrefix);
  }
  for (let index = 0; index < value.length; index += 1) {
    const entry: unknown = value[index];
    const isFiniteNumber = typeof entry === "number" && Number.isFinite(entry);
    if (!isFiniteNumber) {
      throw malformedEmbeddingResponse(errorPrefix);
    }
  }
  return value;
}
/**
 * Derives how many embeddings a request should produce from its body.
 *
 * @param body - The request body; only an object with an array `input` property yields a count.
 * @returns The length of `body.input` when it is an array, otherwise `undefined`.
 */
function resolveExpectedEmbeddingCount(body: unknown): number | undefined {
  const bodyRecord = asRecord(body);
  const input = bodyRecord?.input;
  if (Array.isArray(input)) {
    return input.length;
  }
  return undefined;
}
export async function fetchRemoteEmbeddingVectors(params: {
url: string;
headers: Record<string, string>;
@@ -17,11 +44,21 @@ export async function fetchRemoteEmbeddingVectors(params: {
body: params.body,
errorPrefix: params.errorPrefix,
parse: (payload) => {
const typedPayload = payload as {
data?: Array<{ embedding?: number[] }>;
};
const data = typedPayload.data ?? [];
return data.map((entry) => entry.embedding ?? []);
const root = asRecord(payload);
if (!root || !Array.isArray(root.data)) {
throw malformedEmbeddingResponse(params.errorPrefix);
}
const expectedCount = resolveExpectedEmbeddingCount(params.body);
if (expectedCount !== undefined && root.data.length !== expectedCount) {
throw malformedEmbeddingResponse(params.errorPrefix);
}
return root.data.map((entry) => {
const record = asRecord(entry);
if (!record) {
throw malformedEmbeddingResponse(params.errorPrefix);
}
return readEmbeddingVector(record.embedding, params.errorPrefix);
});
},
});
}