From 2d9ef76d5b6bd3bc386c1295e5ec26e6ec3d0656 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 14 May 2026 19:38:11 +0800 Subject: [PATCH] fix(bedrock): wrap malformed embedding json --- CHANGELOG.md | 1 + .../amazon-bedrock/embedding-provider.test.ts | 16 +++++- .../amazon-bedrock/embedding-provider.ts | 52 +++++++++++++++---- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29c8676617b..48c1c8c0193 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ Docs: https://docs.openclaw.ai - Google Meet: report malformed browser-control status JSON with plugin-owned errors instead of leaking raw parser failures. - Google provider: report malformed SSE stream JSON with provider-owned errors instead of leaking raw parser failures. - Node host: report malformed built-in invoke `paramsJSON` with stable invalid-request errors instead of leaking raw parser failures. +- Amazon Bedrock embeddings: report malformed provider response JSON with provider-owned errors instead of leaking raw parser failures. - Models config/auth: stop inferring provider env-var markers from broad `^[A-Z_][A-Z0-9_]*$` strings, and resolve config-backed provider `apiKey` values only through structured env SecretRefs (`secrets.providers[id]` / `secrets.defaults`), so unrelated env vars cannot accidentally become provider credentials. Thanks @sallyom. - Media fetch: skip allocating and buffering the response body for bodyless media responses (HEAD probes and 204-style empty bodies), avoiding wasted heap on streams that carry no payload. Thanks @shakkernerd. - CLI/onboarding: forward provider-specific auth flags (e.g. `--openai-api-key`) through the onboarding wizard so they reach provider auth methods via `ctx.opts`, letting `--openai-api-key "$OPENAI_API_KEY"` skip the redundant "use existing env var?" prompt in non-interactive harnesses. (#81669) Thanks @sjf. diff --git a/extensions/amazon-bedrock/embedding-provider.test.ts b/extensions/amazon-bedrock/embedding-provider.test.ts index 9854302fb64..ce4cb76be74 100644 --- a/extensions/amazon-bedrock/embedding-provider.test.ts +++ b/extensions/amazon-bedrock/embedding-provider.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it, vi } from "vitest"; -import { hasAwsCredentials } from "./embedding-provider.js"; +import { __testing, hasAwsCredentials } from "./embedding-provider.js"; describe("hasAwsCredentials", () => { it("accepts static AWS key credentials without loading the credential chain", async () => { @@ -63,3 +63,17 @@ describe("hasAwsCredentials", () => { await expect(hasAwsCredentials({}, loadCredentialProvider)).resolves.toBe(false); }); }); + +describe("bedrock embedding response parsers", () => { + it("wraps malformed single embedding JSON", () => { + expect(() => __testing.parseSingle("titan-v2", "{not json")).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); + + it("wraps malformed batch embedding JSON", () => { + expect(() => __testing.parseCohereBatch("cohere-v3", "{not json")).toThrow( + "Amazon Bedrock embedding response returned malformed JSON", + ); + }); +}); diff --git a/extensions/amazon-bedrock/embedding-provider.ts b/extensions/amazon-bedrock/embedding-provider.ts index b232eb95b5c..afd446d0dd0 100644 --- a/extensions/amazon-bedrock/embedding-provider.ts +++ b/extensions/amazon-bedrock/embedding-provider.ts @@ -224,37 +224,69 @@ function buildCohereBody( // Response parsers // --------------------------------------------------------------------------- +type BedrockEmbeddingResponseJson = { + embedding?: unknown; + embeddings?: unknown; + data?: unknown; +}; + +function parseBedrockEmbeddingResponseJson(raw: string): BedrockEmbeddingResponseJson { + try { + const parsed = JSON.parse(raw) as unknown; + return parsed && typeof parsed === "object" && !Array.isArray(parsed) + ? (parsed as BedrockEmbeddingResponseJson) + : {}; + } catch { + throw new Error("Amazon Bedrock embedding response returned malformed JSON"); + } +} + +function asNumberArray(value: unknown): number[] { + return Array.isArray(value) ? (value as number[]) : []; +} + +function asNumberArrayBatch(value: unknown): number[][] { + return Array.isArray(value) ? (value.filter(Array.isArray) as number[][]) : []; +} + function parseSingle(family: Family, raw: string): number[] { - const data = JSON.parse(raw); + const data = parseBedrockEmbeddingResponseJson(raw); switch (family) { case "nova": - return data.embeddings?.[0]?.embedding ?? []; + return asNumberArray(Array.isArray(data.embeddings) ? data.embeddings[0]?.embedding : null); case "twelvelabs": { if (Array.isArray(data.data)) { - return data.data[0]?.embedding ?? []; + return asNumberArray(data.data[0]?.embedding); } - if (Array.isArray(data.data?.embedding)) { - return data.data.embedding; + if (data.data && typeof data.data === "object") { + return asNumberArray((data.data as { embedding?: unknown }).embedding); } - return data.embedding ?? []; + return asNumberArray(data.embedding); } default: - return data.embedding ?? []; + return asNumberArray(data.embedding); } } function parseCohereBatch(family: Family, raw: string): number[][] { - const data = JSON.parse(raw); + const data = parseBedrockEmbeddingResponseJson(raw); const embeddings = data.embeddings; if (!embeddings) { return []; } if (family === "cohere-v4" && !Array.isArray(embeddings)) { - return embeddings.float ?? []; + return embeddings && typeof embeddings === "object" + ? asNumberArrayBatch((embeddings as { float?: unknown }).float) + : []; } - return embeddings; + return asNumberArrayBatch(embeddings); } +export const __testing = { + parseCohereBatch, + parseSingle, +}; + // --------------------------------------------------------------------------- // Provider // ---------------------------------------------------------------------------