mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 17:54:47 +00:00
fix(providers): harden embedding response schemas
This commit is contained in:
@@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/images: reject malformed successful OpenAI-compatible, OpenAI, Google, fal, and OpenRouter image responses with provider-owned errors instead of raw shape failures, silent invalid base64 skips, or empty image results.
|
||||
- Providers/videos: reject malformed successful xAI, OpenRouter, and fal video create, poll, and result responses with provider-owned errors instead of raw parser failures or long bogus polling.
|
||||
- Providers/audio: reject malformed successful OpenAI-compatible, ElevenLabs, and Deepgram speech responses with provider-owned errors instead of raw parser failures, wrong-shaped transcripts, or JSON/text bodies treated as audio.
|
||||
- Providers/embeddings: reject malformed successful OpenAI-compatible, Google Gemini, and Amazon Bedrock embedding responses instead of silently returning empty or coerced vectors.
|
||||
- Trajectory export: skip and report malformed session/runtime JSONL rows in `manifest.json` instead of letting wrong-shaped session rows crash support bundle export.
|
||||
- Voice calls: persist rejected inbound-call replay keys so duplicate carrier webhook retries stay ignored after a Gateway restart.
|
||||
- Config/doctor: copy fallback-enabled channel `allowFrom` entries into explicit `groupAllowFrom` allowlists during `openclaw doctor --fix`, preserving current group access without adding runtime fallback-transition flags.
|
||||
|
||||
@@ -76,4 +76,34 @@ describe("bedrock embedding response parsers", () => {
|
||||
"Amazon Bedrock embedding response returned malformed JSON",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects non-object embedding JSON", () => {
|
||||
expect(() => __testing.parseSingle("titan-v2", "[]")).toThrow(
|
||||
"Amazon Bedrock embedding response returned malformed JSON",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects missing single embedding vectors", () => {
|
||||
expect(() => __testing.parseSingle("titan-v2", "{}")).toThrow(
|
||||
"Amazon Bedrock embedding response returned malformed JSON",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects wrong single embedding vector element types", () => {
|
||||
expect(() => __testing.parseSingle("titan-v2", '{"embedding":[1,"bad"]}')).toThrow(
|
||||
"Amazon Bedrock embedding response returned malformed JSON",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects missing batch embedding vectors", () => {
|
||||
expect(() => __testing.parseCohereBatch("cohere-v3", "{}")).toThrow(
|
||||
"Amazon Bedrock embedding response returned malformed JSON",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects wrong batch embedding vector shapes", () => {
|
||||
expect(() =>
|
||||
__testing.parseCohereBatch("cohere-v3", '{"embeddings":[[1],{"bad":true}]}'),
|
||||
).toThrow("Amazon Bedrock embedding response returned malformed JSON");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -233,20 +233,42 @@ type BedrockEmbeddingResponseJson = {
|
||||
/**
 * Parses a raw Bedrock embedding response body into a JSON object envelope.
 *
 * Only the top-level shape is checked here; individual fields are validated by
 * the vector readers that consume the result.
 *
 * @param raw - Response body text expected to contain a JSON object.
 * @returns The parsed top-level object.
 * @throws Error with message "Amazon Bedrock embedding response returned malformed JSON"
 *   when `raw` is not valid JSON or its top-level value is not a non-array object.
 */
function parseBedrockEmbeddingResponseJson(raw: string): BedrockEmbeddingResponseJson {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Replace the raw SyntaxError with the provider-owned error.
    throw malformedBedrockEmbeddingResponse();
  }
  // `typeof null` is "object" and arrays are objects too; neither is a usable envelope.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw malformedBedrockEmbeddingResponse();
  }
  return parsed as BedrockEmbeddingResponseJson;
}
|
||||
|
||||
/**
 * Builds the single provider-owned error used for every malformed Bedrock
 * embedding response.
 *
 * The error is returned rather than thrown so call sites can write
 * `throw malformedBedrockEmbeddingResponse()`.
 *
 * @returns A new Error carrying the fixed malformed-response message.
 */
function malformedBedrockEmbeddingResponse(): Error {
  return new Error("Amazon Bedrock embedding response returned malformed JSON");
}
|
||||
|
||||
/**
 * Validates that `value` is an embedding vector: an array whose entries are all
 * finite numbers.
 *
 * @param value - Untrusted value read from a parsed response.
 * @returns The same array, typed as `number[]`. An empty array is accepted.
 * @throws Error (malformed Bedrock response) when `value` is not an array or any
 *   entry is not a finite number (strings, nested arrays, NaN, ±Infinity, …).
 */
function asNumberArray(value: unknown): number[] {
  if (!Array.isArray(value)) {
    throw malformedBedrockEmbeddingResponse();
  }
  const entries: unknown[] = value;
  for (const entry of entries) {
    // Number.isFinite rejects NaN and ±Infinity without coercing its argument.
    if (typeof entry !== "number" || !Number.isFinite(entry)) {
      throw malformedBedrockEmbeddingResponse();
    }
  }
  return entries as number[];
}
|
||||
|
||||
/**
 * Narrows an untrusted value to a plain key/value record.
 *
 * @param value - Any value, typically a fragment of parsed JSON.
 * @returns `value` itself when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  // `typeof null` is "object", and arrays are objects, so both are excluded explicitly.
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Validates a batch of embedding vectors.
 *
 * Every row must itself pass `asNumberArray`; a single wrong-shaped row rejects
 * the whole batch instead of being dropped, so the output length always matches
 * the input length.
 *
 * @param value - Untrusted value expected to be an array of number arrays.
 * @returns One validated vector per input row, in order.
 * @throws Error (malformed Bedrock response) when `value` is not an array or any
 *   row is not an array of finite numbers.
 */
function asNumberArrayBatch(value: unknown): number[][] {
  if (!Array.isArray(value)) {
    throw malformedBedrockEmbeddingResponse();
  }
  const rows: unknown[] = value;
  return rows.map((row) => asNumberArray(row));
}
|
||||
|
||||
function parseSingle(family: Family, raw: string): number[] {
|
||||
@@ -256,10 +278,11 @@ function parseSingle(family: Family, raw: string): number[] {
|
||||
return asNumberArray(Array.isArray(data.embeddings) ? data.embeddings[0]?.embedding : null);
|
||||
case "twelvelabs": {
|
||||
if (Array.isArray(data.data)) {
|
||||
return asNumberArray(data.data[0]?.embedding);
|
||||
return asNumberArray(asRecord(data.data[0])?.embedding);
|
||||
}
|
||||
if (data.data && typeof data.data === "object") {
|
||||
return asNumberArray((data.data as { embedding?: unknown }).embedding);
|
||||
const dataRecord = asRecord(data.data);
|
||||
if (dataRecord) {
|
||||
return asNumberArray(dataRecord.embedding);
|
||||
}
|
||||
return asNumberArray(data.embedding);
|
||||
}
|
||||
@@ -272,12 +295,14 @@ function parseCohereBatch(family: Family, raw: string): number[][] {
|
||||
const data = parseBedrockEmbeddingResponseJson(raw);
|
||||
const embeddings = data.embeddings;
|
||||
if (!embeddings) {
|
||||
return [];
|
||||
throw malformedBedrockEmbeddingResponse();
|
||||
}
|
||||
if (family === "cohere-v4" && !Array.isArray(embeddings)) {
|
||||
return embeddings && typeof embeddings === "object"
|
||||
? asNumberArrayBatch((embeddings as { float?: unknown }).float)
|
||||
: [];
|
||||
const embeddingRecord = asRecord(embeddings);
|
||||
if (!embeddingRecord) {
|
||||
throw malformedBedrockEmbeddingResponse();
|
||||
}
|
||||
return asNumberArrayBatch(embeddingRecord.float);
|
||||
}
|
||||
return asNumberArrayBatch(embeddings);
|
||||
}
|
||||
|
||||
@@ -137,10 +137,10 @@ describe("Gemini embedding provider", () => {
|
||||
return url.endsWith(":batchEmbedContents")
|
||||
? {
|
||||
embeddings: Array.from({ length: 2 }, () => ({
|
||||
values: [0, Number.POSITIVE_INFINITY, 5],
|
||||
values: [0, 0, 5],
|
||||
})),
|
||||
}
|
||||
: { embedding: { values: [3, 4, Number.NaN] } };
|
||||
: { embedding: { values: [3, 4, 0] } };
|
||||
});
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
@@ -213,4 +213,52 @@ describe("Gemini embedding provider", () => {
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects non-object successful embedding responses", async () => {
|
||||
installFetchMock(() => []);
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
remote: { apiKey: "test-key" },
|
||||
model: "gemini-embedding-001",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await expect(provider.embedQuery("test query")).rejects.toThrow(
|
||||
"gemini embeddings failed: malformed JSON response",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects wrong single embedding vector shapes", async () => {
|
||||
installFetchMock(() => ({ embedding: { values: [1, "bad"] } }));
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
remote: { apiKey: "test-key" },
|
||||
model: "gemini-embedding-001",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await expect(provider.embedQuery("test query")).rejects.toThrow(
|
||||
"gemini embeddings failed: malformed JSON response",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects batch embedding count mismatches", async () => {
|
||||
installFetchMock(() => ({ embeddings: [{ values: [1, 2] }] }));
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
remote: { apiKey: "test-key" },
|
||||
model: "gemini-embedding-001",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await expect(provider.embedBatch(["one", "two"])).rejects.toThrow(
|
||||
"gemini embeddings failed: malformed JSON response",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
import {
|
||||
createProviderHttpError,
|
||||
providerOperationRetryConfig,
|
||||
readProviderJsonObjectResponse,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type { SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
@@ -90,6 +91,52 @@ type GeminiEmbeddingRequest = {
|
||||
};
|
||||
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/**
 * Narrows an untrusted value to a plain key/value record.
 *
 * @param value - Any value, typically a fragment of a parsed Gemini response.
 * @returns `value` itself when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  // `typeof null` is "object", and arrays are objects, so both are excluded explicitly.
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Builds the error used for every wrong-shaped successful Gemini embedding
 * response.
 *
 * The error is returned rather than thrown so call sites can write
 * `throw malformedGeminiEmbeddingResponse()`.
 *
 * @returns A new Error carrying the fixed malformed-response message.
 */
function malformedGeminiEmbeddingResponse(): Error {
  return new Error("gemini embeddings failed: malformed JSON response");
}
|
||||
|
||||
/**
 * Validates the `values` field of a Gemini embedding entry.
 *
 * @param value - Untrusted value expected to be an array of finite numbers.
 * @returns The same array, typed as `number[]`. An empty array is accepted.
 * @throws Error (malformed Gemini response) when `value` is not an array or any
 *   entry is not a finite number (strings, NaN, ±Infinity, …).
 */
function readGeminiEmbeddingValues(value: unknown): number[] {
  if (!Array.isArray(value)) {
    throw malformedGeminiEmbeddingResponse();
  }
  const entries: unknown[] = value;
  for (const entry of entries) {
    // Number.isFinite rejects NaN and ±Infinity without coercing its argument.
    if (typeof entry !== "number" || !Number.isFinite(entry)) {
      throw malformedGeminiEmbeddingResponse();
    }
  }
  return entries as number[];
}
|
||||
|
||||
/**
 * Extracts the vector from a single-embedding Gemini response of the form
 * `{ embedding: { values: number[] } }`.
 *
 * @param payload - Parsed top-level response object.
 * @returns The validated embedding vector.
 * @throws Error (malformed Gemini response) when `payload.embedding` is not an
 *   object or its `values` field is not an array of finite numbers.
 */
function readGeminiSingleEmbedding(payload: Record<string, unknown>): number[] {
  const embedding = asRecord(payload.embedding);
  if (embedding === undefined) {
    throw malformedGeminiEmbeddingResponse();
  }
  return readGeminiEmbeddingValues(embedding.values);
}
|
||||
|
||||
/**
 * Extracts the vectors from a batch Gemini response of the form
 * `{ embeddings: Array<{ values: number[] }> }`.
 *
 * @param payload - Parsed top-level response object.
 * @param expectedCount - Number of embeddings the response must contain.
 * @returns One validated vector per entry, in response order.
 * @throws Error (malformed Gemini response) when `payload.embeddings` is not an
 *   array, its length differs from `expectedCount`, or any entry is not an
 *   object with a valid `values` array.
 */
function readGeminiBatchEmbeddings(
  payload: Record<string, unknown>,
  expectedCount: number,
): number[][] {
  const entries: unknown = payload.embeddings;
  // A short or long batch cannot be matched to its inputs, so it is rejected outright.
  if (!Array.isArray(entries) || entries.length !== expectedCount) {
    throw malformedGeminiEmbeddingResponse();
  }
  return entries.map((entry: unknown) => {
    const embedding = asRecord(entry);
    if (embedding === undefined) {
      throw malformedGeminiEmbeddingResponse();
    }
    return readGeminiEmbeddingValues(embedding.values);
  });
}
|
||||
|
||||
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
||||
export function buildGeminiTextEmbeddingRequest(params: {
|
||||
text: string;
|
||||
@@ -195,10 +242,7 @@ async function fetchGeminiEmbeddingPayload(params: {
|
||||
client: GeminiEmbeddingClient;
|
||||
endpoint: string;
|
||||
body: unknown;
|
||||
}): Promise<{
|
||||
embedding?: { values?: number[] };
|
||||
embeddings?: Array<{ values?: number[] }>;
|
||||
}> {
|
||||
}): Promise<Record<string, unknown>> {
|
||||
return await executeWithApiKeyRotation({
|
||||
provider: "google",
|
||||
apiKeys: params.client.apiKeys,
|
||||
@@ -221,10 +265,7 @@ async function fetchGeminiEmbeddingPayload(params: {
|
||||
if (!res.ok) {
|
||||
throw await createProviderHttpError(res, "gemini embeddings failed");
|
||||
}
|
||||
return (await res.json()) as {
|
||||
embedding?: { values?: number[] };
|
||||
embeddings?: Array<{ values?: number[] }>;
|
||||
};
|
||||
return await readProviderJsonObjectResponse(res, "gemini embeddings failed");
|
||||
},
|
||||
});
|
||||
},
|
||||
@@ -288,7 +329,7 @@ export async function createGeminiEmbeddingProvider(
|
||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||
}),
|
||||
});
|
||||
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
||||
return sanitizeAndNormalizeEmbedding(readGeminiSingleEmbedding(payload));
|
||||
};
|
||||
|
||||
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
|
||||
@@ -309,8 +350,8 @@ export async function createGeminiEmbeddingProvider(
|
||||
),
|
||||
},
|
||||
});
|
||||
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
||||
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
||||
const embeddings = readGeminiBatchEmbeddings(payload, inputs.length);
|
||||
return embeddings.map((values) => sanitizeAndNormalizeEmbedding(values));
|
||||
};
|
||||
|
||||
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
||||
|
||||
@@ -32,7 +32,7 @@ describe("fetchRemoteEmbeddingVectors", () => {
|
||||
it("maps remote embedding response data to vectors", async () => {
|
||||
postJsonMock.mockImplementationOnce(async (params) => {
|
||||
return await params.parse({
|
||||
data: [{ embedding: [0.1, 0.2] }, {}, { embedding: [0.3] }],
|
||||
data: [{ embedding: [0.1, 0.2] }, { embedding: [0.4] }, { embedding: [0.3] }],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -43,7 +43,7 @@ describe("fetchRemoteEmbeddingVectors", () => {
|
||||
errorPrefix: "embedding fetch failed",
|
||||
});
|
||||
|
||||
expect(vectors).toEqual([[0.1, 0.2], [], [0.3]]);
|
||||
expect(vectors).toEqual([[0.1, 0.2], [0.4], [0.3]]);
|
||||
const postJsonParams = requirePostJsonParams();
|
||||
expect(postJsonParams.url).toBe("https://memory.example/v1/embeddings");
|
||||
expect(postJsonParams.headers).toEqual({ Authorization: "Bearer test" });
|
||||
@@ -63,4 +63,60 @@ describe("fetchRemoteEmbeddingVectors", () => {
|
||||
}),
|
||||
).rejects.toThrow("embedding fetch failed: 403 forbidden");
|
||||
});
|
||||
|
||||
it("rejects non-object embedding responses", async () => {
|
||||
postJsonMock.mockImplementationOnce(async (params) => await params.parse([]));
|
||||
|
||||
await expect(
|
||||
fetchRemoteEmbeddingVectors({
|
||||
url: "https://memory.example/v1/embeddings",
|
||||
headers: {},
|
||||
body: { input: ["one"] },
|
||||
errorPrefix: "embedding fetch failed",
|
||||
}),
|
||||
).rejects.toThrow("embedding fetch failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects missing embedding data arrays", async () => {
|
||||
postJsonMock.mockImplementationOnce(async (params) => await params.parse({}));
|
||||
|
||||
await expect(
|
||||
fetchRemoteEmbeddingVectors({
|
||||
url: "https://memory.example/v1/embeddings",
|
||||
headers: {},
|
||||
body: { input: ["one"] },
|
||||
errorPrefix: "embedding fetch failed",
|
||||
}),
|
||||
).rejects.toThrow("embedding fetch failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects embedding counts that do not match the submitted input batch", async () => {
|
||||
postJsonMock.mockImplementationOnce(async (params) => {
|
||||
return await params.parse({ data: [{ embedding: [0.1] }] });
|
||||
});
|
||||
|
||||
await expect(
|
||||
fetchRemoteEmbeddingVectors({
|
||||
url: "https://memory.example/v1/embeddings",
|
||||
headers: {},
|
||||
body: { input: ["one", "two"] },
|
||||
errorPrefix: "embedding fetch failed",
|
||||
}),
|
||||
).rejects.toThrow("embedding fetch failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects wrong nested embedding vector types", async () => {
|
||||
postJsonMock.mockImplementationOnce(async (params) => {
|
||||
return await params.parse({ data: [{ embedding: [0.1, "bad"] }] });
|
||||
});
|
||||
|
||||
await expect(
|
||||
fetchRemoteEmbeddingVectors({
|
||||
url: "https://memory.example/v1/embeddings",
|
||||
headers: {},
|
||||
body: { input: ["one"] },
|
||||
errorPrefix: "embedding fetch failed",
|
||||
}),
|
||||
).rejects.toThrow("embedding fetch failed: malformed JSON response");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,33 @@
|
||||
import { postJson } from "./post-json.js";
|
||||
import type { SsrFPolicy } from "./ssrf-policy.js";
|
||||
|
||||
/**
 * Narrows an untrusted value to a plain key/value record.
 *
 * @param value - Any value, typically a request body or a fragment of parsed JSON.
 * @returns `value` itself when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  // `typeof null` is "object", and arrays are objects, so both are excluded explicitly.
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Builds the error reported for a wrong-shaped successful embedding response.
 *
 * @param errorPrefix - Caller-supplied label placed before the fixed suffix.
 * @returns A new Error whose message is `"<errorPrefix>: malformed JSON response"`.
 */
function malformedEmbeddingResponse(errorPrefix: string): Error {
  return new Error(`${errorPrefix}: malformed JSON response`);
}
|
||||
|
||||
/**
 * Validates one embedding vector taken from a remote response entry.
 *
 * @param value - Untrusted value expected to be an array of finite numbers.
 * @param errorPrefix - Label used to build the error message on failure.
 * @returns The same array, typed as `number[]`. An empty array is accepted.
 * @throws Error `"<errorPrefix>: malformed JSON response"` when `value` is not an
 *   array or any entry is not a finite number (strings, NaN, ±Infinity, …).
 */
function readEmbeddingVector(value: unknown, errorPrefix: string): number[] {
  if (!Array.isArray(value)) {
    throw malformedEmbeddingResponse(errorPrefix);
  }
  const entries: unknown[] = value;
  for (const entry of entries) {
    // Number.isFinite rejects NaN and ±Infinity without coercing its argument.
    if (typeof entry !== "number" || !Number.isFinite(entry)) {
      throw malformedEmbeddingResponse(errorPrefix);
    }
  }
  return entries as number[];
}
|
||||
|
||||
/**
 * Derives how many embeddings a request should produce from its body.
 *
 * @param body - The request body as submitted; its shape is not assumed.
 * @returns The length of `body.input` when the body is an object whose `input`
 *   is an array; otherwise `undefined`, meaning no expected count can be derived.
 */
function resolveExpectedEmbeddingCount(body: unknown): number | undefined {
  const input = asRecord(body)?.input;
  if (!Array.isArray(input)) {
    return undefined;
  }
  return input.length;
}
|
||||
|
||||
export async function fetchRemoteEmbeddingVectors(params: {
|
||||
url: string;
|
||||
headers: Record<string, string>;
|
||||
@@ -17,11 +44,21 @@ export async function fetchRemoteEmbeddingVectors(params: {
|
||||
body: params.body,
|
||||
errorPrefix: params.errorPrefix,
|
||||
parse: (payload) => {
|
||||
const typedPayload = payload as {
|
||||
data?: Array<{ embedding?: number[] }>;
|
||||
};
|
||||
const data = typedPayload.data ?? [];
|
||||
return data.map((entry) => entry.embedding ?? []);
|
||||
const root = asRecord(payload);
|
||||
if (!root || !Array.isArray(root.data)) {
|
||||
throw malformedEmbeddingResponse(params.errorPrefix);
|
||||
}
|
||||
const expectedCount = resolveExpectedEmbeddingCount(params.body);
|
||||
if (expectedCount !== undefined && root.data.length !== expectedCount) {
|
||||
throw malformedEmbeddingResponse(params.errorPrefix);
|
||||
}
|
||||
return root.data.map((entry) => {
|
||||
const record = asRecord(entry);
|
||||
if (!record) {
|
||||
throw malformedEmbeddingResponse(params.errorPrefix);
|
||||
}
|
||||
return readEmbeddingVector(record.embedding, params.errorPrefix);
|
||||
});
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user