From 8813b79990f44bf038655fddd1e5dcc59d5ae53c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 14 May 2026 19:43:34 +0800 Subject: [PATCH] fix(openai): wrap malformed embedding batch jsonl --- CHANGELOG.md | 1 + extensions/openai/embedding-batch.test.ts | 10 ++++++++++ extensions/openai/embedding-batch.ts | 10 ++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 extensions/openai/embedding-batch.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 41081c7ae18..d11b69bcdcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ Docs: https://docs.openclaw.ai - Node host: report malformed built-in invoke `paramsJSON` with stable invalid-request errors instead of leaking raw parser failures. - Amazon Bedrock embeddings: report malformed provider response JSON with provider-owned errors instead of leaking raw parser failures. - QQBot: report malformed access-token JSON with provider-owned errors instead of leaking raw parser failures. +- OpenAI embeddings: report malformed batch output JSONL with provider-owned errors instead of leaking raw parser failures. - Models config/auth: stop inferring provider env-var markers from broad `^[A-Z_][A-Z0-9_]*$` strings, and resolve config-backed provider `apiKey` values only through structured env SecretRefs (`secrets.providers[id]` / `secrets.defaults`), so unrelated env vars cannot accidentally become provider credentials. Thanks @sallyom. - Media fetch: skip allocating and buffering the response body for bodyless media responses (HEAD probes and 204-style empty bodies), avoiding wasted heap on streams that carry no payload. Thanks @shakkernerd. - CLI/onboarding: forward provider-specific auth flags (e.g. `--openai-api-key`) through the onboarding wizard so they reach provider auth methods via `ctx.opts`, letting `--openai-api-key "$OPENAI_API_KEY"` skip the redundant "use existing env var?" prompt in non-interactive harnesses. (#81669) Thanks @sjf. diff --git a/extensions/openai/embedding-batch.test.ts b/extensions/openai/embedding-batch.test.ts new file mode 100644 index 00000000000..7dacf1310ae --- /dev/null +++ b/extensions/openai/embedding-batch.test.ts @@ -0,0 +1,10 @@ +import { describe, expect, it } from "vitest"; +import { parseOpenAiBatchOutput } from "./embedding-batch.js"; + +describe("OpenAI embedding batch output", () => { + it("wraps malformed JSONL output", () => { + expect(() => parseOpenAiBatchOutput('{"custom_id":"ok"}\n{not json')).toThrow( + "OpenAI embedding batch output contained malformed JSONL", + ); + }); +}); diff --git a/extensions/openai/embedding-batch.ts b/extensions/openai/embedding-batch.ts index 0b1366c1019..ab119aaa0bd 100644 --- a/extensions/openai/embedding-batch.ts +++ b/extensions/openai/embedding-batch.ts @@ -122,7 +122,7 @@ async function fetchOpenAiBatchResource(params: { }); } -function parseOpenAiBatchOutput(text: string): OpenAiBatchOutputLine[] { +export function parseOpenAiBatchOutput(text: string): OpenAiBatchOutputLine[] { if (!text.trim()) { return []; } @@ -130,7 +130,13 @@ function parseOpenAiBatchOutput(text: string): OpenAiBatchOutputLine[] { .split("\n") .map((line) => line.trim()) .filter(Boolean) - .map((line) => JSON.parse(line) as OpenAiBatchOutputLine); + .map((line) => { + try { + return JSON.parse(line) as OpenAiBatchOutputLine; + } catch { + throw new Error("OpenAI embedding batch output contained malformed JSONL"); + } + }); } async function readOpenAiBatchError(params: {