From 9d4b0d551d1d758a4abbc7df06ee30103a0c4ea9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 7 Apr 2026 15:52:41 +0100 Subject: [PATCH] fix: support inferrs string-only completions --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 6 +- docs/gateway/configuration-reference.md | 1 + docs/gateway/local-models.md | 21 +++ docs/gateway/troubleshooting.md | 55 ++++++ docs/help/troubleshooting.md | 15 ++ docs/providers/index.md | 1 + docs/providers/inferrs.md | 173 ++++++++++++++++++ .../openai-completions-string-content.ts | 35 ++++ src/agents/openai-transport-stream.test.ts | 35 ++++ src/agents/openai-transport-stream.ts | 8 +- .../pi-embedded-runner-extraparams.test.ts | 50 +++++ src/agents/pi-embedded-runner/extra-params.ts | 6 +- .../openai-stream-wrappers.ts | 27 +++ src/config/config-misc.test.ts | 1 + src/config/schema.base.generated.ts | 3 + src/config/types.models.ts | 1 + src/config/zod-schema.core.ts | 1 + 18 files changed, 435 insertions(+), 5 deletions(-) create mode 100644 docs/providers/inferrs.md create mode 100644 src/agents/openai-completions-string-content.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index e547e6eee84..e181da2be0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai - Memory/wiki: use compiled digest artifacts as the first-pass wiki index for search/get flows, and resolve claim ids back to owning pages so agents can retrieve knowledge by belief identity instead of only by file path. Thanks @vincentkoc. - Memory/wiki: add an opt-in `context.includeCompiledDigestPrompt` flag so memory prompt supplements can append a compact compiled wiki snapshot for legacy prompt assembly and context engines that explicitly consume memory prompt sections. Thanks @vincentkoc. 
- Plugin SDK/context engines: pass `availableTools` and `citationsMode` into `assemble()`, and expose `buildMemorySystemPromptAddition(...)` so non-legacy context engines can adopt the active memory prompt path without reimplementing it. Thanks @vincentkoc. +- Providers/inferrs: add string-content compatibility for stricter OpenAI-compatible chat backends, document `inferrs` setup with a full config example, and add troubleshooting guidance for local backends that pass direct probes but fail on full agent-runtime prompts. ### Fixes diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index de6aa8d2272..68654c94d61 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -838e3c2f798321d47ccafd132b07a94a676ecf01ec128550c85cea9c2cacf0f5 config-baseline.json -531ad785e7877e8d426985df5074b958a09ea61da5557061f8762272ef9e1d46 config-baseline.core.json +af24bd5a2a86e8bb481302211b35c440e82636585c46f57050648c0290b1d4ee config-baseline.json +73bda77ebf7d70609c57f394655332536eb5ff55516a6b7db06243bd4e8e44a5 config-baseline.core.json d22f4414b79ee03d896e58d875c80523bcc12303cbacb1700261e6ec73945187 config-baseline.channel.json -d32b286c554e8fe7a53b01dde23987fa6eb2140f021297bf029aed5542d721af config-baseline.plugin.json +d42cee3dea4668bdb7daf6ff5e6f87f326fdef56a8c3716d73079b92cab6e7b2 config-baseline.plugin.json diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index efc3bcbd221..481086f5353 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -2349,6 +2349,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi - `models.providers.*.models.*.contextWindow`: native model context window metadata. - `models.providers.*.models.*.contextTokens`: optional runtime context cap. 
Use this when you want a smaller effective context budget than the model's native `contextWindow`. - `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior. +- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request. - `plugins.entries.amazon-bedrock.config.discovery`: Bedrock auto-discovery settings root. - `plugins.entries.amazon-bedrock.config.discovery.enabled`: turn implicit discovery on/off. - `plugins.entries.amazon-bedrock.config.discovery.region`: AWS region for discovery. diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md index dc38aba4452..38739afe051 100644 --- a/docs/gateway/local-models.md +++ b/docs/gateway/local-models.md @@ -155,9 +155,30 @@ Behavior note for local/proxied `/v1` backends: - hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`) are not injected on these custom proxy URLs +Compatibility notes for stricter OpenAI-compatible backends: + +- Some servers accept only string `messages[].content` on Chat Completions, not + structured content-part arrays. Set + `models.providers.<provider>.models[].compat.requiresStringContent: true` for + those endpoints. +- Some smaller or stricter local backends are unstable with OpenClaw's full + agent-runtime prompt shape, especially when tool schemas are included. If the + backend works for tiny direct `/v1/chat/completions` calls but fails on normal + OpenClaw agent turns, try + `models.providers.<provider>.models[].compat.supportsTools: false` first. 
+- If the backend still fails only on larger OpenClaw runs, the remaining issue + is usually upstream model/server capacity or a backend bug, not OpenClaw's + transport layer. + ## Troubleshooting - Gateway can reach the proxy? `curl http://127.0.0.1:1234/v1/models`. - LM Studio model unloaded? Reload; cold start is a common “hanging” cause. - Context errors? Lower `contextWindow` or raise your server limit. +- OpenAI-compatible server returns `messages[].content ... expected a string`? + Add `compat.requiresStringContent: true` on that model entry. +- Direct tiny `/v1/chat/completions` calls work, but `openclaw infer model run` + fails on Gemma or another local model? Disable tool schemas first with + `compat.supportsTools: false`, then retest. If the server still crashes only + on larger OpenClaw prompts, treat it as an upstream server/model limitation. - Safety: local models skip provider-side filters; keep agents narrow and compaction on to limit prompt injection blast radius. diff --git a/docs/gateway/troubleshooting.md b/docs/gateway/troubleshooting.md index 0cd550d4057..a910d774dcd 100644 --- a/docs/gateway/troubleshooting.md +++ b/docs/gateway/troubleshooting.md @@ -59,6 +59,61 @@ Related: - [/reference/token-use](/reference/token-use) - [/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic](/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic) +## Local OpenAI-compatible backend passes direct probes but agent runs fail + +Use this when: + +- `curl ... 
/v1/models` works +- tiny direct `/v1/chat/completions` calls work +- OpenClaw model runs fail only on normal agent turns + +```bash +curl http://127.0.0.1:1234/v1/models +curl http://127.0.0.1:1234/v1/chat/completions \ + -H 'content-type: application/json' \ + -d '{"model":"<model-id>","messages":[{"role":"user","content":"hi"}],"stream":false}' +openclaw infer model run --model <provider/model> --prompt "hi" --json +openclaw logs --follow +``` + +Look for: + +- direct tiny calls succeed, but OpenClaw runs fail only on larger prompts +- backend errors about `messages[].content` expecting a string +- backend crashes that appear only with larger prompt-token counts or full agent + runtime prompts + +Common signatures: + +- `messages[...].content: invalid type: sequence, expected a string` → backend + rejects structured Chat Completions content parts. Fix: set + `models.providers.<provider>.models[].compat.requiresStringContent: true`. +- direct tiny requests succeed, but OpenClaw agent runs fail with backend/model + crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is + likely already correct; the backend is failing on the larger agent-runtime + prompt shape. +- failures shrink after disabling tools but do not disappear → tool schemas were + part of the pressure, but the remaining issue is still upstream model/server + capacity or a backend bug. + +Fix options: + +1. Set `compat.requiresStringContent: true` for string-only Chat Completions backends. +2. Set `compat.supportsTools: false` for models/backends that cannot handle + OpenClaw's tool schema surface reliably. +3. Lower prompt pressure where possible: smaller workspace bootstrap, shorter + session history, lighter local model, or a backend with stronger long-context + support. +4. If tiny direct requests keep passing while OpenClaw agent turns still crash + inside the backend, treat it as an upstream server/model limitation and file + a repro there with the accepted payload shape. 
+ +Related: + +- [/gateway/local-models](/gateway/local-models) +- [/gateway/configuration#models](/gateway/configuration#models) +- [/gateway/configuration-reference#openai-compatible-endpoints](/gateway/configuration-reference#openai-compatible-endpoints) + ## No replies If channels are up but nothing answers, check routing and policy before reconnecting anything. diff --git a/docs/help/troubleshooting.md b/docs/help/troubleshooting.md index b00da724445..ec1ede31311 100644 --- a/docs/help/troubleshooting.md +++ b/docs/help/troubleshooting.md @@ -42,6 +42,21 @@ If you see: `HTTP 429: rate_limit_error: Extra usage is required for long context requests`, go to [/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context](/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context). +## Local OpenAI-compatible backend works directly but fails in OpenClaw + +If your local or self-hosted `/v1` backend answers small direct +`/v1/chat/completions` probes but fails on `openclaw infer model run` or normal +agent turns: + +1. If the error mentions `messages[].content` expecting a string, set + `models.providers.<provider>.models[].compat.requiresStringContent: true`. +2. If the backend still fails only on OpenClaw agent turns, set + `models.providers.<provider>.models[].compat.supportsTools: false` and retry. +3. 
If tiny direct calls still work but larger OpenClaw prompts crash the + backend, treat the remaining issue as an upstream model/server limitation and + continue in the deep runbook: + [/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail) + ## Plugin install fails with missing openclaw extensions If install fails with `package.json missing openclaw.extensions`, the plugin package diff --git a/docs/providers/index.md b/docs/providers/index.md index 7cd5dcbab1a..d5cadd595ed 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -42,6 +42,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi - [Google (Gemini)](/providers/google) - [Groq (LPU inference)](/providers/groq) - [Hugging Face (Inference)](/providers/huggingface) +- [inferrs (local models)](/providers/inferrs) - [Kilocode](/providers/kilocode) - [LiteLLM (unified gateway)](/providers/litellm) - [MiniMax](/providers/minimax) diff --git a/docs/providers/inferrs.md b/docs/providers/inferrs.md new file mode 100644 index 00000000000..069f9ece505 --- /dev/null +++ b/docs/providers/inferrs.md @@ -0,0 +1,173 @@ +--- +summary: "Run OpenClaw through inferrs (OpenAI-compatible local server)" +read_when: + - You want to run OpenClaw against a local inferrs server + - You are serving Gemma or another model through inferrs + - You need the exact OpenClaw compat flags for inferrs +title: "inferrs" +--- + +# inferrs + +[inferrs](https://github.com/ericcurtin/inferrs) can serve local models behind an +OpenAI-compatible `/v1` API. OpenClaw works with `inferrs` through the generic +`openai-completions` path. + +`inferrs` is currently best treated as a custom self-hosted OpenAI-compatible +backend, not a dedicated OpenClaw provider plugin. + +## Quick start + +1. Start `inferrs` with a model. 
+ +Example: + +```bash +inferrs serve gg-hf-gg/gemma-4-E2B-it \ + --host 127.0.0.1 \ + --port 8080 \ + --device metal +``` + +2. Verify the server is reachable. + +```bash +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8080/v1/models +``` + +3. Add an explicit OpenClaw provider entry and point your default model at it. + +## Full config example + +This example uses Gemma 4 on a local `inferrs` server. + +```json5 +{ + agents: { + defaults: { + model: { primary: "inferrs/gg-hf-gg/gemma-4-E2B-it" }, + models: { + "inferrs/gg-hf-gg/gemma-4-E2B-it": { + alias: "Gemma 4 (inferrs)", + }, + }, + }, + }, + models: { + mode: "merge", + providers: { + inferrs: { + baseUrl: "http://127.0.0.1:8080/v1", + apiKey: "inferrs-local", + api: "openai-completions", + models: [ + { + id: "gg-hf-gg/gemma-4-E2B-it", + name: "Gemma 4 E2B (inferrs)", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 4096, + compat: { + requiresStringContent: true, + }, + }, + ], + }, + }, + }, +} +``` + +## Why `requiresStringContent` matters + +Some `inferrs` Chat Completions routes accept only string +`messages[].content`, not structured content-part arrays. + +If OpenClaw runs fail with an error like: + +```text +messages[1].content: invalid type: sequence, expected a string +``` + +set: + +```json5 +compat: { + requiresStringContent: true +} +``` + +OpenClaw will flatten pure text content parts into plain strings before sending +the request. + +## Gemma and tool-schema caveat + +Some current `inferrs` + Gemma combinations accept small direct +`/v1/chat/completions` requests but still fail on full OpenClaw agent-runtime +turns. + +If that happens, try this first: + +```json5 +compat: { + requiresStringContent: true, + supportsTools: false +} +``` + +That disables OpenClaw's tool schema surface for the model and can reduce prompt +pressure on stricter local backends. 
+ +If tiny direct requests still work but normal OpenClaw agent turns continue to +crash inside `inferrs`, the remaining issue is usually upstream model/server +behavior rather than OpenClaw's transport layer. + +## Manual smoke test + +Once configured, test both layers: + +```bash +curl http://127.0.0.1:8080/v1/chat/completions \ + -H 'content-type: application/json' \ + -d '{"model":"gg-hf-gg/gemma-4-E2B-it","messages":[{"role":"user","content":"What is 2 + 2?"}],"stream":false}' + +openclaw infer model run \ + --model inferrs/gg-hf-gg/gemma-4-E2B-it \ + --prompt "What is 2 + 2? Reply with one short sentence." \ + --json +``` + +If the first command works but the second fails, use the troubleshooting notes +below. + +## Troubleshooting + +- `curl /v1/models` fails: `inferrs` is not running, not reachable, or not + bound to the expected host/port. +- `messages[].content ... expected a string`: set + `compat.requiresStringContent: true`. +- Direct tiny `/v1/chat/completions` calls pass, but `openclaw infer model run` + fails: try `compat.supportsTools: false`. +- OpenClaw no longer gets schema errors, but `inferrs` still crashes on larger + agent turns: treat it as an upstream `inferrs` or model limitation and reduce + prompt pressure or switch local backend/model. + +## Proxy-style behavior + +`inferrs` is treated as a proxy-style OpenAI-compatible `/v1` backend, not a +native OpenAI endpoint. 
+ +- native OpenAI-only request shaping does not apply here +- no `service_tier`, no Responses `store`, no prompt-cache hints, and no + OpenAI reasoning-compat payload shaping +- hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`) + are not injected on custom `inferrs` base URLs + +## See also + +- [Local models](/gateway/local-models) +- [Gateway troubleshooting](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail) +- [Model providers](/concepts/model-providers) diff --git a/src/agents/openai-completions-string-content.ts b/src/agents/openai-completions-string-content.ts new file mode 100644 index 00000000000..0eb59f1097a --- /dev/null +++ b/src/agents/openai-completions-string-content.ts @@ -0,0 +1,35 @@ +export function flattenStringOnlyCompletionContent(content: unknown): unknown { + if (!Array.isArray(content)) { + return content; + } + const textParts: string[] = []; + for (const item of content) { + if ( + !item || + typeof item !== "object" || + (item as { type?: unknown }).type !== "text" || + typeof (item as { text?: unknown }).text !== "string" + ) { + return content; + } + textParts.push((item as { text: string }).text); + } + return textParts.join("\n"); +} + +export function flattenCompletionMessagesToStringContent(messages: unknown[]): unknown[] { + return messages.map((message) => { + if (!message || typeof message !== "object") { + return message; + } + const content = (message as { content?: unknown }).content; + const flattenedContent = flattenStringOnlyCompletionContent(content); + if (flattenedContent === content) { + return message; + } + return { + ...message, + content: flattenedContent, + }; + }); +} diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index e6ac9905aac..be673005ffc 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -1079,6 +1079,41 @@ describe("openai 
transport stream", () => { expect(params.tools?.[0]?.function).not.toHaveProperty("strict"); }); + it("flattens pure text content arrays for string-only completions backends when opted in", () => { + const params = buildOpenAICompletionsParams( + { + id: "gg-hf-gg/gemma-4-E2B-it", + name: "Gemma 4 E2B", + api: "openai-completions", + provider: "inferrs", + baseUrl: "http://127.0.0.1:8080/v1", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 4096, + compat: { + requiresStringContent: true, + } as Record, + } satisfies Model<"openai-completions">, + { + systemPrompt: "system", + messages: [ + { + role: "user", + content: [{ type: "text", text: "What is 2 + 2?" }], + timestamp: Date.now(), + }, + ], + tools: [], + } as never, + undefined, + ) as { messages?: Array<{ role?: string; content?: unknown }> }; + + expect(params.messages?.[0]).toMatchObject({ role: "system", content: "system" }); + expect(params.messages?.[1]).toMatchObject({ role: "user", content: "What is 2 + 2?" 
}); + }); + it("uses max_tokens for Chutes default-route completions providers without relying on baseUrl host sniffing", () => { const params = buildOpenAICompletionsParams( { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 90937082c7f..47638813733 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -23,6 +23,7 @@ import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider import type { ProviderRuntimeModel } from "../plugins/types.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js"; import { detectOpenAICompletionsCompat } from "./openai-completions-compat.js"; +import { flattenCompletionMessagesToStringContent } from "./openai-completions-string-content.js"; import { applyOpenAIResponsesPayloadPolicy, resolveOpenAIResponsesPayloadPolicy, @@ -1164,6 +1165,7 @@ function getCompat(model: OpenAIModeModel): { openRouterRouting: Record; vercelGatewayRouting: Record; supportsStrictMode: boolean; + requiresStringContent: boolean; } { const detected = detectCompat(model); const compat = model.compat ?? {}; @@ -1198,6 +1200,7 @@ function getCompat(model: OpenAIModeModel): { detected.vercelGatewayRouting, supportsStrictMode: (compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode, + requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false, }; } @@ -1261,9 +1264,12 @@ export function buildOpenAICompletionsParams( systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt), } : context; + const messages = convertMessages(model as never, completionsContext, compat as never); const params: Record = { model: model.id, - messages: convertMessages(model as never, completionsContext, compat as never), + messages: compat.requiresStringContent + ? 
flattenCompletionMessagesToStringContent(messages) + : messages, stream: true, }; if (compat.supportsUsageInStreaming) { diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 19b22f5a73c..241a3510447 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -132,6 +132,7 @@ import { createOpenAIReasoningCompatibilityWrapper, createOpenAIResponsesContextManagementWrapper, createOpenAIServiceTierWrapper, + createOpenAIStringContentWrapper, createOpenAITextVerbosityWrapper, resolveOpenAIFastMode, resolveOpenAIServiceTier, @@ -170,6 +171,7 @@ function createTestOpenAIProviderWrapper( config: params.context.config, agentDir: params.context.agentDir, }); + streamFn = createOpenAIStringContentWrapper(streamFn); return createOpenAIResponsesContextManagementWrapper( createOpenAIReasoningCompatibilityWrapper(streamFn), params.context.extraParams, @@ -562,6 +564,54 @@ describe("applyExtraParamsToAgent", () => { expect(payload.parallel_tool_calls).toBe(false); }); + it("flattens pure text OpenAI completions message arrays for string-only compat models", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "inferrs", + applyModelId: "gg-hf-gg/gemma-4-E2B-it", + model: { + api: "openai-completions", + provider: "inferrs", + id: "gg-hf-gg/gemma-4-E2B-it", + name: "Gemma 4 E2B (inferrs)", + baseUrl: "http://127.0.0.1:8080/v1", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 4096, + compat: { + requiresStringContent: true, + } as Record, + } as unknown as Model<"openai-completions">, + payload: { + messages: [ + { + role: "system", + content: [{ type: "text", text: "System text" }], + }, + { + role: "user", + content: [ + { type: "text", text: "Line one" }, + { type: "text", text: "Line two" }, + ], + }, + ], + }, + }); + + 
expect(payload.messages).toEqual([ + { + role: "system", + content: "System text", + }, + { + role: "user", + content: "Line one\nLine two", + }, + ]); + }); + it("injects parallel_tool_calls for openai-responses payloads when configured", () => { const payload = runParallelToolCallsPayloadMutationCase({ applyProvider: "openai", diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index d8afcfd9c11..c4f15f3af68 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -16,7 +16,10 @@ import { createSiliconFlowThinkingWrapper, shouldApplySiliconFlowThinkingOffCompat, } from "./moonshot-stream-wrappers.js"; -import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js"; +import { + createOpenAIResponsesContextManagementWrapper, + createOpenAIStringContentWrapper, +} from "./openai-stream-wrappers.js"; import { resolveCacheRetention } from "./prompt-cache-retention.js"; import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js"; import { streamWithPayloadPatch } from "./stream-payload-utils.js"; @@ -389,6 +392,7 @@ function applyPostPluginStreamWrappers( ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean }, ): void { ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn); + ctx.agent.streamFn = createOpenAIStringContentWrapper(ctx.agent.streamFn); if (!ctx.providerWrapperHandled) { // Guard Google-family payloads against invalid negative thinking budgets diff --git a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts index 82e737e13c6..2f4f0e814b9 100644 --- a/src/agents/pi-embedded-runner/openai-stream-wrappers.ts +++ b/src/agents/pi-embedded-runner/openai-stream-wrappers.ts @@ -7,6 +7,7 @@ import { patchCodexNativeWebSearchPayload, resolveCodexNativeSearchActivation, } from "../codex-native-web-search.js"; 
+import { flattenCompletionMessagesToStringContent } from "../openai-completions-string-content.js"; import { applyOpenAIResponsesPayloadPolicy, resolveOpenAIResponsesPayloadPolicy, @@ -66,6 +67,17 @@ function shouldApplyOpenAIReasoningCompatibility(model: { return resolveOpenAIRequestCapabilities(model).supportsOpenAIReasoningCompatPayload; } +function shouldFlattenOpenAICompletionMessages(model: { + api?: unknown; + compat?: unknown; +}): boolean { + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as { requiresStringContent?: unknown }) + : undefined; + return model.api === "openai-completions" && compat?.requiresStringContent === true; +} + function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined { if (typeof value !== "string") { return undefined; @@ -219,6 +231,21 @@ export function createOpenAIReasoningCompatibilityWrapper( }; } +export function createOpenAIStringContentWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => { + if (!shouldFlattenOpenAICompletionMessages(model)) { + return underlying(model, context, options); + } + return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + if (!Array.isArray(payloadObj.messages)) { + return; + } + payloadObj.messages = flattenCompletionMessagesToStringContent(payloadObj.messages); + }); + }; +} + export function createOpenAIFastModeWrapper(baseStreamFn: StreamFn | undefined): StreamFn { const underlying = baseStreamFn ?? 
streamSimple; return (model, context, options) => { diff --git a/src/config/config-misc.test.ts b/src/config/config-misc.test.ts index 8d9a12ca762..23889d40b97 100644 --- a/src/config/config-misc.test.ts +++ b/src/config/config-misc.test.ts @@ -391,6 +391,7 @@ describe("model compat config schema", () => { compat: { supportsUsageInStreaming: true, supportsStrictMode: false, + requiresStringContent: true, thinkingFormat: "qwen", requiresToolResultName: true, requiresAssistantAfterToolResult: false, diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 1f5ffe0363a..932e20ad549 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -2807,6 +2807,9 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { supportsStrictMode: { type: "boolean", }, + requiresStringContent: { + type: "boolean", + }, maxTokensField: { anyOf: [ { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index afd82ee6806..3497d7ffb25 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -37,6 +37,7 @@ type SupportedThinkingFormat = export type ModelCompatConfig = SupportedOpenAICompatFields & { thinkingFormat?: SupportedThinkingFormat; supportsTools?: boolean; + requiresStringContent?: boolean; toolSchemaProfile?: string; unsupportedToolSchemaKeywords?: string[]; nativeWebSearchTool?: boolean; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index a2bef61d96e..7ef6db117ce 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -189,6 +189,7 @@ export const ModelCompatSchema = z supportsUsageInStreaming: z.boolean().optional(), supportsTools: z.boolean().optional(), supportsStrictMode: z.boolean().optional(), + requiresStringContent: z.boolean().optional(), maxTokensField: z .union([z.literal("max_completion_tokens"), z.literal("max_tokens")]) .optional(),