fix: support string-only completions for inferrs backends

This commit is contained in:
Peter Steinberger
2026-04-07 15:52:41 +01:00
parent ea9efc0e81
commit 9d4b0d551d
18 changed files with 435 additions and 5 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- Memory/wiki: use compiled digest artifacts as the first-pass wiki index for search/get flows, and resolve claim ids back to owning pages so agents can retrieve knowledge by belief identity instead of only by file path. Thanks @vincentkoc.
- Memory/wiki: add an opt-in `context.includeCompiledDigestPrompt` flag so memory prompt supplements can append a compact compiled wiki snapshot for legacy prompt assembly and context engines that explicitly consume memory prompt sections. Thanks @vincentkoc.
- Plugin SDK/context engines: pass `availableTools` and `citationsMode` into `assemble()`, and expose `buildMemorySystemPromptAddition(...)` so non-legacy context engines can adopt the active memory prompt path without reimplementing it. Thanks @vincentkoc.
- Providers/inferrs: add string-content compatibility for stricter OpenAI-compatible chat backends, document `inferrs` setup with a full config example, and add troubleshooting guidance for local backends that pass direct probes but fail on full agent-runtime prompts.
### Fixes

View File

@@ -1,4 +1,4 @@
838e3c2f798321d47ccafd132b07a94a676ecf01ec128550c85cea9c2cacf0f5 config-baseline.json
531ad785e7877e8d426985df5074b958a09ea61da5557061f8762272ef9e1d46 config-baseline.core.json
af24bd5a2a86e8bb481302211b35c440e82636585c46f57050648c0290b1d4ee config-baseline.json
73bda77ebf7d70609c57f394655332536eb5ff55516a6b7db06243bd4e8e44a5 config-baseline.core.json
d22f4414b79ee03d896e58d875c80523bcc12303cbacb1700261e6ec73945187 config-baseline.channel.json
d32b286c554e8fe7a53b01dde23987fa6eb2140f021297bf029aed5542d721af config-baseline.plugin.json
d42cee3dea4668bdb7daf6ff5e6f87f326fdef56a8c3716d73079b92cab6e7b2 config-baseline.plugin.json

View File

@@ -2349,6 +2349,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.models.*.contextWindow`: native model context window metadata.
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`.
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request.
- `plugins.entries.amazon-bedrock.config.discovery`: Bedrock auto-discovery settings root.
- `plugins.entries.amazon-bedrock.config.discovery.enabled`: turn implicit discovery on/off.
- `plugins.entries.amazon-bedrock.config.discovery.region`: AWS region for discovery.

View File

@@ -155,9 +155,30 @@ Behavior note for local/proxied `/v1` backends:
- hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`)
are not injected on these custom proxy URLs
Compatibility notes for stricter OpenAI-compatible backends:
- Some servers accept only string `messages[].content` on Chat Completions, not
structured content-part arrays. Set
`models.providers.<provider>.models[].compat.requiresStringContent: true` for
those endpoints.
- Some smaller or stricter local backends are unstable with OpenClaw's full
agent-runtime prompt shape, especially when tool schemas are included. If the
backend works for tiny direct `/v1/chat/completions` calls but fails on normal
OpenClaw agent turns, try
`models.providers.<provider>.models[].compat.supportsTools: false` first.
- If the backend still fails only on larger OpenClaw runs, the remaining issue
is usually upstream model/server capacity or a backend bug, not OpenClaw's
transport layer.
## Troubleshooting
- Gateway can reach the proxy? `curl http://127.0.0.1:1234/v1/models`.
- LM Studio model unloaded? Reload; cold start is a common “hanging” cause.
- Context errors? Lower `contextWindow` or raise your server limit.
- OpenAI-compatible server returns `messages[].content ... expected a string`?
Add `compat.requiresStringContent: true` on that model entry.
- Direct tiny `/v1/chat/completions` calls work, but `openclaw infer model run`
fails on Gemma or another local model? Disable tool schemas first with
`compat.supportsTools: false`, then retest. If the server still crashes only
on larger OpenClaw prompts, treat it as an upstream server/model limitation.
- Safety: local models skip provider-side filters; keep agents narrow and compaction on to limit prompt injection blast radius.

View File

@@ -59,6 +59,61 @@ Related:
- [/reference/token-use](/reference/token-use)
- [/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic](/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic)
## Local OpenAI-compatible backend passes direct probes but agent runs fail
Use this when:
- `curl ... /v1/models` works
- tiny direct `/v1/chat/completions` calls work
- OpenClaw model runs fail only on normal agent turns
```bash
curl http://127.0.0.1:1234/v1/models
curl http://127.0.0.1:1234/v1/chat/completions \
-H 'content-type: application/json' \
-d '{"model":"<id>","messages":[{"role":"user","content":"hi"}],"stream":false}'
openclaw infer model run --model <provider/model> --prompt "hi" --json
openclaw logs --follow
```
Look for:
- direct tiny calls succeed, but OpenClaw runs fail only on larger prompts
- backend errors about `messages[].content` expecting a string
- backend crashes that appear only with larger prompt-token counts or full agent
runtime prompts
Common signatures:
- `messages[...].content: invalid type: sequence, expected a string` → backend
rejects structured Chat Completions content parts. Fix: set
`models.providers.<provider>.models[].compat.requiresStringContent: true`.
- direct tiny requests succeed, but OpenClaw agent runs fail with backend/model
crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is
likely already correct; the backend is failing on the larger agent-runtime
prompt shape.
- failures shrink after disabling tools but do not disappear → tool schemas were
part of the pressure, but the remaining issue is still upstream model/server
capacity or a backend bug.
Fix options:
1. Set `compat.requiresStringContent: true` for string-only Chat Completions backends.
2. Set `compat.supportsTools: false` for models/backends that cannot handle
OpenClaw's tool schema surface reliably.
3. Lower prompt pressure where possible: smaller workspace bootstrap, shorter
session history, lighter local model, or a backend with stronger long-context
support.
4. If tiny direct requests keep passing while OpenClaw agent turns still crash
inside the backend, treat it as an upstream server/model limitation and file
a repro there with the accepted payload shape.
Related:
- [/gateway/local-models](/gateway/local-models)
- [/gateway/configuration#models](/gateway/configuration#models)
- [/gateway/configuration-reference#openai-compatible-endpoints](/gateway/configuration-reference#openai-compatible-endpoints)
## No replies
If channels are up but nothing answers, check routing and policy before reconnecting anything.

View File

@@ -42,6 +42,21 @@ If you see:
`HTTP 429: rate_limit_error: Extra usage is required for long context requests`,
go to [/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context](/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context).
## Local OpenAI-compatible backend works directly but fails in OpenClaw
If your local or self-hosted `/v1` backend answers small direct
`/v1/chat/completions` probes but fails on `openclaw infer model run` or normal
agent turns:
1. If the error mentions `messages[].content` expecting a string, set
`models.providers.<provider>.models[].compat.requiresStringContent: true`.
2. If the backend still fails only on OpenClaw agent turns, set
`models.providers.<provider>.models[].compat.supportsTools: false` and retry.
3. If tiny direct calls still work but larger OpenClaw prompts crash the
backend, treat the remaining issue as an upstream model/server limitation and
continue in the deep runbook:
[/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail)
## Plugin install fails with missing openclaw extensions
If install fails with `package.json missing openclaw.extensions`, the plugin package

View File

@@ -42,6 +42,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi
- [Google (Gemini)](/providers/google)
- [Groq (LPU inference)](/providers/groq)
- [Hugging Face (Inference)](/providers/huggingface)
- [inferrs (local models)](/providers/inferrs)
- [Kilocode](/providers/kilocode)
- [LiteLLM (unified gateway)](/providers/litellm)
- [MiniMax](/providers/minimax)

173
docs/providers/inferrs.md Normal file
View File

@@ -0,0 +1,173 @@
---
summary: "Run OpenClaw through inferrs (OpenAI-compatible local server)"
read_when:
- You want to run OpenClaw against a local inferrs server
- You are serving Gemma or another model through inferrs
- You need the exact OpenClaw compat flags for inferrs
title: "inferrs"
---
# inferrs
[inferrs](https://github.com/ericcurtin/inferrs) can serve local models behind an
OpenAI-compatible `/v1` API. OpenClaw works with `inferrs` through the generic
`openai-completions` path.
`inferrs` is currently best treated as a custom self-hosted OpenAI-compatible
backend, not a dedicated OpenClaw provider plugin.
## Quick start
1. Start `inferrs` with a model.
Example:
```bash
inferrs serve gg-hf-gg/gemma-4-E2B-it \
--host 127.0.0.1 \
--port 8080 \
--device metal
```
2. Verify the server is reachable.
```bash
curl http://127.0.0.1:8080/health
curl http://127.0.0.1:8080/v1/models
```
3. Add an explicit OpenClaw provider entry and point your default model at it.
## Full config example
This example uses Gemma 4 on a local `inferrs` server.
```json5
{
agents: {
defaults: {
model: { primary: "inferrs/gg-hf-gg/gemma-4-E2B-it" },
models: {
"inferrs/gg-hf-gg/gemma-4-E2B-it": {
alias: "Gemma 4 (inferrs)",
},
},
},
},
models: {
mode: "merge",
providers: {
inferrs: {
baseUrl: "http://127.0.0.1:8080/v1",
apiKey: "inferrs-local",
api: "openai-completions",
models: [
{
id: "gg-hf-gg/gemma-4-E2B-it",
name: "Gemma 4 E2B (inferrs)",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 131072,
maxTokens: 4096,
compat: {
requiresStringContent: true,
},
},
],
},
},
},
}
```
## Why `requiresStringContent` matters
Some `inferrs` Chat Completions routes accept only string
`messages[].content`, not structured content-part arrays.
If OpenClaw runs fail with an error like:
```text
messages[1].content: invalid type: sequence, expected a string
```
set:
```json5
compat: {
requiresStringContent: true
}
```
OpenClaw will flatten pure text content parts into plain strings before sending
the request.
## Gemma and tool-schema caveat
Some current `inferrs` + Gemma combinations accept small direct
`/v1/chat/completions` requests but still fail on full OpenClaw agent-runtime
turns.
If that happens, try this first:
```json5
compat: {
requiresStringContent: true,
supportsTools: false
}
```
That disables OpenClaw's tool schema surface for the model and can reduce prompt
pressure on stricter local backends.
If tiny direct requests still work but normal OpenClaw agent turns continue to
crash inside `inferrs`, the remaining issue is usually upstream model/server
behavior rather than OpenClaw's transport layer.
## Manual smoke test
Once configured, test both layers:
```bash
curl http://127.0.0.1:8080/v1/chat/completions \
-H 'content-type: application/json' \
-d '{"model":"gg-hf-gg/gemma-4-E2B-it","messages":[{"role":"user","content":"What is 2 + 2?"}],"stream":false}'
openclaw infer model run \
--model inferrs/gg-hf-gg/gemma-4-E2B-it \
--prompt "What is 2 + 2? Reply with one short sentence." \
--json
```
If the first command works but the second fails, use the troubleshooting notes
below.
## Troubleshooting
- `curl /v1/models` fails: `inferrs` is not running, not reachable, or not
bound to the expected host/port.
- `messages[].content ... expected a string`: set
`compat.requiresStringContent: true`.
- Direct tiny `/v1/chat/completions` calls pass, but `openclaw infer model run`
fails: try `compat.supportsTools: false`.
- OpenClaw no longer gets schema errors, but `inferrs` still crashes on larger
agent turns: treat it as an upstream `inferrs` or model limitation and reduce
prompt pressure or switch local backend/model.
## Proxy-style behavior
`inferrs` is treated as a proxy-style OpenAI-compatible `/v1` backend, not a
native OpenAI endpoint.
- native OpenAI-only request shaping does not apply here
- no `service_tier`, no Responses `store`, no prompt-cache hints, and no
OpenAI reasoning-compat payload shaping
- hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`)
are not injected on custom `inferrs` base URLs
## See also
- [Local models](/gateway/local-models)
- [Gateway troubleshooting](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail)
- [Model providers](/concepts/model-providers)

View File

@@ -0,0 +1,35 @@
/**
 * Collapses an array of pure `{ type: "text", text: string }` content parts
 * into a single newline-joined string, for OpenAI-compatible backends that
 * only accept string `messages[].content`.
 *
 * Returns the input untouched when it is not an array, or when any element
 * is not a well-formed text part — mixed/structured content must survive
 * unchanged so richer backends keep working.
 */
export function flattenStringOnlyCompletionContent(content: unknown): unknown {
  if (!Array.isArray(content)) {
    return content;
  }
  const isTextPart = (part: unknown): part is { type: "text"; text: string } =>
    typeof part === "object" &&
    part !== null &&
    (part as { type?: unknown }).type === "text" &&
    typeof (part as { text?: unknown }).text === "string";
  if (!content.every(isTextPart)) {
    // At least one non-text part: leave the structured array as-is.
    return content;
  }
  return content.map((part) => part.text).join("\n");
}
/**
 * Maps each chat message's `content` through
 * `flattenStringOnlyCompletionContent`, producing a new message object only
 * when flattening actually changed the content. Messages that are not
 * objects, or whose content was left alone, pass through by reference.
 */
export function flattenCompletionMessagesToStringContent(messages: unknown[]): unknown[] {
  return messages.map((message) => {
    if (message === null || typeof message !== "object") {
      return message;
    }
    const originalContent = (message as { content?: unknown }).content;
    const flattened = flattenStringOnlyCompletionContent(originalContent);
    // Preserve referential identity when nothing changed, so callers can
    // cheaply detect no-op flattening.
    return flattened === originalContent ? message : { ...message, content: flattened };
  });
}

View File

@@ -1079,6 +1079,41 @@ describe("openai transport stream", () => {
expect(params.tools?.[0]?.function).not.toHaveProperty("strict");
});
it("flattens pure text content arrays for string-only completions backends when opted in", () => {
const params = buildOpenAICompletionsParams(
{
id: "gg-hf-gg/gemma-4-E2B-it",
name: "Gemma 4 E2B",
api: "openai-completions",
provider: "inferrs",
baseUrl: "http://127.0.0.1:8080/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 131072,
maxTokens: 4096,
compat: {
requiresStringContent: true,
} as Record<string, unknown>,
} satisfies Model<"openai-completions">,
{
systemPrompt: "system",
messages: [
{
role: "user",
content: [{ type: "text", text: "What is 2 + 2?" }],
timestamp: Date.now(),
},
],
tools: [],
} as never,
undefined,
) as { messages?: Array<{ role?: string; content?: unknown }> };
expect(params.messages?.[0]).toMatchObject({ role: "system", content: "system" });
expect(params.messages?.[1]).toMatchObject({ role: "user", content: "What is 2 + 2?" });
});
it("uses max_tokens for Chutes default-route completions providers without relying on baseUrl host sniffing", () => {
const params = buildOpenAICompletionsParams(
{

View File

@@ -23,6 +23,7 @@ import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider
import type { ProviderRuntimeModel } from "../plugins/types.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
import { detectOpenAICompletionsCompat } from "./openai-completions-compat.js";
import { flattenCompletionMessagesToStringContent } from "./openai-completions-string-content.js";
import {
applyOpenAIResponsesPayloadPolicy,
resolveOpenAIResponsesPayloadPolicy,
@@ -1164,6 +1165,7 @@ function getCompat(model: OpenAIModeModel): {
openRouterRouting: Record<string, unknown>;
vercelGatewayRouting: Record<string, unknown>;
supportsStrictMode: boolean;
requiresStringContent: boolean;
} {
const detected = detectCompat(model);
const compat = model.compat ?? {};
@@ -1198,6 +1200,7 @@ function getCompat(model: OpenAIModeModel): {
detected.vercelGatewayRouting,
supportsStrictMode:
(compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode,
requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false,
};
}
@@ -1261,9 +1264,12 @@ export function buildOpenAICompletionsParams(
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
}
: context;
const messages = convertMessages(model as never, completionsContext, compat as never);
const params: Record<string, unknown> = {
model: model.id,
messages: convertMessages(model as never, completionsContext, compat as never),
messages: compat.requiresStringContent
? flattenCompletionMessagesToStringContent(messages)
: messages,
stream: true,
};
if (compat.supportsUsageInStreaming) {

View File

@@ -132,6 +132,7 @@ import {
createOpenAIReasoningCompatibilityWrapper,
createOpenAIResponsesContextManagementWrapper,
createOpenAIServiceTierWrapper,
createOpenAIStringContentWrapper,
createOpenAITextVerbosityWrapper,
resolveOpenAIFastMode,
resolveOpenAIServiceTier,
@@ -170,6 +171,7 @@ function createTestOpenAIProviderWrapper(
config: params.context.config,
agentDir: params.context.agentDir,
});
streamFn = createOpenAIStringContentWrapper(streamFn);
return createOpenAIResponsesContextManagementWrapper(
createOpenAIReasoningCompatibilityWrapper(streamFn),
params.context.extraParams,
@@ -562,6 +564,54 @@ describe("applyExtraParamsToAgent", () => {
expect(payload.parallel_tool_calls).toBe(false);
});
it("flattens pure text OpenAI completions message arrays for string-only compat models", () => {
const payload = runResponsesPayloadMutationCase({
applyProvider: "inferrs",
applyModelId: "gg-hf-gg/gemma-4-E2B-it",
model: {
api: "openai-completions",
provider: "inferrs",
id: "gg-hf-gg/gemma-4-E2B-it",
name: "Gemma 4 E2B (inferrs)",
baseUrl: "http://127.0.0.1:8080/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 131072,
maxTokens: 4096,
compat: {
requiresStringContent: true,
} as Record<string, unknown>,
} as unknown as Model<"openai-completions">,
payload: {
messages: [
{
role: "system",
content: [{ type: "text", text: "System text" }],
},
{
role: "user",
content: [
{ type: "text", text: "Line one" },
{ type: "text", text: "Line two" },
],
},
],
},
});
expect(payload.messages).toEqual([
{
role: "system",
content: "System text",
},
{
role: "user",
content: "Line one\nLine two",
},
]);
});
it("injects parallel_tool_calls for openai-responses payloads when configured", () => {
const payload = runParallelToolCallsPayloadMutationCase({
applyProvider: "openai",

View File

@@ -16,7 +16,10 @@ import {
createSiliconFlowThinkingWrapper,
shouldApplySiliconFlowThinkingOffCompat,
} from "./moonshot-stream-wrappers.js";
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
import {
createOpenAIResponsesContextManagementWrapper,
createOpenAIStringContentWrapper,
} from "./openai-stream-wrappers.js";
import { resolveCacheRetention } from "./prompt-cache-retention.js";
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
@@ -389,6 +392,7 @@ function applyPostPluginStreamWrappers(
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
): void {
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
ctx.agent.streamFn = createOpenAIStringContentWrapper(ctx.agent.streamFn);
if (!ctx.providerWrapperHandled) {
// Guard Google-family payloads against invalid negative thinking budgets

View File

@@ -7,6 +7,7 @@ import {
patchCodexNativeWebSearchPayload,
resolveCodexNativeSearchActivation,
} from "../codex-native-web-search.js";
import { flattenCompletionMessagesToStringContent } from "../openai-completions-string-content.js";
import {
applyOpenAIResponsesPayloadPolicy,
resolveOpenAIResponsesPayloadPolicy,
@@ -66,6 +67,17 @@ function shouldApplyOpenAIReasoningCompatibility(model: {
return resolveOpenAIRequestCapabilities(model).supportsOpenAIReasoningCompatPayload;
}
/**
 * True only for `openai-completions` models that explicitly opt in via
 * `compat.requiresStringContent: true` (strict boolean check — truthy
 * non-boolean values do not enable flattening).
 */
function shouldFlattenOpenAICompletionMessages(model: {
  api?: unknown;
  compat?: unknown;
}): boolean {
  if (model.api !== "openai-completions") {
    return false;
  }
  const compat = model.compat;
  if (!compat || typeof compat !== "object") {
    return false;
  }
  return (compat as { requiresStringContent?: unknown }).requiresStringContent === true;
}
function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined {
if (typeof value !== "string") {
return undefined;
@@ -219,6 +231,21 @@ export function createOpenAIReasoningCompatibilityWrapper(
};
}
/**
 * Wraps a stream function so that, for models opted into
 * `compat.requiresStringContent`, outgoing Chat Completions payloads have
 * pure-text `messages[].content` arrays flattened to plain strings.
 * Models without the opt-in are passed straight through to the underlying
 * stream function.
 */
export function createOpenAIStringContentWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const delegate = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (!shouldFlattenOpenAICompletionMessages(model)) {
      return delegate(model, context, options);
    }
    return streamWithPayloadPatch(delegate, model, context, options, (payloadObj) => {
      const { messages } = payloadObj;
      if (Array.isArray(messages)) {
        payloadObj.messages = flattenCompletionMessagesToStringContent(messages);
      }
    });
  };
}
export function createOpenAIFastModeWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
const underlying = baseStreamFn ?? streamSimple;
return (model, context, options) => {

View File

@@ -391,6 +391,7 @@ describe("model compat config schema", () => {
compat: {
supportsUsageInStreaming: true,
supportsStrictMode: false,
requiresStringContent: true,
thinkingFormat: "qwen",
requiresToolResultName: true,
requiresAssistantAfterToolResult: false,

View File

@@ -2807,6 +2807,9 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
supportsStrictMode: {
type: "boolean",
},
requiresStringContent: {
type: "boolean",
},
maxTokensField: {
anyOf: [
{

View File

@@ -37,6 +37,7 @@ type SupportedThinkingFormat =
export type ModelCompatConfig = SupportedOpenAICompatFields & {
thinkingFormat?: SupportedThinkingFormat;
supportsTools?: boolean;
requiresStringContent?: boolean;
toolSchemaProfile?: string;
unsupportedToolSchemaKeywords?: string[];
nativeWebSearchTool?: boolean;

View File

@@ -189,6 +189,7 @@ export const ModelCompatSchema = z
supportsUsageInStreaming: z.boolean().optional(),
supportsTools: z.boolean().optional(),
supportsStrictMode: z.boolean().optional(),
requiresStringContent: z.boolean().optional(),
maxTokensField: z
.union([z.literal("max_completion_tokens"), z.literal("max_tokens")])
.optional(),