From 49f72b332f5daf0484292e1fc63b2088fa94c8c6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 03:26:51 +0100 Subject: [PATCH] fix: harden openai-compatible completions payloads --- CHANGELOG.md | 1 + docs/concepts/model-providers.md | 9 +- docs/gateway/config-agents.md | 49 ++++---- docs/providers/openai.md | 2 + .../pi-embedded-runner-extraparams.test.ts | 105 ++++++++++++++++++ src/agents/pi-embedded-runner/extra-params.ts | 83 ++++++++++++++ 6 files changed, 223 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35d76c2ba5e..b6a3e8e4bcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai - Browser/config: expand `~` in `browser.executablePath` before Chromium launch, so home-relative custom browser paths no longer fail with `ENOENT`. Fixes #67264. Thanks @Quratulain-bilal. - Telegram/streaming: hide tool-progress status updates by default while keeping explicit `streaming.preview.toolProgress` opt-in support for edited preview messages. Fixes #71320. Thanks @neeravmakwana. - Gateway/sessions: copy the oversized `sessions.json` to a rotation backup before the atomic rewrite instead of renaming the live store away, so a crash during rotation keeps the existing session-to-transcript mapping authoritative. Fixes #68229. Thanks @jjjojoj. +- Providers/OpenAI-compatible: strip OpenAI-only Completions `store` from proxy payloads and allow `extra_body`/`extraBody` passthrough params for provider-specific request fields. Fixes #61826 and #69717. - Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka. 
- Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete. - Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler. diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 1dc7b123f41..0d979137b51 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -625,9 +625,12 @@ Notes: - Recommended: set explicit values that match your proxy/model limits. - For `api: "openai-completions"` on non-native endpoints (any non-empty `baseUrl` whose host is not `api.openai.com`), OpenClaw forces `compat.supportsDeveloperRole: false` to avoid provider 400 errors for unsupported `developer` roles. - Proxy-style OpenAI-compatible routes also skip native OpenAI-only request - shaping: no `service_tier`, no Responses `store`, no prompt-cache hints, no - OpenAI reasoning-compat payload shaping, and no hidden OpenClaw attribution - headers. + shaping: no `service_tier`, no Responses `store`, no Completions `store`, no + prompt-cache hints, no OpenAI reasoning-compat payload shaping, and no hidden + OpenClaw attribution headers. +- For OpenAI-compatible Completions proxies that need vendor-specific fields, + set `agents.defaults.models["provider/model"].params.extra_body` (or + `extraBody`) to merge extra JSON into the outbound request body. - If `baseUrl` is empty/omitted, OpenClaw keeps the default OpenAI behavior (which resolves to `api.openai.com`). - For safety, an explicit `compat.supportsDeveloperRole: true` is still overridden on non-native `openai-completions` endpoints. 
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index 62a8f49f38b..8a5e86e41cb 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -363,12 +363,13 @@ Time format in system prompt. Default: `auto` (OS preference). - `verboseDefault`: default verbose level for agents. Values: `"off"`, `"on"`, `"full"`. Default: `"off"`. - `elevatedDefault`: default elevated-output level for agents. Values: `"off"`, `"on"`, `"ask"`, `"full"`. Default: `"on"`. - `model.primary`: format `provider/model` (e.g. `openai/gpt-5.4` for API-key access or `openai-codex/gpt-5.5` for Codex OAuth). If you omit the provider, OpenClaw tries an alias first, then a unique configured-provider match for that exact model id, and only then falls back to the configured default provider (deprecated compatibility behavior, so prefer explicit `provider/model`). If that provider no longer exposes the configured default model, OpenClaw falls back to the first configured provider/model instead of surfacing a stale removed-provider default. -- `models`: the configured model catalog and allowlist for `/model`. Each entry can include `alias` (shortcut) and `params` (provider-specific, for example `temperature`, `maxTokens`, `cacheRetention`, `context1m`, `responsesServerCompaction`, `responsesCompactThreshold`). +- `models`: the configured model catalog and allowlist for `/model`. Each entry can include `alias` (shortcut) and `params` (provider-specific, for example `temperature`, `maxTokens`, `cacheRetention`, `context1m`, `responsesServerCompaction`, `responsesCompactThreshold`, `extra_body`/`extraBody`). - Safe edits: use `openclaw config set agents.defaults.models '' --strict-json --merge` to add entries. `config set` refuses replacements that would remove existing allowlist entries unless you pass `--replace`. - Provider-scoped configure/onboarding flows merge selected provider models into this map and preserve unrelated providers already configured. 
- For direct OpenAI Responses models, server-side compaction is enabled automatically. Use `params.responsesServerCompaction: false` to stop injecting `context_management`, or `params.responsesCompactThreshold` to override the threshold. See [OpenAI server-side compaction](/providers/openai#server-side-compaction-responses-api). - `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`). - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details. +- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward. - `embeddedHarness`: default low-level embedded agent runtime policy. Omitted runtime defaults to OpenClaw Pi. Use `runtime: "pi"` to force the built-in PI harness, `runtime: "auto"` to let registered plugin harnesses claim supported models, or a registered harness id such as `runtime: "codex"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection. 
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible. - `maxConcurrent`: max parallel agent runs across sessions (each session still serialized). Default: 4. @@ -1257,27 +1258,29 @@ Batches rapid text-only messages from the same sender into a single agent turn. maxTextLength: 4000, timeoutMs: 30000, prefsPath: "~/.openclaw/settings/tts.json", - elevenlabs: { - apiKey: "elevenlabs_api_key", - baseUrl: "https://api.elevenlabs.io", - voiceId: "voice_id", - modelId: "eleven_multilingual_v2", - seed: 42, - applyTextNormalization: "auto", - languageCode: "en", - voiceSettings: { - stability: 0.5, - similarityBoost: 0.75, - style: 0.0, - useSpeakerBoost: true, - speed: 1.0, + providers: { + elevenlabs: { + apiKey: "elevenlabs_api_key", + baseUrl: "https://api.elevenlabs.io", + voiceId: "voice_id", + modelId: "eleven_multilingual_v2", + seed: 42, + applyTextNormalization: "auto", + languageCode: "en", + voiceSettings: { + stability: 0.5, + similarityBoost: 0.75, + style: 0.0, + useSpeakerBoost: true, + speed: 1.0, + }, + }, + openai: { + apiKey: "openai_api_key", + baseUrl: "https://api.openai.com/v1", + model: "gpt-4o-mini-tts", + voice: "alloy", }, - }, - openai: { - apiKey: "openai_api_key", - baseUrl: "https://api.openai.com/v1", - model: "gpt-4o-mini-tts", - voice: "alloy", }, }, }, @@ -1288,8 +1291,8 @@ Batches rapid text-only messages from the same sender into a single agent turn. - `summaryModel` overrides `agents.defaults.model.primary` for auto-summary. - `modelOverrides` is enabled by default; `modelOverrides.allowProvider` defaults to `false` (opt-in). - API keys fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`. -- `openai.baseUrl` overrides the OpenAI TTS endpoint. Resolution order is config, then `OPENAI_TTS_BASE_URL`, then `https://api.openai.com/v1`. 
-- When `openai.baseUrl` points to a non-OpenAI endpoint, OpenClaw treats it as an OpenAI-compatible TTS server and relaxes model/voice validation. +- `providers.openai.baseUrl` overrides the OpenAI TTS endpoint. Resolution order is config, then `OPENAI_TTS_BASE_URL`, then `https://api.openai.com/v1`. +- When `providers.openai.baseUrl` points to a non-OpenAI endpoint, OpenClaw treats it as an OpenAI-compatible TTS server and relaxes model/voice validation. --- diff --git a/docs/providers/openai.md b/docs/providers/openai.md index 92dc6796754..a8d43002f5b 100644 --- a/docs/providers/openai.md +++ b/docs/providers/openai.md @@ -789,6 +789,8 @@ the Server-side compaction accordion below. **Proxy/compatible routes:** - Use looser compat behavior + - Strip Completions `store` from non-native `openai-completions` payloads + - Accept advanced `params.extra_body`/`params.extraBody` pass-through JSON for OpenAI-compatible Completions proxies - Do not force strict tool schemas or native-only headers Azure OpenAI uses native transport and compat behavior but does not receive the hidden attribution headers. 
diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 41d62bfad05..2f80853e0a1 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -752,6 +752,111 @@ describe("applyExtraParamsToAgent", () => { expect(payload.parallel_tool_calls).toBe(false); }); + it("strips store from proxied openai-completions payloads", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "google", + applyModelId: "gemini-2.5-pro", + model: { + api: "openai-completions", + provider: "google", + id: "gemini-2.5-pro", + baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", + } as Model<"openai-completions">, + payload: { + messages: [], + store: false, + }, + }); + + expect(payload).not.toHaveProperty("store"); + }); + + it("keeps store untouched for native openai-completions payloads", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-4.1", + model: { + api: "openai-completions", + provider: "openai", + id: "gpt-4.1", + baseUrl: "https://api.openai.com/v1", + } as Model<"openai-completions">, + payload: { + messages: [], + store: false, + }, + }); + + expect(payload.store).toBe(false); + }); + + it("merges extra_body into openai-completions payloads before proxy store stripping", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "google", + applyModelId: "gemini-2.5-pro", + cfg: { + agents: { + defaults: { + models: { + "google/gemini-2.5-pro": { + params: { + extraBody: { + google: { thinking_config: { thinking_budget: 0 } }, + store: false, + }, + }, + }, + }, + }, + }, + }, + model: { + api: "openai-completions", + provider: "google", + id: "gemini-2.5-pro", + baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", + } as Model<"openai-completions">, + payload: { + messages: [], + }, + }); + + expect(payload.google).toEqual({ 
thinking_config: { thinking_budget: 0 } }); + expect(payload).not.toHaveProperty("store"); + }); + + it("warns and skips invalid extra_body params", () => { + const warnSpy = vi.spyOn(log, "warn").mockImplementation(() => {}); + try { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "google", + applyModelId: "gemini-2.5-pro", + cfg: { + agents: { + defaults: { + models: { + "google/gemini-2.5-pro": { + params: { extra_body: "not-an-object" }, + }, + }, + }, + }, + }, + model: { + api: "openai-completions", + provider: "google", + id: "gemini-2.5-pro", + baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai", + } as Model<"openai-completions">, + }); + + expect(payload).not.toHaveProperty("extra_body"); + expect(warnSpy).toHaveBeenCalledWith("ignoring invalid extra_body param: not-an-object"); + } finally { + warnSpy.mockRestore(); + } + }); + it("flattens pure text OpenAI completions message arrays for string-only compat models", () => { const payload = runResponsesPayloadMutationCase({ applyProvider: "inferrs", diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index cd2e5694772..db7fd3d9dc9 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -11,6 +11,7 @@ import { } from "../../plugins/provider-hook-runtime.js"; import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; import { supportsGptParallelToolCallsPayload } from "../provider-api-families.js"; +import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js"; import { createGoogleThinkingPayloadWrapper } from "./google-stream-wrappers.js"; import { log } from "./logger.js"; import { createMinimaxThinkingDisabledWrapper } from "./minimax-stream-wrappers.js"; @@ -389,6 +390,77 @@ function createParallelToolCallsWrapper( }; } +function shouldStripOpenAICompletionsStore(model: ProviderRuntimeModel): boolean { + 
if (model.api !== "openai-completions") { + return false; + } + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as Record<string, unknown>) + : undefined; + const capabilities = resolveProviderRequestPolicyConfig({ + provider: typeof model.provider === "string" ? model.provider : undefined, + api: model.api, + baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined, + compat, + capability: "llm", + transport: "stream", + }).capabilities; + return !capabilities.usesKnownNativeOpenAIRoute; +} + +function createOpenAICompletionsStoreCompatWrapper(baseStreamFn: StreamFn | undefined): StreamFn { + const underlying = baseStreamFn ?? streamSimple; + return (model, context, options) => { + if (!shouldStripOpenAICompletionsStore(model as ProviderRuntimeModel)) { + return underlying(model, context, options); + } + return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + delete payloadObj.store; + }); + }; +} + +function sanitizeExtraBodyRecord(value: Record<string, unknown>): Record<string, unknown> { + return Object.fromEntries( + Object.entries(sanitizeExtraParamsRecord(value) ?? {}).filter( + ([, entry]) => entry !== undefined, + ), + ); +} + +function resolveExtraBodyParam(rawExtraBody: unknown): Record<string, unknown> | undefined { + if (rawExtraBody === undefined || rawExtraBody === null) { + return undefined; + } + if (typeof rawExtraBody !== "object" || Array.isArray(rawExtraBody)) { + const summary = typeof rawExtraBody === "string" ? rawExtraBody : typeof rawExtraBody; + log.warn(`ignoring invalid extra_body param: ${summary}`); + return undefined; + } + const extraBody = sanitizeExtraBodyRecord(rawExtraBody as Record<string, unknown>); + return Object.keys(extraBody).length > 0 ? extraBody : undefined; +} + +function createOpenAICompletionsExtraBodyWrapper( + baseStreamFn: StreamFn | undefined, + extraBody: Record<string, unknown>, +): StreamFn { + const underlying = baseStreamFn ?? 
streamSimple; + return (model, context, options) => { + if (model.api !== "openai-completions") { + return underlying(model, context, options); + } + return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + const collisions = Object.keys(extraBody).filter((key) => Object.hasOwn(payloadObj, key)); + if (collisions.length > 0) { + log.warn(`extra_body overwriting request payload keys: ${collisions.join(", ")}`); + } + Object.assign(payloadObj, extraBody); + }); + }; +} + type ApplyExtraParamsContext = { agent: { streamFn?: StreamFn }; cfg: OpenClawConfig | undefined; @@ -455,6 +527,17 @@ function applyPostPluginStreamWrappers( // blocks. Disable thinking unless an earlier wrapper already set it. ctx.agent.streamFn = createMinimaxThinkingDisabledWrapper(ctx.agent.streamFn); + const rawExtraBody = resolveAliasedParamValue( + [ctx.effectiveExtraParams, ctx.override], + "extra_body", + "extraBody", + ); + const extraBody = resolveExtraBodyParam(rawExtraBody); + if (extraBody) { + ctx.agent.streamFn = createOpenAICompletionsExtraBodyWrapper(ctx.agent.streamFn, extraBody); + } + ctx.agent.streamFn = createOpenAICompletionsStoreCompatWrapper(ctx.agent.streamFn); + const rawParallelToolCalls = resolveAliasedParamValue( [ctx.effectiveExtraParams, ctx.override], "parallel_tool_calls",