mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
fix: support inferrs string-only completions
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Memory/wiki: use compiled digest artifacts as the first-pass wiki index for search/get flows, and resolve claim ids back to owning pages so agents can retrieve knowledge by belief identity instead of only by file path. Thanks @vincentkoc.
|
||||
- Memory/wiki: add an opt-in `context.includeCompiledDigestPrompt` flag so memory prompt supplements can append a compact compiled wiki snapshot for legacy prompt assembly and context engines that explicitly consume memory prompt sections. Thanks @vincentkoc.
|
||||
- Plugin SDK/context engines: pass `availableTools` and `citationsMode` into `assemble()`, and expose `buildMemorySystemPromptAddition(...)` so non-legacy context engines can adopt the active memory prompt path without reimplementing it. Thanks @vincentkoc.
|
||||
- Providers/inferrs: add string-content compatibility for stricter OpenAI-compatible chat backends, document `inferrs` setup with a full config example, and add troubleshooting guidance for local backends that pass direct probes but fail on full agent-runtime prompts.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
838e3c2f798321d47ccafd132b07a94a676ecf01ec128550c85cea9c2cacf0f5 config-baseline.json
|
||||
531ad785e7877e8d426985df5074b958a09ea61da5557061f8762272ef9e1d46 config-baseline.core.json
|
||||
af24bd5a2a86e8bb481302211b35c440e82636585c46f57050648c0290b1d4ee config-baseline.json
|
||||
73bda77ebf7d70609c57f394655332536eb5ff55516a6b7db06243bd4e8e44a5 config-baseline.core.json
|
||||
d22f4414b79ee03d896e58d875c80523bcc12303cbacb1700261e6ec73945187 config-baseline.channel.json
|
||||
d32b286c554e8fe7a53b01dde23987fa6eb2140f021297bf029aed5542d721af config-baseline.plugin.json
|
||||
d42cee3dea4668bdb7daf6ff5e6f87f326fdef56a8c3716d73079b92cab6e7b2 config-baseline.plugin.json
|
||||
|
||||
@@ -2349,6 +2349,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
|
||||
- `models.providers.*.models.*.contextWindow`: native model context window metadata.
|
||||
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`.
|
||||
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
|
||||
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure-text `messages[].content` arrays into plain strings (text parts joined with newlines) before sending the request; arrays that contain any non-text part are sent unchanged.
|
||||
- `plugins.entries.amazon-bedrock.config.discovery`: Bedrock auto-discovery settings root.
|
||||
- `plugins.entries.amazon-bedrock.config.discovery.enabled`: turn implicit discovery on/off.
|
||||
- `plugins.entries.amazon-bedrock.config.discovery.region`: AWS region for discovery.
|
||||
|
||||
@@ -155,9 +155,30 @@ Behavior note for local/proxied `/v1` backends:
|
||||
- hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`)
|
||||
are not injected on these custom proxy URLs
|
||||
|
||||
Compatibility notes for stricter OpenAI-compatible backends:
|
||||
|
||||
- Some servers accept only string `messages[].content` on Chat Completions, not
|
||||
structured content-part arrays. Set
|
||||
`models.providers.<provider>.models[].compat.requiresStringContent: true` for
|
||||
those endpoints.
|
||||
- Some smaller or stricter local backends are unstable with OpenClaw's full
|
||||
agent-runtime prompt shape, especially when tool schemas are included. If the
|
||||
backend works for tiny direct `/v1/chat/completions` calls but fails on normal
|
||||
OpenClaw agent turns, try
|
||||
`models.providers.<provider>.models[].compat.supportsTools: false` first.
|
||||
- If the backend still fails only on larger OpenClaw runs, the remaining issue
|
||||
is usually upstream model/server capacity or a backend bug, not OpenClaw's
|
||||
transport layer.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- Gateway can reach the proxy? `curl http://127.0.0.1:1234/v1/models`.
|
||||
- LM Studio model unloaded? Reload; cold start is a common “hanging” cause.
|
||||
- Context errors? Lower `contextWindow` or raise your server limit.
|
||||
- OpenAI-compatible server returns `messages[].content ... expected a string`?
|
||||
Add `compat.requiresStringContent: true` on that model entry.
|
||||
- Direct tiny `/v1/chat/completions` calls work, but `openclaw infer model run`
|
||||
fails on Gemma or another local model? Disable tool schemas first with
|
||||
`compat.supportsTools: false`, then retest. If the server still crashes only
|
||||
on larger OpenClaw prompts, treat it as an upstream server/model limitation.
|
||||
- Safety: local models skip provider-side filters; keep agents narrow and compaction on to limit prompt injection blast radius.
|
||||
|
||||
@@ -59,6 +59,61 @@ Related:
|
||||
- [/reference/token-use](/reference/token-use)
|
||||
- [/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic](/help/faq#why-am-i-seeing-http-429-ratelimiterror-from-anthropic)
|
||||
|
||||
## Local OpenAI-compatible backend passes direct probes but agent runs fail
|
||||
|
||||
Use this when:
|
||||
|
||||
- `curl ... /v1/models` works
|
||||
- tiny direct `/v1/chat/completions` calls work
|
||||
- OpenClaw model runs fail only on normal agent turns
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:1234/v1/models
|
||||
curl http://127.0.0.1:1234/v1/chat/completions \
|
||||
-H 'content-type: application/json' \
|
||||
-d '{"model":"<id>","messages":[{"role":"user","content":"hi"}],"stream":false}'
|
||||
openclaw infer model run --model <provider/model> --prompt "hi" --json
|
||||
openclaw logs --follow
|
||||
```
|
||||
|
||||
Look for:
|
||||
|
||||
- direct tiny calls succeed, but OpenClaw runs fail only on larger prompts
|
||||
- backend errors about `messages[].content` expecting a string
|
||||
- backend crashes that appear only with larger prompt-token counts or full agent
|
||||
runtime prompts
|
||||
|
||||
Common signatures:
|
||||
|
||||
- `messages[...].content: invalid type: sequence, expected a string` → backend
|
||||
rejects structured Chat Completions content parts. Fix: set
|
||||
`models.providers.<provider>.models[].compat.requiresStringContent: true`.
|
||||
- direct tiny requests succeed, but OpenClaw agent runs fail with backend/model
|
||||
crashes (for example Gemma on some `inferrs` builds) → OpenClaw transport is
|
||||
likely already correct; the backend is failing on the larger agent-runtime
|
||||
prompt shape.
|
||||
- failures shrink after disabling tools but do not disappear → tool schemas were
|
||||
part of the pressure, but the remaining issue is still upstream model/server
|
||||
capacity or a backend bug.
|
||||
|
||||
Fix options:
|
||||
|
||||
1. Set `compat.requiresStringContent: true` for string-only Chat Completions backends.
|
||||
2. Set `compat.supportsTools: false` for models/backends that cannot handle
|
||||
OpenClaw's tool schema surface reliably.
|
||||
3. Lower prompt pressure where possible: smaller workspace bootstrap, shorter
|
||||
session history, lighter local model, or a backend with stronger long-context
|
||||
support.
|
||||
4. If tiny direct requests keep passing while OpenClaw agent turns still crash
|
||||
inside the backend, treat it as an upstream server/model limitation and file
|
||||
a repro there with the accepted payload shape.
|
||||
|
||||
Related:
|
||||
|
||||
- [/gateway/local-models](/gateway/local-models)
|
||||
- [/gateway/configuration#models](/gateway/configuration#models)
|
||||
- [/gateway/configuration-reference#openai-compatible-endpoints](/gateway/configuration-reference#openai-compatible-endpoints)
|
||||
|
||||
## No replies
|
||||
|
||||
If channels are up but nothing answers, check routing and policy before reconnecting anything.
|
||||
|
||||
@@ -42,6 +42,21 @@ If you see:
|
||||
`HTTP 429: rate_limit_error: Extra usage is required for long context requests`,
|
||||
go to [/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context](/gateway/troubleshooting#anthropic-429-extra-usage-required-for-long-context).
|
||||
|
||||
## Local OpenAI-compatible backend works directly but fails in OpenClaw
|
||||
|
||||
If your local or self-hosted `/v1` backend answers small direct
|
||||
`/v1/chat/completions` probes but fails on `openclaw infer model run` or normal
|
||||
agent turns:
|
||||
|
||||
1. If the error mentions `messages[].content` expecting a string, set
|
||||
`models.providers.<provider>.models[].compat.requiresStringContent: true`.
|
||||
2. If the backend still fails only on OpenClaw agent turns, set
|
||||
`models.providers.<provider>.models[].compat.supportsTools: false` and retry.
|
||||
3. If tiny direct calls still work but larger OpenClaw prompts crash the
|
||||
backend, treat the remaining issue as an upstream model/server limitation and
|
||||
continue in the deep runbook:
|
||||
[/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail)
|
||||
|
||||
## Plugin install fails with missing openclaw extensions
|
||||
|
||||
If install fails with `package.json missing openclaw.extensions`, the plugin package
|
||||
|
||||
@@ -42,6 +42,7 @@ Looking for chat channel docs (WhatsApp/Telegram/Discord/Slack/Mattermost (plugi
|
||||
- [Google (Gemini)](/providers/google)
|
||||
- [Groq (LPU inference)](/providers/groq)
|
||||
- [Hugging Face (Inference)](/providers/huggingface)
|
||||
- [inferrs (local models)](/providers/inferrs)
|
||||
- [Kilocode](/providers/kilocode)
|
||||
- [LiteLLM (unified gateway)](/providers/litellm)
|
||||
- [MiniMax](/providers/minimax)
|
||||
|
||||
173
docs/providers/inferrs.md
Normal file
173
docs/providers/inferrs.md
Normal file
@@ -0,0 +1,173 @@
|
||||
---
|
||||
summary: "Run OpenClaw through inferrs (OpenAI-compatible local server)"
|
||||
read_when:
|
||||
- You want to run OpenClaw against a local inferrs server
|
||||
- You are serving Gemma or another model through inferrs
|
||||
- You need the exact OpenClaw compat flags for inferrs
|
||||
title: "inferrs"
|
||||
---
|
||||
|
||||
# inferrs
|
||||
|
||||
[inferrs](https://github.com/ericcurtin/inferrs) can serve local models behind an
|
||||
OpenAI-compatible `/v1` API. OpenClaw works with `inferrs` through the generic
|
||||
`openai-completions` path.
|
||||
|
||||
`inferrs` is currently best treated as a custom self-hosted OpenAI-compatible
|
||||
backend, not a dedicated OpenClaw provider plugin.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Start `inferrs` with a model.
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
inferrs serve gg-hf-gg/gemma-4-E2B-it \
|
||||
--host 127.0.0.1 \
|
||||
--port 8080 \
|
||||
--device metal
|
||||
```
|
||||
|
||||
2. Verify the server is reachable.
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8080/health
|
||||
curl http://127.0.0.1:8080/v1/models
|
||||
```
|
||||
|
||||
3. Add an explicit OpenClaw provider entry and point your default model at it.
|
||||
|
||||
## Full config example
|
||||
|
||||
This example uses Gemma 4 on a local `inferrs` server.
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "inferrs/gg-hf-gg/gemma-4-E2B-it" },
|
||||
models: {
|
||||
"inferrs/gg-hf-gg/gemma-4-E2B-it": {
|
||||
alias: "Gemma 4 (inferrs)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
models: {
|
||||
mode: "merge",
|
||||
providers: {
|
||||
inferrs: {
|
||||
baseUrl: "http://127.0.0.1:8080/v1",
|
||||
apiKey: "inferrs-local",
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "gg-hf-gg/gemma-4-E2B-it",
|
||||
name: "Gemma 4 E2B (inferrs)",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
compat: {
|
||||
requiresStringContent: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
## Why `requiresStringContent` matters
|
||||
|
||||
Some `inferrs` Chat Completions routes accept only string
|
||||
`messages[].content`, not structured content-part arrays.
|
||||
|
||||
If OpenClaw runs fail with an error like:
|
||||
|
||||
```text
|
||||
messages[1].content: invalid type: sequence, expected a string
|
||||
```
|
||||
|
||||
set:
|
||||
|
||||
```json5
|
||||
compat: {
|
||||
requiresStringContent: true
|
||||
}
|
||||
```
|
||||
|
||||
OpenClaw will flatten pure text content parts into plain strings before sending
|
||||
the request.
|
||||
|
||||
## Gemma and tool-schema caveat
|
||||
|
||||
Some current `inferrs` + Gemma combinations accept small direct
|
||||
`/v1/chat/completions` requests but still fail on full OpenClaw agent-runtime
|
||||
turns.
|
||||
|
||||
If that happens, try this first:
|
||||
|
||||
```json5
|
||||
compat: {
|
||||
requiresStringContent: true,
|
||||
supportsTools: false
|
||||
}
|
||||
```
|
||||
|
||||
That disables OpenClaw's tool schema surface for the model and can reduce prompt
|
||||
pressure on stricter local backends.
|
||||
|
||||
If tiny direct requests still work but normal OpenClaw agent turns continue to
|
||||
crash inside `inferrs`, the remaining issue is usually upstream model/server
|
||||
behavior rather than OpenClaw's transport layer.
|
||||
|
||||
## Manual smoke test
|
||||
|
||||
Once configured, test both layers:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8080/v1/chat/completions \
|
||||
-H 'content-type: application/json' \
|
||||
-d '{"model":"gg-hf-gg/gemma-4-E2B-it","messages":[{"role":"user","content":"What is 2 + 2?"}],"stream":false}'
|
||||
|
||||
openclaw infer model run \
|
||||
--model inferrs/gg-hf-gg/gemma-4-E2B-it \
|
||||
--prompt "What is 2 + 2? Reply with one short sentence." \
|
||||
--json
|
||||
```
|
||||
|
||||
If the first command works but the second fails, use the troubleshooting notes
|
||||
below.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- `curl /v1/models` fails: `inferrs` is not running, not reachable, or not
|
||||
bound to the expected host/port.
|
||||
- `messages[].content ... expected a string`: set
|
||||
`compat.requiresStringContent: true`.
|
||||
- Direct tiny `/v1/chat/completions` calls pass, but `openclaw infer model run`
|
||||
fails: try `compat.supportsTools: false`.
|
||||
- OpenClaw no longer gets schema errors, but `inferrs` still crashes on larger
|
||||
agent turns: treat it as an upstream `inferrs` or model limitation and reduce
|
||||
prompt pressure or switch local backend/model.
|
||||
|
||||
## Proxy-style behavior
|
||||
|
||||
`inferrs` is treated as a proxy-style OpenAI-compatible `/v1` backend, not a
|
||||
native OpenAI endpoint.
|
||||
|
||||
- native OpenAI-only request shaping does not apply here
|
||||
- no `service_tier`, no Responses `store`, no prompt-cache hints, and no
|
||||
OpenAI reasoning-compat payload shaping
|
||||
- hidden OpenClaw attribution headers (`originator`, `version`, `User-Agent`)
|
||||
are not injected on custom `inferrs` base URLs
|
||||
|
||||
## See also
|
||||
|
||||
- [Local models](/gateway/local-models)
|
||||
- [Gateway troubleshooting](/gateway/troubleshooting#local-openai-compatible-backend-passes-direct-probes-but-agent-runs-fail)
|
||||
- [Model providers](/concepts/model-providers)
|
||||
35
src/agents/openai-completions-string-content.ts
Normal file
35
src/agents/openai-completions-string-content.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
 * Collapses a message `content` value that consists solely of text parts into
 * a single string.
 *
 * @param content - Any value. Only arrays are inspected; everything else is
 *   handed back untouched.
 * @returns The `text` of every part joined with `"\n"` when each element is an
 *   object with `type === "text"` and a string `text`. If any element fails
 *   that check, the original array reference is returned unchanged. An empty
 *   array yields the empty string.
 */
export function flattenStringOnlyCompletionContent(content: unknown): unknown {
  if (!Array.isArray(content)) {
    return content;
  }

  // Local guard: a part qualifies only if it is a non-null object tagged
  // `type: "text"` that carries a string `text`.
  const isTextPart = (part: unknown): part is { type: "text"; text: string } => {
    if (typeof part !== "object" || part === null) {
      return false;
    }
    const candidate = part as { type?: unknown; text?: unknown };
    return candidate.type === "text" && typeof candidate.text === "string";
  };

  const pieces: string[] = [];
  // Index loop on purpose: holes in a sparse array are visited as `undefined`
  // and therefore reject the whole array.
  for (let index = 0; index < content.length; index += 1) {
    const part: unknown = content[index];
    if (!isTextPart(part)) {
      // Bail out on the first non-text part and keep the caller's array as-is.
      return content;
    }
    pieces.push(part.text);
  }
  return pieces.join("\n");
}
|
||||
|
||||
/**
 * Applies `flattenStringOnlyCompletionContent` to the `content` of every
 * message in a list.
 *
 * @param messages - Message entries; entries that are not non-null objects are
 *   passed through untouched.
 * @returns A new array. A message whose flattened content is identical
 *   (`===`) to its original content keeps its original object reference;
 *   otherwise a shallow copy with the replaced `content` is emitted, so the
 *   input messages are never mutated.
 */
export function flattenCompletionMessagesToStringContent(messages: unknown[]): unknown[] {
  const flattenOne = (entry: unknown): unknown => {
    if (typeof entry !== "object" || entry === null) {
      return entry;
    }
    const originalContent = (entry as { content?: unknown }).content;
    const nextContent = flattenStringOnlyCompletionContent(originalContent);
    // Identity comparison: the helper returns its input when nothing changed.
    return nextContent === originalContent ? entry : { ...entry, content: nextContent };
  };
  return messages.map(flattenOne);
}
|
||||
@@ -1079,6 +1079,41 @@ describe("openai transport stream", () => {
|
||||
expect(params.tools?.[0]?.function).not.toHaveProperty("strict");
|
||||
});
|
||||
|
||||
it("flattens pure text content arrays for string-only completions backends when opted in", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
id: "gg-hf-gg/gemma-4-E2B-it",
|
||||
name: "Gemma 4 E2B",
|
||||
api: "openai-completions",
|
||||
provider: "inferrs",
|
||||
baseUrl: "http://127.0.0.1:8080/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
compat: {
|
||||
requiresStringContent: true,
|
||||
} as Record<string, unknown>,
|
||||
} satisfies Model<"openai-completions">,
|
||||
{
|
||||
systemPrompt: "system",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "What is 2 + 2?" }],
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
tools: [],
|
||||
} as never,
|
||||
undefined,
|
||||
) as { messages?: Array<{ role?: string; content?: unknown }> };
|
||||
|
||||
expect(params.messages?.[0]).toMatchObject({ role: "system", content: "system" });
|
||||
expect(params.messages?.[1]).toMatchObject({ role: "user", content: "What is 2 + 2?" });
|
||||
});
|
||||
|
||||
it("uses max_tokens for Chutes default-route completions providers without relying on baseUrl host sniffing", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
|
||||
@@ -23,6 +23,7 @@ import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider
|
||||
import type { ProviderRuntimeModel } from "../plugins/types.js";
|
||||
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
|
||||
import { detectOpenAICompletionsCompat } from "./openai-completions-compat.js";
|
||||
import { flattenCompletionMessagesToStringContent } from "./openai-completions-string-content.js";
|
||||
import {
|
||||
applyOpenAIResponsesPayloadPolicy,
|
||||
resolveOpenAIResponsesPayloadPolicy,
|
||||
@@ -1164,6 +1165,7 @@ function getCompat(model: OpenAIModeModel): {
|
||||
openRouterRouting: Record<string, unknown>;
|
||||
vercelGatewayRouting: Record<string, unknown>;
|
||||
supportsStrictMode: boolean;
|
||||
requiresStringContent: boolean;
|
||||
} {
|
||||
const detected = detectCompat(model);
|
||||
const compat = model.compat ?? {};
|
||||
@@ -1198,6 +1200,7 @@ function getCompat(model: OpenAIModeModel): {
|
||||
detected.vercelGatewayRouting,
|
||||
supportsStrictMode:
|
||||
(compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode,
|
||||
requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1261,9 +1264,12 @@ export function buildOpenAICompletionsParams(
|
||||
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
|
||||
}
|
||||
: context;
|
||||
const messages = convertMessages(model as never, completionsContext, compat as never);
|
||||
const params: Record<string, unknown> = {
|
||||
model: model.id,
|
||||
messages: convertMessages(model as never, completionsContext, compat as never),
|
||||
messages: compat.requiresStringContent
|
||||
? flattenCompletionMessagesToStringContent(messages)
|
||||
: messages,
|
||||
stream: true,
|
||||
};
|
||||
if (compat.supportsUsageInStreaming) {
|
||||
|
||||
@@ -132,6 +132,7 @@ import {
|
||||
createOpenAIReasoningCompatibilityWrapper,
|
||||
createOpenAIResponsesContextManagementWrapper,
|
||||
createOpenAIServiceTierWrapper,
|
||||
createOpenAIStringContentWrapper,
|
||||
createOpenAITextVerbosityWrapper,
|
||||
resolveOpenAIFastMode,
|
||||
resolveOpenAIServiceTier,
|
||||
@@ -170,6 +171,7 @@ function createTestOpenAIProviderWrapper(
|
||||
config: params.context.config,
|
||||
agentDir: params.context.agentDir,
|
||||
});
|
||||
streamFn = createOpenAIStringContentWrapper(streamFn);
|
||||
return createOpenAIResponsesContextManagementWrapper(
|
||||
createOpenAIReasoningCompatibilityWrapper(streamFn),
|
||||
params.context.extraParams,
|
||||
@@ -562,6 +564,54 @@ describe("applyExtraParamsToAgent", () => {
|
||||
expect(payload.parallel_tool_calls).toBe(false);
|
||||
});
|
||||
|
||||
it("flattens pure text OpenAI completions message arrays for string-only compat models", () => {
|
||||
const payload = runResponsesPayloadMutationCase({
|
||||
applyProvider: "inferrs",
|
||||
applyModelId: "gg-hf-gg/gemma-4-E2B-it",
|
||||
model: {
|
||||
api: "openai-completions",
|
||||
provider: "inferrs",
|
||||
id: "gg-hf-gg/gemma-4-E2B-it",
|
||||
name: "Gemma 4 E2B (inferrs)",
|
||||
baseUrl: "http://127.0.0.1:8080/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
compat: {
|
||||
requiresStringContent: true,
|
||||
} as Record<string, unknown>,
|
||||
} as unknown as Model<"openai-completions">,
|
||||
payload: {
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: [{ type: "text", text: "System text" }],
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "Line one" },
|
||||
{ type: "text", text: "Line two" },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(payload.messages).toEqual([
|
||||
{
|
||||
role: "system",
|
||||
content: "System text",
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: "Line one\nLine two",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("injects parallel_tool_calls for openai-responses payloads when configured", () => {
|
||||
const payload = runParallelToolCallsPayloadMutationCase({
|
||||
applyProvider: "openai",
|
||||
|
||||
@@ -16,7 +16,10 @@ import {
|
||||
createSiliconFlowThinkingWrapper,
|
||||
shouldApplySiliconFlowThinkingOffCompat,
|
||||
} from "./moonshot-stream-wrappers.js";
|
||||
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
|
||||
import {
|
||||
createOpenAIResponsesContextManagementWrapper,
|
||||
createOpenAIStringContentWrapper,
|
||||
} from "./openai-stream-wrappers.js";
|
||||
import { resolveCacheRetention } from "./prompt-cache-retention.js";
|
||||
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
|
||||
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
|
||||
@@ -389,6 +392,7 @@ function applyPostPluginStreamWrappers(
|
||||
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
|
||||
): void {
|
||||
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
|
||||
ctx.agent.streamFn = createOpenAIStringContentWrapper(ctx.agent.streamFn);
|
||||
|
||||
if (!ctx.providerWrapperHandled) {
|
||||
// Guard Google-family payloads against invalid negative thinking budgets
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
patchCodexNativeWebSearchPayload,
|
||||
resolveCodexNativeSearchActivation,
|
||||
} from "../codex-native-web-search.js";
|
||||
import { flattenCompletionMessagesToStringContent } from "../openai-completions-string-content.js";
|
||||
import {
|
||||
applyOpenAIResponsesPayloadPolicy,
|
||||
resolveOpenAIResponsesPayloadPolicy,
|
||||
@@ -66,6 +67,17 @@ function shouldApplyOpenAIReasoningCompatibility(model: {
|
||||
return resolveOpenAIRequestCapabilities(model).supportsOpenAIReasoningCompatPayload;
|
||||
}
|
||||
|
||||
/**
 * Decides whether outgoing messages for `model` need string-only content.
 *
 * @param model - Object exposing an `api` identifier and an optional `compat`
 *   bag of compatibility hints.
 * @returns `true` only when `model.api` is exactly `"openai-completions"` and
 *   `model.compat` is an object whose `requiresStringContent` is strictly
 *   `true`; `false` in every other case.
 */
function shouldFlattenOpenAICompletionMessages(model: {
  api?: unknown;
  compat?: unknown;
}): boolean {
  const hints = model.compat;
  if (!hints || typeof hints !== "object") {
    return false;
  }
  if (model.api !== "openai-completions") {
    return false;
  }
  // Strict comparison: truthy non-boolean values do not opt in.
  return (hints as { requiresStringContent?: unknown }).requiresStringContent === true;
}
|
||||
|
||||
function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined {
|
||||
if (typeof value !== "string") {
|
||||
return undefined;
|
||||
@@ -219,6 +231,21 @@ export function createOpenAIReasoningCompatibilityWrapper(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wraps a stream function so that models opting into string-only content get
 * their `messages` flattened.
 *
 * @param baseStreamFn - Stream function to delegate to; `streamSimple` is used
 *   when this is `undefined`.
 * @returns A stream function that calls the underlying one directly unless
 *   `shouldFlattenOpenAICompletionMessages(model)` holds. In that case the
 *   call goes through `streamWithPayloadPatch`, and `payloadObj.messages` is
 *   replaced with its flattened form whenever it is an array.
 */
export function createOpenAIStringContentWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const inner = baseStreamFn ?? streamSimple;
  return (model, context, options) =>
    shouldFlattenOpenAICompletionMessages(model)
      ? // NOTE(review): presumably streamWithPayloadPatch hands the outgoing request
        // payload to this callback before it is sent — confirm in stream-payload-utils.
        streamWithPayloadPatch(inner, model, context, options, (payloadObj) => {
          const { messages } = payloadObj;
          if (Array.isArray(messages)) {
            payloadObj.messages = flattenCompletionMessagesToStringContent(messages);
          }
        })
      : inner(model, context, options);
}
|
||||
|
||||
export function createOpenAIFastModeWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) => {
|
||||
|
||||
@@ -391,6 +391,7 @@ describe("model compat config schema", () => {
|
||||
compat: {
|
||||
supportsUsageInStreaming: true,
|
||||
supportsStrictMode: false,
|
||||
requiresStringContent: true,
|
||||
thinkingFormat: "qwen",
|
||||
requiresToolResultName: true,
|
||||
requiresAssistantAfterToolResult: false,
|
||||
|
||||
@@ -2807,6 +2807,9 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
supportsStrictMode: {
|
||||
type: "boolean",
|
||||
},
|
||||
requiresStringContent: {
|
||||
type: "boolean",
|
||||
},
|
||||
maxTokensField: {
|
||||
anyOf: [
|
||||
{
|
||||
|
||||
@@ -37,6 +37,7 @@ type SupportedThinkingFormat =
|
||||
export type ModelCompatConfig = SupportedOpenAICompatFields & {
|
||||
thinkingFormat?: SupportedThinkingFormat;
|
||||
supportsTools?: boolean;
|
||||
requiresStringContent?: boolean;
|
||||
toolSchemaProfile?: string;
|
||||
unsupportedToolSchemaKeywords?: string[];
|
||||
nativeWebSearchTool?: boolean;
|
||||
|
||||
@@ -189,6 +189,7 @@ export const ModelCompatSchema = z
|
||||
supportsUsageInStreaming: z.boolean().optional(),
|
||||
supportsTools: z.boolean().optional(),
|
||||
supportsStrictMode: z.boolean().optional(),
|
||||
requiresStringContent: z.boolean().optional(),
|
||||
maxTokensField: z
|
||||
.union([z.literal("max_completion_tokens"), z.literal("max_tokens")])
|
||||
.optional(),
|
||||
|
||||
Reference in New Issue
Block a user