fix: support string-only completions for the inferrs provider

This commit is contained in:
Peter Steinberger
2026-04-07 15:52:41 +01:00
parent ea9efc0e81
commit 9d4b0d551d
18 changed files with 435 additions and 5 deletions

View File

@@ -0,0 +1,35 @@
/**
 * Collapse an array of `{ type: "text", text: string }` parts into a single
 * newline-joined string. Non-array input, or an array containing anything
 * other than pure text parts, is returned untouched so callers can fall back
 * to the original structured content.
 */
export function flattenStringOnlyCompletionContent(content: unknown): unknown {
  if (!Array.isArray(content)) {
    return content;
  }
  const isTextPart = (part: unknown): part is { type: "text"; text: string } =>
    typeof part === "object" &&
    part !== null &&
    (part as { type?: unknown }).type === "text" &&
    typeof (part as { text?: unknown }).text === "string";
  if (!content.every(isTextPart)) {
    return content;
  }
  return content.map((part) => (part as { text: string }).text).join("\n");
}
/**
 * Rewrite each chat message so that text-only content arrays become plain
 * strings (for completions backends that reject structured content parts).
 * Non-object messages and messages whose content cannot be flattened are
 * passed through by reference, unchanged.
 */
export function flattenCompletionMessagesToStringContent(messages: unknown[]): unknown[] {
  return messages.map((message) => {
    if (typeof message !== "object" || message === null) {
      return message;
    }
    const original = (message as { content?: unknown }).content;
    const flattened = flattenStringOnlyCompletionContent(original);
    // Preserve object identity when nothing changed so downstream
    // reference-equality checks keep working.
    return flattened === original ? message : { ...message, content: flattened };
  });
}

View File

@@ -1079,6 +1079,41 @@ describe("openai transport stream", () => {
expect(params.tools?.[0]?.function).not.toHaveProperty("strict");
});
// Regression test: providers that set compat.requiresStringContent (e.g. the
// local "inferrs" backend) reject structured content-part arrays, so
// buildOpenAICompletionsParams must emit plain string content for both the
// system prompt and pure-text user messages.
it("flattens pure text content arrays for string-only completions backends when opted in", () => {
const params = buildOpenAICompletionsParams(
{
id: "gg-hf-gg/gemma-4-E2B-it",
name: "Gemma 4 E2B",
api: "openai-completions",
provider: "inferrs",
baseUrl: "http://127.0.0.1:8080/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 131072,
maxTokens: 4096,
// Opt-in flag under test: forces string-only message content.
compat: {
requiresStringContent: true,
} as Record<string, unknown>,
} satisfies Model<"openai-completions">,
{
systemPrompt: "system",
messages: [
{
role: "user",
// Single text part — should collapse to a bare string.
content: [{ type: "text", text: "What is 2 + 2?" }],
timestamp: Date.now(),
},
],
tools: [],
} as never,
undefined,
) as { messages?: Array<{ role?: string; content?: unknown }> };
expect(params.messages?.[0]).toMatchObject({ role: "system", content: "system" });
expect(params.messages?.[1]).toMatchObject({ role: "user", content: "What is 2 + 2?" });
});
it("uses max_tokens for Chutes default-route completions providers without relying on baseUrl host sniffing", () => {
const params = buildOpenAICompletionsParams(
{

View File

@@ -23,6 +23,7 @@ import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider
import type { ProviderRuntimeModel } from "../plugins/types.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
import { detectOpenAICompletionsCompat } from "./openai-completions-compat.js";
import { flattenCompletionMessagesToStringContent } from "./openai-completions-string-content.js";
import {
applyOpenAIResponsesPayloadPolicy,
resolveOpenAIResponsesPayloadPolicy,
@@ -1164,6 +1165,7 @@ function getCompat(model: OpenAIModeModel): {
openRouterRouting: Record<string, unknown>;
vercelGatewayRouting: Record<string, unknown>;
supportsStrictMode: boolean;
requiresStringContent: boolean;
} {
const detected = detectCompat(model);
const compat = model.compat ?? {};
@@ -1198,6 +1200,7 @@ function getCompat(model: OpenAIModeModel): {
detected.vercelGatewayRouting,
supportsStrictMode:
(compat.supportsStrictMode as boolean | undefined) ?? detected.supportsStrictMode,
requiresStringContent: (compat.requiresStringContent as boolean | undefined) ?? false,
};
}
@@ -1261,9 +1264,12 @@ export function buildOpenAICompletionsParams(
systemPrompt: stripSystemPromptCacheBoundary(context.systemPrompt),
}
: context;
const messages = convertMessages(model as never, completionsContext, compat as never);
const params: Record<string, unknown> = {
model: model.id,
messages: convertMessages(model as never, completionsContext, compat as never),
messages: compat.requiresStringContent
? flattenCompletionMessagesToStringContent(messages)
: messages,
stream: true,
};
if (compat.supportsUsageInStreaming) {

View File

@@ -132,6 +132,7 @@ import {
createOpenAIReasoningCompatibilityWrapper,
createOpenAIResponsesContextManagementWrapper,
createOpenAIServiceTierWrapper,
createOpenAIStringContentWrapper,
createOpenAITextVerbosityWrapper,
resolveOpenAIFastMode,
resolveOpenAIServiceTier,
@@ -170,6 +171,7 @@ function createTestOpenAIProviderWrapper(
config: params.context.config,
agentDir: params.context.agentDir,
});
streamFn = createOpenAIStringContentWrapper(streamFn);
return createOpenAIResponsesContextManagementWrapper(
createOpenAIReasoningCompatibilityWrapper(streamFn),
params.context.extraParams,
@@ -562,6 +564,54 @@ describe("applyExtraParamsToAgent", () => {
expect(payload.parallel_tool_calls).toBe(false);
});
// Regression test for the stream-wrapper path: when a model opts in via
// compat.requiresStringContent, the payload-mutation wrapper must rewrite
// text-only content arrays into plain strings, joining multiple text parts
// with newlines.
it("flattens pure text OpenAI completions message arrays for string-only compat models", () => {
const payload = runResponsesPayloadMutationCase({
applyProvider: "inferrs",
applyModelId: "gg-hf-gg/gemma-4-E2B-it",
model: {
api: "openai-completions",
provider: "inferrs",
id: "gg-hf-gg/gemma-4-E2B-it",
name: "Gemma 4 E2B (inferrs)",
baseUrl: "http://127.0.0.1:8080/v1",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 131072,
maxTokens: 4096,
// Opt-in flag under test: forces string-only message content.
compat: {
requiresStringContent: true,
} as Record<string, unknown>,
} as unknown as Model<"openai-completions">,
payload: {
messages: [
{
role: "system",
content: [{ type: "text", text: "System text" }],
},
{
role: "user",
// Two text parts — should be joined with "\n".
content: [
{ type: "text", text: "Line one" },
{ type: "text", text: "Line two" },
],
},
],
},
});
expect(payload.messages).toEqual([
{
role: "system",
content: "System text",
},
{
role: "user",
content: "Line one\nLine two",
},
]);
});
it("injects parallel_tool_calls for openai-responses payloads when configured", () => {
const payload = runParallelToolCallsPayloadMutationCase({
applyProvider: "openai",

View File

@@ -16,7 +16,10 @@ import {
createSiliconFlowThinkingWrapper,
shouldApplySiliconFlowThinkingOffCompat,
} from "./moonshot-stream-wrappers.js";
import { createOpenAIResponsesContextManagementWrapper } from "./openai-stream-wrappers.js";
import {
createOpenAIResponsesContextManagementWrapper,
createOpenAIStringContentWrapper,
} from "./openai-stream-wrappers.js";
import { resolveCacheRetention } from "./prompt-cache-retention.js";
import { createOpenRouterSystemCacheWrapper } from "./proxy-stream-wrappers.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
@@ -389,6 +392,7 @@ function applyPostPluginStreamWrappers(
ctx: ApplyExtraParamsContext & { providerWrapperHandled: boolean },
): void {
ctx.agent.streamFn = createOpenRouterSystemCacheWrapper(ctx.agent.streamFn);
ctx.agent.streamFn = createOpenAIStringContentWrapper(ctx.agent.streamFn);
if (!ctx.providerWrapperHandled) {
// Guard Google-family payloads against invalid negative thinking budgets

View File

@@ -7,6 +7,7 @@ import {
patchCodexNativeWebSearchPayload,
resolveCodexNativeSearchActivation,
} from "../codex-native-web-search.js";
import { flattenCompletionMessagesToStringContent } from "../openai-completions-string-content.js";
import {
applyOpenAIResponsesPayloadPolicy,
resolveOpenAIResponsesPayloadPolicy,
@@ -66,6 +67,17 @@ function shouldApplyOpenAIReasoningCompatibility(model: {
return resolveOpenAIRequestCapabilities(model).supportsOpenAIReasoningCompatPayload;
}
/**
 * A model's messages are flattened to plain strings only when it speaks the
 * OpenAI completions API AND its compat config explicitly opts in with
 * `requiresStringContent: true`. Any other shape (missing compat, non-object
 * compat, other APIs) leaves the payload untouched.
 */
function shouldFlattenOpenAICompletionMessages(model: {
  api?: unknown;
  compat?: unknown;
}): boolean {
  if (model.api !== "openai-completions") {
    return false;
  }
  const compat = model.compat;
  if (typeof compat !== "object" || compat === null) {
    return false;
  }
  return (compat as { requiresStringContent?: unknown }).requiresStringContent === true;
}
function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined {
if (typeof value !== "string") {
return undefined;
@@ -219,6 +231,21 @@ export function createOpenAIReasoningCompatibilityWrapper(
};
}
/**
 * Stream wrapper that, for string-only completions models (see
 * shouldFlattenOpenAICompletionMessages), patches the outgoing payload so
 * text-only message content arrays become plain strings. Models that do not
 * opt in pass straight through to the underlying stream function.
 */
export function createOpenAIStringContentWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const inner = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (!shouldFlattenOpenAICompletionMessages(model)) {
      return inner(model, context, options);
    }
    return streamWithPayloadPatch(inner, model, context, options, (payload) => {
      const messages = payload.messages;
      if (Array.isArray(messages)) {
        payload.messages = flattenCompletionMessagesToStringContent(messages);
      }
    });
  };
}
export function createOpenAIFastModeWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
const underlying = baseStreamFn ?? streamSimple;
return (model, context, options) => {

View File

@@ -391,6 +391,7 @@ describe("model compat config schema", () => {
compat: {
supportsUsageInStreaming: true,
supportsStrictMode: false,
requiresStringContent: true,
thinkingFormat: "qwen",
requiresToolResultName: true,
requiresAssistantAfterToolResult: false,

View File

@@ -2807,6 +2807,9 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
supportsStrictMode: {
type: "boolean",
},
requiresStringContent: {
type: "boolean",
},
maxTokensField: {
anyOf: [
{

View File

@@ -37,6 +37,7 @@ type SupportedThinkingFormat =
export type ModelCompatConfig = SupportedOpenAICompatFields & {
thinkingFormat?: SupportedThinkingFormat;
supportsTools?: boolean;
requiresStringContent?: boolean;
toolSchemaProfile?: string;
unsupportedToolSchemaKeywords?: string[];
nativeWebSearchTool?: boolean;

View File

@@ -189,6 +189,7 @@ export const ModelCompatSchema = z
supportsUsageInStreaming: z.boolean().optional(),
supportsTools: z.boolean().optional(),
supportsStrictMode: z.boolean().optional(),
requiresStringContent: z.boolean().optional(),
maxTokensField: z
.union([z.literal("max_completion_tokens"), z.literal("max_tokens")])
.optional(),