fix(agents): preserve reasoning replay from model metadata

Preserve OpenAI-compatible replay reasoning when the selected custom or self-hosted model already has reasoning metadata enabled.

The transcript policy now treats existing model metadata as the replay contract instead of requiring a new provider config knob, and the OpenAI-compatible serializer preserves reasoning_content for those routes while keeping stock OpenAI, Gemma 4, and known non-replayable OpenRouter safeguards.

Fixes #88068.
Replaces #88071.
This commit is contained in:
Peter Steinberger
2026-05-31 13:41:44 +01:00
committed by GitHub
parent 7a22515972
commit cf315ddef6
6 changed files with 149 additions and 6 deletions

View File

@@ -131,6 +131,8 @@ inter-session user turns that only have provenance metadata.
reasoning fields such as `reasoning` or `reasoning_content`.
- Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed.
- Custom/self-hosted model entries with `reasoning: true` preserve replayed
reasoning metadata.
- Provider-owned exceptions can opt out of reasoning stripping when their wire
protocol requires replayed reasoning metadata.

View File

@@ -1351,6 +1351,53 @@ describe("sanitizeSessionHistory", () => {
]);
});
// Regression test: when the selected model entry carries `reasoning: true`,
// sanitizeSessionHistory must hand back the earlier assistant turn with its
// thinking block untouched on an `openai-completions` route.
it("preserves prior assistant reasoning for OpenAI-compatible replay with reasoning model metadata", async () => {
setNonGoogleModelApi();
// History shape: user -> assistant (thinking + text) -> user.
const messages = castAgentMessages([
makeUserMessage("first"),
makeAssistantMessage([
{
type: "thinking",
thinking: "private reasoning",
thinkingSignature: "reasoning_content",
},
{ type: "text", text: "visible answer" },
]),
makeUserMessage("second"),
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "vllm",
modelId: "Qwen3.6-27B",
model: {
id: "Qwen3.6-27B",
name: "Qwen3.6 27B",
provider: "vllm",
api: "openai-completions",
baseUrl: "https://example.invalid",
// The metadata flag under test.
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128_000,
maxTokens: 16_384,
},
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
// Index 1 is the assistant turn; both content blocks must be returned as supplied.
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
{
type: "thinking",
thinking: "private reasoning",
thinkingSignature: "reasoning_content",
},
{ type: "text", text: "visible answer" },
]);
});
it.each([
["Kimi K2.6", "custom-openai-proxy", "moonshotai/kimi-k2.6"],
["MiMo V2.6 Pro", "custom-openai-proxy", "xiaomi/mimo-v2.6-pro"],

View File

@@ -6696,7 +6696,6 @@ describe("openai transport stream", () => {
});
});
it("strips tool call blocks when provider signals finish_reason stop", async () => {
const model = {
id: "llama-3.3-70b",
@@ -6914,8 +6913,6 @@ describe("openai transport stream", () => {
expect((output.content[0] as { type?: string }).type).toBe("text");
});
it("handles reasoning_details from OpenRouter/Qwen3 in completions stream", async () => {
const model = {
id: "openrouter/qwen/qwen3-235b-a22b",
@@ -8259,6 +8256,19 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () =>
maxTokens: 32_000,
} satisfies Model<"openai-completions">;
// Fixture: a Qwen model served through a custom OpenAI-compatible proxy. The
// literal sets `reasoning: true` and declares no `compat` block, so the model
// metadata is the only reasoning-related signal it carries.
const customQwenReasoningModel = {
id: "Qwen3.6-35B-A3B",
name: "Qwen3.6 35B",
api: "openai-completions",
provider: "custom-openai-proxy",
baseUrl: "https://proxy.example.com/v1",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 262_144,
maxTokens: 32_000,
} satisfies Model<"openai-completions">;
const kimiCodingProxyModel = {
...customKimiProxyModel,
id: "kimi-for-coding",
@@ -8429,6 +8439,17 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () =>
expect(assistant.reasoning).toBe("Need to answer politely.");
});
// For a custom model that declares `reasoning: true`, the replayed assistant
// message must carry the reasoning text in `reasoning_content` and in none of
// the other reasoning fields checked below.
it("preserves reasoning_content replay for custom reasoning model metadata", () => {
const assistant = getAssistantMessage(
buildReplayParams(customQwenReasoningModel, "reasoning_content"),
);
expect(assistant.reasoning_content).toBe("Need to answer politely.");
// Alternate reasoning field names must be absent from the payload.
expect(assistant).not.toHaveProperty("reasoning_details");
expect(assistant).not.toHaveProperty("reasoning");
expect(assistant).not.toHaveProperty("reasoning_text");
});
it("preserves DeepSeek-style reasoning_content replay for Xiaomi MiMo", () => {
const assistant = getAssistantMessage(buildReplayParams(xiaomiModel, "reasoning_content"));

View File

@@ -28,6 +28,7 @@ import { redactSensitiveText } from "../logging/redact.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js";
import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider-runtime.js";
import { isGemma4ModelId } from "../shared/google-models.js";
import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../utils/cjk-chars.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
import { createDeepSeekTextFilter } from "./deepseek-text-filter.js";
@@ -3504,7 +3505,8 @@ function shouldPreserveReasoningContentReplay(
if (
compat.requiresReasoningContentOnAssistantMessages ||
compat.thinkingFormat === "deepseek" ||
compat.thinkingFormat === "zai"
compat.thinkingFormat === "zai" ||
shouldTrustReasoningContentReplayMetadata(model)
) {
return true;
}
@@ -3521,6 +3523,17 @@ function shouldPreserveOpenRouterReasoningReplay(model: OpenAIModeModel): boolea
return !(normalizedModelId.startsWith("anthropic/") || normalizedModelId.startsWith("x-ai/"));
}
/**
 * Decides whether a model's own `reasoning: true` metadata is sufficient
 * grounds to keep `reasoning_content` on replayed assistant messages.
 *
 * The metadata is not trusted when:
 * - the model does not declare `reasoning: true`;
 * - the model id is recognized by `isGemma4ModelId`;
 * - the provider, trimmed and lower-cased, is exactly `"openai"`.
 *
 * Every other model is handed to `shouldPreserveOpenRouterReasoningReplay`,
 * whose verdict is returned unchanged.
 *
 * @param model - The model whose metadata is being evaluated.
 * @returns `true` when replayed `reasoning_content` should be preserved.
 */
function shouldTrustReasoningContentReplayMetadata(model: OpenAIModeModel): boolean {
  // Without an explicit reasoning flag there is no metadata contract to honor.
  if (model.reasoning !== true) {
    return false;
  }
  if (isGemma4ModelId(model.id)) {
    return false;
  }
  const normalizedProvider = model.provider.trim().toLowerCase();
  const isStockOpenAi = normalizedProvider === "openai";
  return isStockOpenAi ? false : shouldPreserveOpenRouterReasoningReplay(model);
}
// OpenAI Chat Completions assistant-message input does not define reasoning
// replay fields, while OpenRouter and DeepSeek-style providers document
// compatible pass-back contracts. Keep valid provider-owned replay fields, but

View File

@@ -1,6 +1,7 @@
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { resolveProviderRuntimePlugin } from "../plugins/provider-hook-runtime.js";
import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js";
vi.mock("../plugins/provider-hook-runtime.js", async () => {
const replayHelpers = await vi.importActual<
@@ -229,6 +230,24 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
}
/**
 * Builds a `ProviderRuntimeModel` fixture for a custom OpenAI-compatible proxy.
 *
 * The defaults describe an `openai-completions` model with `reasoning: false`;
 * any field supplied in `overrides` replaces the matching default.
 *
 * @param overrides - Fields to replace on the default fixture.
 * @returns The default fixture merged with `overrides`.
 */
function makeOpenAiCompatibleReasoningModel(
  overrides: Partial<ProviderRuntimeModel> = {},
): ProviderRuntimeModel {
  const defaults: ProviderRuntimeModel = {
    id: "qwen3.6-27b",
    name: "Qwen3.6 27B",
    provider: "custom-openai-proxy",
    api: "openai-completions",
    baseUrl: "https://example.invalid",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128_000,
    maxTokens: 16_384,
  };
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
it("enables sanitizeToolCallIds for Anthropic provider", () => {
const policy = resolveTranscriptPolicy({
provider: "anthropic",
@@ -345,7 +364,7 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
it("strips historical reasoning for strict OpenAI-compatible providers", () => {
it("strips historical reasoning for strict OpenAI-compatible providers by default", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "qwen3.6-27b",
@@ -361,6 +380,17 @@ describe("resolveTranscriptPolicy", () => {
expect(responsesPolicy.dropReasoningFromHistory).toBe(false);
});
// With `reasoning: true` on the supplied model, the resolved policy must not
// drop reasoning from history for a custom `openai-completions` provider.
it("preserves historical reasoning for strict OpenAI-compatible models with reasoning metadata", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "qwen3.6-27b",
modelApi: "openai-completions",
model: makeOpenAiCompatibleReasoningModel({ reasoning: true }),
});
expect(policy.dropReasoningFromHistory).toBe(false);
});
it.each([
"kimi-for-coding",
"moonshotai/kimi-k2.6",
@@ -488,6 +518,28 @@ describe("resolveTranscriptPolicy", () => {
expect(noReasoningPolicy.dropThinkingBlocks).toBe(true);
});
// Both calls share the same config object, provider, modelId and modelApi; the
// only difference is `model.reasoning`. The two policies must therefore differ,
// which would fail if a policy resolved for the first call were reused for the
// second.
it("does not reuse cached OpenAI-compatible policies across reasoning metadata changes", () => {
const config = {} as OpenClawConfig;
// First resolution: helper default, `reasoning: false`.
const defaultPolicy = resolveTranscriptPolicy({
config,
provider: "custom-openai-proxy",
modelId: "qwen3.6-27b",
modelApi: "openai-completions",
model: makeOpenAiCompatibleReasoningModel(),
});
// Second resolution: identical inputs except `reasoning: true`.
const reasoningPolicy = resolveTranscriptPolicy({
config,
provider: "custom-openai-proxy",
modelId: "qwen3.6-27b",
modelApi: "openai-completions",
model: makeOpenAiCompatibleReasoningModel({ reasoning: true }),
});
expect(defaultPolicy.dropReasoningFromHistory).toBe(true);
expect(reasoningPolicy.dropReasoningFromHistory).toBe(false);
});
it("preserves transport defaults when a runtime plugin has not adopted replay hooks", () => {
expectStrictOpenAiCompatibleReplayDefaults("vllm");
});

View File

@@ -95,6 +95,13 @@ function modelDisablesReasoningEffort(model?: ProviderRuntimeModel): boolean {
return compat?.supportsReasoningEffort === false;
}
/**
 * Reports whether historical reasoning should be kept when replaying a
 * transcript for the given model.
 *
 * Reasoning is preserved when the supplied model metadata declares
 * `reasoning: true`; otherwise the decision falls back to
 * `requiresReasoningContentReplay` for the model id.
 *
 * @param params.modelId - Model identifier passed to the id-based fallback.
 * @param params.model - Optional runtime model metadata.
 * @returns `true` when replayed reasoning should be preserved.
 */
function shouldPreserveReasoningContentReplay(params: {
  modelId?: string | null;
  model?: ProviderRuntimeModel;
}): boolean {
  const { model, modelId } = params;
  // Explicit model metadata wins; the id-based check is skipped in that case.
  if (model?.reasoning === true) {
    return true;
  }
  return requiresReasoningContentReplay(modelId);
}
/**
* Provides a narrow replay-policy fallback for providers that do not have an
* owning runtime plugin.
@@ -153,7 +160,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
? { dropThinkingBlocks: true }
: {}),
...(isStrictOpenAiCompatible
? { dropReasoningFromHistory: !requiresReasoningContentReplay(params.modelId) }
? { dropReasoningFromHistory: !shouldPreserveReasoningContentReplay(params) }
: {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
@@ -268,6 +275,7 @@ function resolveTranscriptPolicyCacheKey(params: {
modelApi: params.modelApi ?? "",
modelId: params.modelId ?? "",
dropsThinkingForReasoningCompat: modelDisablesReasoningEffort(params.model),
preservesReasoningContentReplay: params.model?.reasoning === true,
workspaceDir: params.workspaceDir ?? "",
pluginControlPlane: resolvePluginControlPlaneFingerprint({
config: params.config,