diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index f4b83ac1f46..7b777258bf6 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -131,6 +131,8 @@ inter-session user turns that only have provenance metadata. reasoning fields such as `reasoning` or `reasoning_content`. - Current same-turn tool-call continuations keep the assistant reasoning block attached to the tool call until the tool result has been replayed. +- Custom/self-hosted model entries with `reasoning: true` preserve replayed + reasoning metadata. - Provider-owned exceptions can opt out when their wire protocol requires replayed reasoning metadata. diff --git a/src/agents/embedded-agent-runner.sanitize-session-history.test.ts b/src/agents/embedded-agent-runner.sanitize-session-history.test.ts index 959d967d38a..472e5f7f038 100644 --- a/src/agents/embedded-agent-runner.sanitize-session-history.test.ts +++ b/src/agents/embedded-agent-runner.sanitize-session-history.test.ts @@ -1351,6 +1351,53 @@ describe("sanitizeSessionHistory", () => { ]); }); + it("preserves prior assistant reasoning for OpenAI-compatible replay with reasoning model metadata", async () => { + setNonGoogleModelApi(); + + const messages = castAgentMessages([ + makeUserMessage("first"), + makeAssistantMessage([ + { + type: "thinking", + thinking: "private reasoning", + thinkingSignature: "reasoning_content", + }, + { type: "text", text: "visible answer" }, + ]), + makeUserMessage("second"), + ]); + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-completions", + provider: "vllm", + modelId: "Qwen3.6-27B", + model: { + id: "Qwen3.6-27B", + name: "Qwen3.6 27B", + provider: "vllm", + api: "openai-completions", + baseUrl: "https://example.invalid", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128_000, + maxTokens: 16_384, + }, + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect((result[1] as Extract).content).toEqual([ + { + type: "thinking", + thinking: "private reasoning", + thinkingSignature: "reasoning_content", + }, + { type: "text", text: "visible answer" }, + ]); + }); + it.each([ ["Kimi K2.6", "custom-openai-proxy", "moonshotai/kimi-k2.6"], ["MiMo V2.6 Pro", "custom-openai-proxy", "xiaomi/mimo-v2.6-pro"], diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index 951d7f86dcc..f4c21106b39 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -6696,7 +6696,6 @@ describe("openai transport stream", () => { }); }); - it("strips tool call blocks when provider signals finish_reason stop", async () => { const model = { id: "llama-3.3-70b", @@ -6914,8 +6913,6 @@ describe("openai transport stream", () => { expect((output.content[0] as { type?: string }).type).toBe("text"); }); - - it("handles reasoning_details from OpenRouter/Qwen3 in completions stream", async () => { const model = { id: "openrouter/qwen/qwen3-235b-a22b", @@ -8259,6 +8256,19 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () => maxTokens: 32_000, } satisfies Model<"openai-completions">; + const customQwenReasoningModel = { + id: "Qwen3.6-35B-A3B", + name: "Qwen3.6 35B", + api: "openai-completions", + provider: "custom-openai-proxy", + baseUrl: "https://proxy.example.com/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 262_144, + maxTokens: 32_000, + } satisfies Model<"openai-completions">; + const kimiCodingProxyModel = { ...customKimiProxyModel, id: "kimi-for-coding", @@ -8429,6 +8439,17 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () => expect(assistant.reasoning).toBe("Need to answer politely."); }); + it("preserves reasoning_content replay for custom reasoning model metadata", () => { + const assistant = getAssistantMessage( + buildReplayParams(customQwenReasoningModel, "reasoning_content"), + ); + + expect(assistant.reasoning_content).toBe("Need to answer politely."); + expect(assistant).not.toHaveProperty("reasoning_details"); + expect(assistant).not.toHaveProperty("reasoning"); + expect(assistant).not.toHaveProperty("reasoning_text"); + }); + it("preserves DeepSeek-style reasoning_content replay for Xiaomi MiMo", () => { const assistant = getAssistantMessage(buildReplayParams(xiaomiModel, "reasoning_content")); diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index b32a2db3624..ec456f5ce2d 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -28,6 +28,7 @@ import { redactSensitiveText } from "../logging/redact.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js"; import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider-runtime.js"; +import { isGemma4ModelId } from "../shared/google-models.js"; import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../utils/cjk-chars.js"; import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js"; import { createDeepSeekTextFilter } from "./deepseek-text-filter.js"; @@ -3504,7 +3505,8 @@ function shouldPreserveReasoningContentReplay( if ( compat.requiresReasoningContentOnAssistantMessages || compat.thinkingFormat === "deepseek" || - compat.thinkingFormat === "zai" + compat.thinkingFormat === "zai" || + shouldTrustReasoningContentReplayMetadata(model) ) { return true; } @@ -3521,6 +3523,17 @@ function shouldPreserveOpenRouterReasoningReplay(model: OpenAIModeModel): boolea return !(normalizedModelId.startsWith("anthropic/") || normalizedModelId.startsWith("x-ai/")); } +function shouldTrustReasoningContentReplayMetadata(model: OpenAIModeModel): boolean { + if (model.reasoning !== true || isGemma4ModelId(model.id)) { + return false; + } + const provider = model.provider.trim().toLowerCase(); + if (provider === "openai") { + return false; + } + return shouldPreserveOpenRouterReasoningReplay(model); +} + // OpenAI Chat Completions assistant-message input does not define reasoning // replay fields, while OpenRouter and DeepSeek-style providers document // compatible pass-back contracts. Keep valid provider-owned replay fields, but diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 2727dd0779e..2bb5972bdd9 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -1,6 +1,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { resolveProviderRuntimePlugin } from "../plugins/provider-hook-runtime.js"; +import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js"; vi.mock("../plugins/provider-hook-runtime.js", async () => { const replayHelpers = await vi.importActual< @@ -229,6 +230,24 @@ describe("resolveTranscriptPolicy", () => { expect(policy.validateAnthropicTurns).toBe(true); } + function makeOpenAiCompatibleReasoningModel( + overrides: Partial = {}, + ): ProviderRuntimeModel { + return { + id: "qwen3.6-27b", + name: "Qwen3.6 27B", + provider: "custom-openai-proxy", + api: "openai-completions", + baseUrl: "https://example.invalid", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128_000, + maxTokens: 16_384, + ...overrides, + }; + } + it("enables sanitizeToolCallIds for Anthropic provider", () => { const policy = resolveTranscriptPolicy({ provider: "anthropic", @@ -345,7 +364,7 @@ describe("resolveTranscriptPolicy", () => { expect(policy.validateAnthropicTurns).toBe(true); }); - it("strips historical reasoning for strict OpenAI-compatible providers", () => { + it("strips historical reasoning for strict OpenAI-compatible providers by default", () => { const policy = resolveTranscriptPolicy({ provider: "custom-openai-proxy", modelId: "qwen3.6-27b", @@ -361,6 +380,17 @@ describe("resolveTranscriptPolicy", () => { expect(responsesPolicy.dropReasoningFromHistory).toBe(false); }); + it("preserves historical reasoning for strict OpenAI-compatible models with reasoning metadata", () => { + const policy = resolveTranscriptPolicy({ + provider: "custom-openai-proxy", + modelId: "qwen3.6-27b", + modelApi: "openai-completions", + model: makeOpenAiCompatibleReasoningModel({ reasoning: true }), + }); + + expect(policy.dropReasoningFromHistory).toBe(false); + }); + it.each([ "kimi-for-coding", "moonshotai/kimi-k2.6", @@ -488,6 +518,28 @@ describe("resolveTranscriptPolicy", () => { expect(noReasoningPolicy.dropThinkingBlocks).toBe(true); }); + it("does not reuse cached OpenAI-compatible policies across reasoning metadata changes", () => { + const config = {} as OpenClawConfig; + + const defaultPolicy = resolveTranscriptPolicy({ + config, + provider: "custom-openai-proxy", + modelId: "qwen3.6-27b", + modelApi: "openai-completions", + model: makeOpenAiCompatibleReasoningModel(), + }); + const reasoningPolicy = resolveTranscriptPolicy({ + config, + provider: "custom-openai-proxy", + modelId: "qwen3.6-27b", + modelApi: "openai-completions", + model: makeOpenAiCompatibleReasoningModel({ reasoning: true }), + }); + + expect(defaultPolicy.dropReasoningFromHistory).toBe(true); + expect(reasoningPolicy.dropReasoningFromHistory).toBe(false); + }); + it("preserves transport defaults when a runtime plugin has not adopted replay hooks", () => { expectStrictOpenAiCompatibleReplayDefaults("vllm"); }); diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index 5cabe2bba74..fd39f0b6987 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -95,6 +95,13 @@ function modelDisablesReasoningEffort(model?: ProviderRuntimeModel): boolean { return compat?.supportsReasoningEffort === false; } +function shouldPreserveReasoningContentReplay(params: { + modelId?: string | null; + model?: ProviderRuntimeModel; +}): boolean { + return params.model?.reasoning === true || requiresReasoningContentReplay(params.modelId); +} + /** * Provides a narrow replay-policy fallback for providers that do not have an * owning runtime plugin. @@ -153,7 +160,7 @@ function buildUnownedProviderTransportReplayFallback(params: { ? { dropThinkingBlocks: true } : {}), ...(isStrictOpenAiCompatible - ? { dropReasoningFromHistory: !requiresReasoningContentReplay(params.modelId) } + ? { dropReasoningFromHistory: !shouldPreserveReasoningContentReplay(params) } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}), @@ -268,6 +275,7 @@ function resolveTranscriptPolicyCacheKey(params: { modelApi: params.modelApi ?? "", modelId: params.modelId ?? "", dropsThinkingForReasoningCompat: modelDisablesReasoningEffort(params.model), + preservesReasoningContentReplay: params.model?.reasoning === true, workspaceDir: params.workspaceDir ?? "", pluginControlPlane: resolvePluginControlPlaneFingerprint({ config: params.config,