fix(agents): preserve reasoning replay from model metadata

Preserve OpenAI-compatible replay reasoning when the selected custom or self-hosted model already has reasoning metadata enabled. The transcript policy now treats existing model metadata as the replay contract instead of requiring a new provider config knob. The OpenAI-compatible serializer preserves reasoning_content for those routes, while keeping the existing safeguards for stock OpenAI, Gemma 4, and OpenRouter models known not to support reasoning replay. Fixes #88068. Replaces #88071.
2026-07-19 22:11:39 +00:00 · 2026-05-31 13:41:44 +01:00
parent 7a22515972
commit cf315ddef6
6 changed files with 149 additions and 6 deletions
--- a/docs/reference/transcript-hygiene.md
+++ b/docs/reference/transcript-hygiene.md
@@ -131,6 +131,8 @@ inter-session user turns that only have provenance metadata.
  reasoning fields such as `reasoning` or `reasoning_content`.
 - Current same-turn tool-call continuations keep the assistant reasoning block
  attached to the tool call until the tool result has been replayed.
+- Custom/self-hosted model entries with `reasoning: true` preserve replayed
+  reasoning metadata.
 - Provider-owned exceptions can opt out when their wire protocol requires
  replayed reasoning metadata.

--- a/src/agents/embedded-agent-runner.sanitize-session-history.test.ts
+++ b/src/agents/embedded-agent-runner.sanitize-session-history.test.ts
@@ -1351,6 +1351,53 @@ describe("sanitizeSessionHistory", () => {
    ]);
  });

+  it("preserves prior assistant reasoning for OpenAI-compatible replay with reasoning model metadata", async () => {
+    setNonGoogleModelApi();
+
+    const messages = castAgentMessages([
+      makeUserMessage("first"),
+      makeAssistantMessage([
+        {
+          type: "thinking",
+          thinking: "private reasoning",
+          thinkingSignature: "reasoning_content",
+        },
+        { type: "text", text: "visible answer" },
+      ]),
+      makeUserMessage("second"),
+    ]);
+
+    const result = await sanitizeSessionHistory({
+      messages,
+      modelApi: "openai-completions",
+      provider: "vllm",
+      modelId: "Qwen3.6-27B",
+      model: {
+        id: "Qwen3.6-27B",
+        name: "Qwen3.6 27B",
+        provider: "vllm",
+        api: "openai-completions",
+        baseUrl: "https://example.invalid",
+        reasoning: true,
+        input: ["text"],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 128_000,
+        maxTokens: 16_384,
+      },
+      sessionManager: makeMockSessionManager(),
+      sessionId: TEST_SESSION_ID,
+    });
+
+    expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
+      {
+        type: "thinking",
+        thinking: "private reasoning",
+        thinkingSignature: "reasoning_content",
+      },
+      { type: "text", text: "visible answer" },
+    ]);
+  });
+
  it.each([
    ["Kimi K2.6", "custom-openai-proxy", "moonshotai/kimi-k2.6"],
    ["MiMo V2.6 Pro", "custom-openai-proxy", "xiaomi/mimo-v2.6-pro"],
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -6696,7 +6696,6 @@ describe("openai transport stream", () => {
    });
  });

-
  it("strips tool call blocks when provider signals finish_reason stop", async () => {
    const model = {
      id: "llama-3.3-70b",
@@ -6914,8 +6913,6 @@ describe("openai transport stream", () => {
    expect((output.content[0] as { type?: string }).type).toBe("text");
  });

-
-
  it("handles reasoning_details from OpenRouter/Qwen3 in completions stream", async () => {
    const model = {
      id: "openrouter/qwen/qwen3-235b-a22b",
@@ -8259,6 +8256,19 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () =>
    maxTokens: 32_000,
  } satisfies Model<"openai-completions">;

+  const customQwenReasoningModel = {
+    id: "Qwen3.6-35B-A3B",
+    name: "Qwen3.6 35B",
+    api: "openai-completions",
+    provider: "custom-openai-proxy",
+    baseUrl: "https://proxy.example.com/v1",
+    reasoning: true,
+    input: ["text"],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: 262_144,
+    maxTokens: 32_000,
+  } satisfies Model<"openai-completions">;
+
  const kimiCodingProxyModel = {
    ...customKimiProxyModel,
    id: "kimi-for-coding",
@@ -8429,6 +8439,17 @@ describe("buildOpenAICompletionsParams sanitizes reasoning replay fields", () =>
    expect(assistant.reasoning).toBe("Need to answer politely.");
  });

+  it("preserves reasoning_content replay for custom reasoning model metadata", () => {
+    const assistant = getAssistantMessage(
+      buildReplayParams(customQwenReasoningModel, "reasoning_content"),
+    );
+
+    expect(assistant.reasoning_content).toBe("Need to answer politely.");
+    expect(assistant).not.toHaveProperty("reasoning_details");
+    expect(assistant).not.toHaveProperty("reasoning");
+    expect(assistant).not.toHaveProperty("reasoning_text");
+  });
+
  it("preserves DeepSeek-style reasoning_content replay for Xiaomi MiMo", () => {
    const assistant = getAssistantMessage(buildReplayParams(xiaomiModel, "reasoning_content"));

--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -28,6 +28,7 @@ import { redactSensitiveText } from "../logging/redact.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js";
 import { resolveProviderTransportTurnStateWithPlugin } from "../plugins/provider-runtime.js";
+import { isGemma4ModelId } from "../shared/google-models.js";
 import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../utils/cjk-chars.js";
 import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
 import { createDeepSeekTextFilter } from "./deepseek-text-filter.js";
@@ -3504,7 +3505,8 @@ function shouldPreserveReasoningContentReplay(
  if (
    compat.requiresReasoningContentOnAssistantMessages ||
    compat.thinkingFormat === "deepseek" ||
-    compat.thinkingFormat === "zai"
+    compat.thinkingFormat === "zai" ||
+    shouldTrustReasoningContentReplayMetadata(model)
  ) {
    return true;
  }
@@ -3521,6 +3523,17 @@ function shouldPreserveOpenRouterReasoningReplay(model: OpenAIModeModel): boolea
  return !(normalizedModelId.startsWith("anthropic/") || normalizedModelId.startsWith("x-ai/"));
 }

+function shouldTrustReasoningContentReplayMetadata(model: OpenAIModeModel): boolean {
+  if (model.reasoning !== true || isGemma4ModelId(model.id)) {
+    return false;
+  }
+  const provider = model.provider.trim().toLowerCase();
+  if (provider === "openai") {
+    return false;
+  }
+  return shouldPreserveOpenRouterReasoningReplay(model);
+}
+
 // OpenAI Chat Completions assistant-message input does not define reasoning
 // replay fields, while OpenRouter and DeepSeek-style providers document
 // compatible pass-back contracts. Keep valid provider-owned replay fields, but
--- a/src/agents/transcript-policy.test.ts
+++ b/src/agents/transcript-policy.test.ts
@@ -1,6 +1,7 @@
 import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { resolveProviderRuntimePlugin } from "../plugins/provider-hook-runtime.js";
+import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.types.js";

 vi.mock("../plugins/provider-hook-runtime.js", async () => {
  const replayHelpers = await vi.importActual<
@@ -229,6 +230,24 @@ describe("resolveTranscriptPolicy", () => {
    expect(policy.validateAnthropicTurns).toBe(true);
  }

+  function makeOpenAiCompatibleReasoningModel(
+    overrides: Partial<ProviderRuntimeModel> = {},
+  ): ProviderRuntimeModel {
+    return {
+      id: "qwen3.6-27b",
+      name: "Qwen3.6 27B",
+      provider: "custom-openai-proxy",
+      api: "openai-completions",
+      baseUrl: "https://example.invalid",
+      reasoning: false,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 128_000,
+      maxTokens: 16_384,
+      ...overrides,
+    };
+  }
+
  it("enables sanitizeToolCallIds for Anthropic provider", () => {
    const policy = resolveTranscriptPolicy({
      provider: "anthropic",
@@ -345,7 +364,7 @@ describe("resolveTranscriptPolicy", () => {
    expect(policy.validateAnthropicTurns).toBe(true);
  });

-  it("strips historical reasoning for strict OpenAI-compatible providers", () => {
+  it("strips historical reasoning for strict OpenAI-compatible providers by default", () => {
    const policy = resolveTranscriptPolicy({
      provider: "custom-openai-proxy",
      modelId: "qwen3.6-27b",
@@ -361,6 +380,17 @@ describe("resolveTranscriptPolicy", () => {
    expect(responsesPolicy.dropReasoningFromHistory).toBe(false);
  });

+  it("preserves historical reasoning for strict OpenAI-compatible models with reasoning metadata", () => {
+    const policy = resolveTranscriptPolicy({
+      provider: "custom-openai-proxy",
+      modelId: "qwen3.6-27b",
+      modelApi: "openai-completions",
+      model: makeOpenAiCompatibleReasoningModel({ reasoning: true }),
+    });
+
+    expect(policy.dropReasoningFromHistory).toBe(false);
+  });
+
  it.each([
    "kimi-for-coding",
    "moonshotai/kimi-k2.6",
@@ -488,6 +518,28 @@ describe("resolveTranscriptPolicy", () => {
    expect(noReasoningPolicy.dropThinkingBlocks).toBe(true);
  });

+  it("does not reuse cached OpenAI-compatible policies across reasoning metadata changes", () => {
+    const config = {} as OpenClawConfig;
+
+    const defaultPolicy = resolveTranscriptPolicy({
+      config,
+      provider: "custom-openai-proxy",
+      modelId: "qwen3.6-27b",
+      modelApi: "openai-completions",
+      model: makeOpenAiCompatibleReasoningModel(),
+    });
+    const reasoningPolicy = resolveTranscriptPolicy({
+      config,
+      provider: "custom-openai-proxy",
+      modelId: "qwen3.6-27b",
+      modelApi: "openai-completions",
+      model: makeOpenAiCompatibleReasoningModel({ reasoning: true }),
+    });
+
+    expect(defaultPolicy.dropReasoningFromHistory).toBe(true);
+    expect(reasoningPolicy.dropReasoningFromHistory).toBe(false);
+  });
+
  it("preserves transport defaults when a runtime plugin has not adopted replay hooks", () => {
    expectStrictOpenAiCompatibleReplayDefaults("vllm");
  });
--- a/src/agents/transcript-policy.ts
+++ b/src/agents/transcript-policy.ts
@@ -95,6 +95,13 @@ function modelDisablesReasoningEffort(model?: ProviderRuntimeModel): boolean {
  return compat?.supportsReasoningEffort === false;
 }

+function shouldPreserveReasoningContentReplay(params: {
+  modelId?: string | null;
+  model?: ProviderRuntimeModel;
+}): boolean {
+  return params.model?.reasoning === true || requiresReasoningContentReplay(params.modelId);
+}
+
 /**
 * Provides a narrow replay-policy fallback for providers that do not have an
 * owning runtime plugin.
@@ -153,7 +160,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
      ? { dropThinkingBlocks: true }
      : {}),
    ...(isStrictOpenAiCompatible
-      ? { dropReasoningFromHistory: !requiresReasoningContentReplay(params.modelId) }
+      ? { dropReasoningFromHistory: !shouldPreserveReasoningContentReplay(params) }
      : {}),
    ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
    ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
@@ -268,6 +275,7 @@ function resolveTranscriptPolicyCacheKey(params: {
    modelApi: params.modelApi ?? "",
    modelId: params.modelId ?? "",
    dropsThinkingForReasoningCompat: modelDisablesReasoningEffort(params.model),
+    preservesReasoningContentReplay: params.model?.reasoning === true,
    workspaceDir: params.workspaceDir ?? "",
    pluginControlPlane: resolvePluginControlPlaneFingerprint({
      config: params.config,