fix(agents): strip Gemma reasoning from local replay

This commit is contained in:
Peter Steinberger
2026-04-27 08:26:21 +01:00
parent f427ddc220
commit 556c3e87df
18 changed files with 366 additions and 11 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- CLI/update: keep the automatic post-update completion refresh on the core-command tree so it no longer stages bundled plugin runtime deps before the Gateway restart path, avoiding `.24` update hangs and 1006 disconnect cascades. Fixes #72665. Thanks @sakalaboator and @He-Pin.
- Agents/Bedrock: stop heartbeat runs from persisting blank user transcript turns and repair existing blank user text messages before replay, preventing AWS Bedrock `ContentBlock` blank-text validation failures. Fixes #72640 and #72622. Thanks @goldzulu.
- Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
- LM Studio: allow interactive onboarding to leave the API key blank for unauthenticated local servers, using local synthetic auth while clearing stale LM Studio auth profiles. Fixes #66937. Thanks @olamedia.
- Process/Windows: decode command stdout and stderr from raw bytes with console-codepage awareness, while preserving valid UTF-8 output and multibyte characters split across chunks. Fixes #50519. Thanks @iready, @kevinten10, @zhangyongjie1997, @knightplat-blip, @heiqishi666, and @slepybear.
- Agents/bootstrap: dedupe hook-injected bootstrap context files by workspace-relative path and store normalized resolved paths so duplicate relative and absolute hook paths no longer depend on the process cwd. (#59344; fixes #59319; related #56721, #56725, and #57587) Thanks @koen666.

View File

@@ -118,6 +118,13 @@ external end-user instructions.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**OpenAI-compatible Gemma 4**
- Historical assistant thinking/reasoning blocks are stripped before replay so local
OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content.
- Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed.
**Google (Generative AI / Gemini CLI / Antigravity)**
- Tool call id sanitization: strict alphanumeric.

View File

@@ -1,7 +1,12 @@
import { isGemma4ModelId } from "../../shared/google-models.js";
import { sanitizeGoogleTurnOrdering } from "./bootstrap.js";
/** Model API identifiers served over Google transports. */
const GOOGLE_MODEL_APIS: readonly string[] = ["google-gemini-cli", "google-generative-ai"];

/**
 * Returns true when the given model API identifier is one of the Google
 * transports (Gemini CLI or Generative AI); false for null/undefined or
 * any other API id.
 */
export function isGoogleModelApi(api?: string | null): boolean {
  return typeof api === "string" && GOOGLE_MODEL_APIS.includes(api);
}
/**
 * Whether historical assistant reasoning must be stripped before replaying a
 * transcript to this model. Delegates to the shared Gemma 4 model-id matcher;
 * per the replay policy, Gemma 4 OpenAI-compatible servers must not receive
 * prior-turn reasoning content.
 */
export function isGemma4ModelRequiringReasoningStrip(modelId?: string | null): boolean {
  const requiresStrip = isGemma4ModelId(modelId);
  return requiresStrip;
}
export { sanitizeGoogleTurnOrdering };

View File

@@ -1133,6 +1133,77 @@ describe("sanitizeSessionHistory", () => {
]);
});
// Gemma 4 over an OpenAI-compatible provider: a completed prior turn must lose
// its "thinking" block while the visible answer text survives replay.
it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
makeUserMessage("first"),
makeAssistantMessage([
{
type: "thinking",
thinking: "private reasoning",
thinkingSignature: "reasoning_content",
},
{ type: "text", text: "visible answer" },
]),
makeUserMessage("second"),
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
// Only the text block remains on the assistant turn.
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
{ type: "text", text: "visible answer" },
]);
});
// An active tool-call continuation (no later user turn yet) keeps its
// reasoning attached so the tool call can be replayed with full context.
it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
makeUserMessage("look up the answer"),
makeAssistantMessage([
{
type: "thinking",
thinking: "call the tool",
thinkingSignature: "reasoning_content",
},
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]),
{
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
timestamp: nextTimestamp(),
},
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
// Thinking block and tool call are both preserved, in order.
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
{
type: "thinking",
thinking: "call the tool",
thinkingSignature: "reasoning_content",
},
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]);
});
it("preserves latest assistant thinking blocks for github-copilot models", async () => {
setNonGoogleModelApi();

View File

@@ -43,7 +43,11 @@ import {
type UsageLike,
} from "../usage.js";
import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
import {
dropReasoningFromHistory,
dropThinkingBlocks,
stripInvalidThinkingSignatures,
} from "./thinking.js";
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot";
@@ -630,9 +634,12 @@ export async function sanitizeSessionHistory(params: {
const validatedThinkingSignatures = policy.preserveSignatures
? stripInvalidThinkingSignatures(sanitizedImages)
: sanitizedImages;
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(validatedThinkingSignatures)
const droppedReasoning = policy.dropReasoningFromHistory
? dropReasoningFromHistory(validatedThinkingSignatures)
: validatedThinkingSignatures;
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(droppedReasoning)
: droppedReasoning;
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay,

View File

@@ -612,6 +612,7 @@ vi.mock("../sandbox-info.js", () => ({
}));
vi.mock("../thinking.js", () => ({
dropReasoningFromHistory: <T>(messages: T) => messages,
dropThinkingBlocks: <T>(messages: T) => messages,
}));

View File

@@ -207,7 +207,7 @@ import {
buildEmbeddedSystemPrompt,
createSystemPromptOverride,
} from "../system-prompt.js";
import { dropThinkingBlocks } from "../thinking.js";
import { dropReasoningFromHistory, dropThinkingBlocks } from "../thinking.js";
import {
collectAllowedToolNames,
collectRegisteredToolNames,
@@ -1673,7 +1673,7 @@ export async function runEmbeddedAttempt(
// (e.g. thinkingSignature:"reasoning_text") on any follow-up provider
// call, including tool continuations. Wrap the stream function so every
// outbound request sees sanitized messages.
if (transcriptPolicy.dropThinkingBlocks) {
if (transcriptPolicy.dropThinkingBlocks || transcriptPolicy.dropReasoningFromHistory) {
const inner = activeSession.agent.streamFn;
activeSession.agent.streamFn = (model, context, options) => {
const ctx = context as unknown as { messages?: unknown };
@@ -1681,7 +1681,12 @@ export async function runEmbeddedAttempt(
if (!Array.isArray(messages)) {
return inner(model, context, options);
}
const sanitized = dropThinkingBlocks(messages as unknown as AgentMessage[]) as unknown;
const reasoningSanitized = transcriptPolicy.dropReasoningFromHistory
? dropReasoningFromHistory(messages as unknown as AgentMessage[])
: (messages as unknown as AgentMessage[]);
const sanitized = transcriptPolicy.dropThinkingBlocks
? (dropThinkingBlocks(reasoningSanitized) as unknown)
: (reasoningSanitized as unknown);
if (sanitized === messages) {
return inner(model, context, options);
}

View File

@@ -5,6 +5,7 @@ import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-messa
import {
OMITTED_ASSISTANT_REASONING_TEXT,
assessLastAssistantMessage,
dropReasoningFromHistory,
dropThinkingBlocks,
isAssistantMessageWithContent,
sanitizeThinkingForRecovery,
@@ -157,6 +158,105 @@ describe("dropThinkingBlocks", () => {
});
});
// Unit tests for dropReasoningFromHistory: prior-turn reasoning is removed,
// while the active tool-call continuation after the latest user turn is kept.
describe("dropReasoningFromHistory", () => {
// No thinking blocks -> the exact input array reference is returned (no copy).
it("returns the original reference when no thinking blocks are present", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "hello" }),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "world" }] }),
];
const result = dropReasoningFromHistory(messages);
expect(result).toBe(messages);
});
// A completed turn (followed by a later user turn) loses its thinking block.
it("strips assistant reasoning from prior completed turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "private" },
{ type: "text", text: "visible" },
],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(result).not.toBe(messages);
expect(assistant.content).toEqual([{ type: "text", text: "visible" }]);
});
// Stripping must never leave an assistant turn with empty content; the
// placeholder text is substituted instead.
it("uses omitted-reasoning text when a completed assistant turn is reasoning-only", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [{ type: "thinking", thinking: "private" }],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]);
});
// Tool-call turn still mid-continuation (tool result present, no later user
// turn) keeps its reasoning and the original reference is returned.
it("preserves reasoning for the active tool-call continuation after the latest user turn", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
];
const result = dropReasoningFromHistory(messages);
expect(result).toBe(messages);
});
// Once a later user turn starts, the earlier tool-call turn is historical and
// its reasoning is stripped (the tool call itself is preserved).
it("strips reasoning from old tool-call turns once a later user turn starts", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "42" }] }),
castAgentMessage({ role: "user", content: "thanks" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]);
});
});
describe("stripInvalidThinkingSignatures", () => {
it("returns the original reference when no invalid thinking signatures are present", () => {
const messages: AgentMessage[] = [

View File

@@ -29,6 +29,26 @@ function isThinkingBlock(block: AssistantContentBlock): boolean {
);
}
/**
 * Recognizes a tool-call content block in any of the wire spellings used by
 * the supported transcript formats ("toolCall", "tool_use", "function_call").
 * Non-object values are never tool calls.
 */
function isToolCallBlock(block: AssistantContentBlock): boolean {
  if (!block || typeof block !== "object") {
    return false;
  }
  switch ((block as { type?: unknown }).type) {
    case "toolCall":
    case "tool_use":
    case "function_call":
      return true;
    default:
      return false;
  }
}
/** True when any content block on the assistant message is a tool call. */
function hasAssistantToolCall(message: AssistantMessage): boolean {
  for (const block of message.content) {
    if (isToolCallBlock(block)) {
      return true;
    }
  }
  return false;
}
/** Narrow, shape-safe check for a tool-result transcript message. */
function isToolResultMessage(message: AgentMessage): boolean {
  if (!message || typeof message !== "object") {
    return false;
  }
  return (message as { role?: unknown }).role === "toolResult";
}
function isSignedThinkingBlock(block: AssistantContentBlock): boolean {
if (!isThinkingBlock(block)) {
return false;
@@ -177,6 +197,44 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
return touched ? out : messages;
}
/**
 * Decides whether the assistant message at `index` is the active tool-call
 * continuation whose reasoning must survive reasoning-stripping.
 *
 * A message qualifies only when all of the following hold:
 * - it sits at or after the latest user turn (`latestUserIndex`),
 * - it is an assistant message with content that includes a tool call,
 * - no other assistant turn precedes it within the same user turn, and
 * - a tool result follows it before the next user turn begins.
 */
function shouldPreserveCurrentToolTurnReasoning(
  messages: AgentMessage[],
  index: number,
  latestUserIndex: number,
): boolean {
  if (index < latestUserIndex) {
    return false;
  }
  const candidate = messages[index];
  if (!isAssistantMessageWithContent(candidate) || !hasAssistantToolCall(candidate)) {
    return false;
  }
  // Walk backwards: an earlier assistant turn before the enclosing user turn
  // means this is not the first assistant step of the current turn.
  let cursor = index - 1;
  while (cursor >= 0) {
    const role = (messages[cursor] as { role?: unknown })?.role;
    if (role === "user") {
      break;
    }
    if (role === "assistant") {
      return false;
    }
    cursor -= 1;
  }
  // Walk forwards: preserve only while a tool result arrives before the next
  // user turn; reaching a user turn (or the end) means the turn completed.
  for (let ahead = index + 1; ahead < messages.length; ahead += 1) {
    const next = messages[ahead];
    if (isToolResultMessage(next)) {
      return true;
    }
    if ((next as { role?: unknown })?.role === "user") {
      return false;
    }
  }
  return false;
}
function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
let touched = false;
const out: AgentMessage[] = [];
@@ -201,6 +259,43 @@ function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
return touched ? out : messages;
}
/**
 * Strips assistant thinking/reasoning blocks from prior completed turns while
 * leaving the active tool-call continuation (the assistant turn at/after the
 * latest user message whose tool result is still being replayed) intact.
 *
 * Returns the original array reference when nothing was removed, so callers
 * can cheaply detect a no-op by identity comparison.
 */
export function dropReasoningFromHistory(messages: AgentMessage[]): AgentMessage[] {
  // Locate the latest user turn; reasoning at or after it may still be "current".
  let latestUserIndex = -1;
  for (let i = messages.length - 1; i >= 0 && latestUserIndex === -1; i -= 1) {
    if ((messages[i] as { role?: unknown })?.role === "user") {
      latestUserIndex = i;
    }
  }
  let touched = false;
  const sanitized = messages.map((message, index) => {
    if (!isAssistantMessageWithContent(message)) {
      return message;
    }
    if (shouldPreserveCurrentToolTurnReasoning(messages, index, latestUserIndex)) {
      return message;
    }
    const kept = message.content.filter((block) => !isThinkingBlock(block));
    if (kept.length === message.content.length) {
      return message;
    }
    touched = true;
    // A reasoning-only turn gets the omitted-reasoning placeholder so the
    // assistant message never replays with empty content.
    return {
      ...message,
      content: kept.length > 0 ? kept : buildOmittedAssistantReasoningContent(),
    };
  });
  return touched ? sanitized : messages;
}
export function assessLastAssistantMessage(message: AgentMessage): RecoveryAssessment {
if (!isAssistantMessageWithContent(message)) {
return "valid";

View File

@@ -193,6 +193,7 @@ export type AgentRuntimeTranscriptPolicy = {
};
sanitizeThinkingSignatures: boolean;
dropThinkingBlocks: boolean;
dropReasoningFromHistory?: boolean;
applyGoogleTurnOrdering: boolean;
validateGeminiTurns: boolean;
validateAnthropicTurns: boolean;

View File

@@ -281,6 +281,22 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
// Policy resolution: only Gemma 4 model ids opt in to historical reasoning
// stripping on strict OpenAI-compatible providers; Gemma 3 stays untouched.
it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-4-26b-a4b-it",
modelApi: "openai-completions",
});
expect(policy.dropReasoningFromHistory).toBe(true);
const gemma3Policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-3-27b-it",
modelApi: "openai-completions",
});
expect(gemma3Policy.dropReasoningFromHistory).toBe(false);
});
it("falls back to unowned transport defaults when no owning plugin exists", () => {
expectStrictOpenAiCompatibleReplayDefaults("custom-openai-proxy");
});

View File

@@ -5,7 +5,10 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ
import type { ProviderReplayPolicy } from "../plugins/types.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeProviderId } from "./model-selection.js";
import { isGoogleModelApi } from "./pi-embedded-helpers/google.js";
import {
isGemma4ModelRequiringReasoningStrip,
isGoogleModelApi,
} from "./pi-embedded-helpers/google.js";
import type { ToolCallIdMode } from "./tool-call-id.js";
export type TranscriptSanitizeMode = "full" | "images-only";
@@ -23,6 +26,7 @@ export type TranscriptPolicy = {
};
sanitizeThinkingSignatures: boolean;
dropThinkingBlocks: boolean;
dropReasoningFromHistory?: boolean;
applyGoogleTurnOrdering: boolean;
validateGeminiTurns: boolean;
validateAnthropicTurns: boolean;
@@ -54,6 +58,7 @@ const DEFAULT_TRANSCRIPT_POLICY: TranscriptPolicy = {
sanitizeThoughtSignatures: undefined,
sanitizeThinkingSignatures: false,
dropThinkingBlocks: false,
dropReasoningFromHistory: false,
applyGoogleTurnOrdering: false,
validateGeminiTurns: false,
validateAnthropicTurns: false,
@@ -114,6 +119,9 @@ function buildUnownedProviderTransportReplayFallback(params: {
...(isAnthropic && modelId.includes("claude")
? { dropThinkingBlocks: !shouldPreserveThinkingBlocks(modelId) }
: {}),
...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId)
? { dropReasoningFromHistory: true }
: {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}),
@@ -151,6 +159,9 @@ function mergeTranscriptPolicy(
...(typeof policy.dropThinkingBlocks === "boolean"
? { dropThinkingBlocks: policy.dropThinkingBlocks }
: {}),
...(typeof policy.dropReasoningFromHistory === "boolean"
? { dropReasoningFromHistory: policy.dropReasoningFromHistory }
: {}),
...(typeof policy.applyAssistantFirstOrderingFix === "boolean"
? { applyGoogleTurnOrdering: policy.applyAssistantFirstOrderingFix }
: {}),

View File

@@ -183,12 +183,13 @@ describe("buildProviderReplayFamilyHooks", () => {
OPENAI_COMPATIBLE_REPLAY_HOOKS.buildReplayPolicy?.({
provider: "xai",
modelApi: "openai-completions",
modelId: "grok-4",
modelId: "google/gemma-4-26b-a4b-it",
} as never),
).toMatchObject({
sanitizeToolCallIds: true,
applyAssistantFirstOrderingFix: true,
validateGeminiTurns: true,
dropReasoningFromHistory: true,
});
const nativeIdsHooks = buildProviderReplayFamilyHooks({

View File

@@ -136,7 +136,10 @@ export function buildProviderReplayFamilyHooks(
const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds };
return {
buildReplayPolicy: (ctx: ProviderReplayPolicyContext) =>
buildOpenAICompatibleReplayPolicy(ctx.modelApi, policyOptions),
buildOpenAICompatibleReplayPolicy(ctx.modelApi, {
...policyOptions,
modelId: ctx.modelId,
}),
};
}
case "anthropic-by-model":

View File

@@ -35,6 +35,26 @@ describe("provider replay helpers", () => {
expect(policy).not.toHaveProperty("toolCallIdMode");
});
// Replay-policy builder: dropReasoningFromHistory is set only for Gemma 4 on
// the openai-completions API — not for Gemma 3, and not for openai-responses.
it("drops historical reasoning for Gemma 4 openai-completions replay", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
}),
).toMatchObject({
dropReasoningFromHistory: true,
});
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-3-27b-it",
}),
).not.toHaveProperty("dropReasoningFromHistory");
expect(
buildOpenAICompatibleReplayPolicy("openai-responses", {
modelId: "google/gemma-4-26b-a4b-it",
}),
).not.toHaveProperty("dropReasoningFromHistory");
});
it("omits tool-call id sanitization when opted out for openai-responses", () => {
const policy = buildOpenAICompatibleReplayPolicy("openai-responses", {
sanitizeToolCallIds: false,

View File

@@ -1,4 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { isGemma4ModelId } from "../shared/google-models.js";
import { sanitizeGoogleAssistantFirstOrdering } from "../shared/google-turn-ordering.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import type {
@@ -11,7 +12,7 @@ import type {
export function buildOpenAICompatibleReplayPolicy(
modelApi: string | null | undefined,
options: { sanitizeToolCallIds?: boolean } = {},
options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {},
): ProviderReplayPolicy | undefined {
if (
modelApi !== "openai-completions" &&
@@ -39,6 +40,9 @@ export function buildOpenAICompatibleReplayPolicy(
validateGeminiTurns: false,
validateAnthropicTurns: false,
}),
...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId)
? { dropReasoningFromHistory: true }
: {}),
};
}
@@ -131,7 +135,7 @@ export function buildHybridAnthropicOrOpenAIReplayPolicy(
});
}
return buildOpenAICompatibleReplayPolicy(ctx.modelApi);
return buildOpenAICompatibleReplayPolicy(ctx.modelApi, { modelId: ctx.modelId });
}
const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";

View File

@@ -702,6 +702,7 @@ export type ProviderReplayPolicy = {
includeCamelCase?: boolean;
};
dropThinkingBlocks?: boolean;
dropReasoningFromHistory?: boolean;
repairToolUseResultPairing?: boolean;
applyAssistantFirstOrderingFix?: boolean;
validateGeminiTurns?: boolean;

View File

@@ -0,0 +1,6 @@
import { normalizeLowercaseStringOrEmpty } from "./string-coerce.js";
/**
 * Matches Gemma 4 model ids (e.g. "google/gemma-4-26b-a4b-it", "gemma_4",
 * "gemma4") after lowercase normalization, while rejecting other generations
 * such as "gemma-3-27b-it". The "4" must be followed by a delimiter or the
 * end of the string, so larger numbers like "gemma-40b" do not match.
 */
export function isGemma4ModelId(modelId?: string | null): boolean {
  const gemma4Pattern = /(?:^|[/_:-])gemma[-_]?4(?:$|[/_.:-])/;
  const normalized = normalizeLowercaseStringOrEmpty(modelId);
  return gemma4Pattern.test(normalized);
}