fix(agents): strip Gemma reasoning from local replay

This commit is contained in:
Peter Steinberger
2026-04-27 08:26:21 +01:00
parent f427ddc220
commit 556c3e87df
18 changed files with 366 additions and 11 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
- CLI/update: keep the automatic post-update completion refresh on the core-command tree so it no longer stages bundled plugin runtime deps before the Gateway restart path, avoiding `.24` update hangs and 1006 disconnect cascades. Fixes #72665. Thanks @sakalaboator and @He-Pin.
- Agents/Bedrock: stop heartbeat runs from persisting blank user transcript turns and repair existing blank user text messages before replay, preventing AWS Bedrock `ContentBlock` blank-text validation failures. Fixes #72640 and #72622. Thanks @goldzulu.
- Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
- LM Studio: allow interactive onboarding to leave the API key blank for unauthenticated local servers, using local synthetic auth while clearing stale LM Studio auth profiles. Fixes #66937. Thanks @olamedia.
- Process/Windows: decode command stdout and stderr from raw bytes with console-codepage awareness, while preserving valid UTF-8 output and multibyte characters split across chunks. Fixes #50519. Thanks @iready, @kevinten10, @zhangyongjie1997, @knightplat-blip, @heiqishi666, and @slepybear.
- Agents/bootstrap: dedupe hook-injected bootstrap context files by workspace-relative path and store normalized resolved paths so duplicate relative and absolute hook paths no longer depend on the process cwd. (#59344; fixes #59319; related #56721, #56725, and #57587) Thanks @koen666.

View File

@@ -118,6 +118,13 @@ external end-user instructions.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**OpenAI-compatible Gemma 4**
- Historical assistant thinking/reasoning blocks are stripped before replay so local
OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content.
- Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed.
**Google (Generative AI / Gemini CLI / Antigravity)**
- Tool call id sanitization: strict alphanumeric.

View File

@@ -1,7 +1,12 @@
import { isGemma4ModelId } from "../../shared/google-models.js";
import { sanitizeGoogleTurnOrdering } from "./bootstrap.js";
/** Model API identifiers served over Google transports. */
const GOOGLE_MODEL_APIS: readonly string[] = ["google-gemini-cli", "google-generative-ai"];

/**
 * Returns true when the given model API identifier is one of the Google
 * transports (Gemini CLI or Generative AI); false for null/undefined or
 * any other API id.
 */
export function isGoogleModelApi(api?: string | null): boolean {
  return typeof api === "string" && GOOGLE_MODEL_APIS.includes(api);
}
/**
 * Whether historical assistant reasoning must be stripped before replaying a
 * transcript to this model. Delegates to the shared Gemma 4 model-id matcher;
 * per the replay policy, Gemma 4 OpenAI-compatible servers must not receive
 * prior-turn reasoning content.
 */
export function isGemma4ModelRequiringReasoningStrip(modelId?: string | null): boolean {
  const requiresStrip = isGemma4ModelId(modelId);
  return requiresStrip;
}
export { sanitizeGoogleTurnOrdering };

View File

@@ -1133,6 +1133,77 @@ describe("sanitizeSessionHistory", () => {
]);
});
// Gemma 4 over an OpenAI-compatible provider: a completed prior turn must lose
// its "thinking" block while the visible answer text survives replay.
it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
makeUserMessage("first"),
makeAssistantMessage([
{
type: "thinking",
thinking: "private reasoning",
thinkingSignature: "reasoning_content",
},
{ type: "text", text: "visible answer" },
]),
makeUserMessage("second"),
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
// Only the text block remains on the assistant turn.
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
{ type: "text", text: "visible answer" },
]);
});
// An active tool-call continuation (no later user turn yet) keeps its
// reasoning attached so the tool call can be replayed with full context.
it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
makeUserMessage("look up the answer"),
makeAssistantMessage([
{
type: "thinking",
thinking: "call the tool",
thinkingSignature: "reasoning_content",
},
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]),
{
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
timestamp: nextTimestamp(),
},
]);
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
// Thinking block and tool call are both preserved, in order.
expect((result[1] as Extract<AgentMessage, { role: "assistant" }>).content).toEqual([
{
type: "thinking",
thinking: "call the tool",
thinkingSignature: "reasoning_content",
},
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]);
});
it("preserves latest assistant thinking blocks for github-copilot models", async () => {
setNonGoogleModelApi();

View File

@@ -43,7 +43,11 @@ import {
type UsageLike,
} from "../usage.js";
import { isZeroUsageEmptyStopAssistantTurn } from "./empty-assistant-turn.js";
import { dropThinkingBlocks, stripInvalidThinkingSignatures } from "./thinking.js";
import {
dropReasoningFromHistory,
dropThinkingBlocks,
stripInvalidThinkingSignatures,
} from "./thinking.js";
const INTER_SESSION_PREFIX_BASE = "[Inter-session message]";
const MODEL_SNAPSHOT_CUSTOM_TYPE = "model-snapshot";
@@ -630,9 +634,12 @@ export async function sanitizeSessionHistory(params: {
const validatedThinkingSignatures = policy.preserveSignatures
? stripInvalidThinkingSignatures(sanitizedImages)
: sanitizedImages;
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(validatedThinkingSignatures)
const droppedReasoning = policy.dropReasoningFromHistory
? dropReasoningFromHistory(validatedThinkingSignatures)
: validatedThinkingSignatures;
const droppedThinking = policy.dropThinkingBlocks
? dropThinkingBlocks(droppedReasoning)
: droppedReasoning;
const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, {
allowedToolNames: params.allowedToolNames,
allowProviderOwnedThinkingReplay,

View File

@@ -612,6 +612,7 @@ vi.mock("../sandbox-info.js", () => ({
}));
vi.mock("../thinking.js", () => ({
dropReasoningFromHistory: <T>(messages: T) => messages,
dropThinkingBlocks: <T>(messages: T) => messages,
}));

View File

@@ -207,7 +207,7 @@ import {
buildEmbeddedSystemPrompt,
createSystemPromptOverride,
} from "../system-prompt.js";
import { dropThinkingBlocks } from "../thinking.js";
import { dropReasoningFromHistory, dropThinkingBlocks } from "../thinking.js";
import {
collectAllowedToolNames,
collectRegisteredToolNames,
@@ -1673,7 +1673,7 @@ export async function runEmbeddedAttempt(
// (e.g. thinkingSignature:"reasoning_text") on any follow-up provider
// call, including tool continuations. Wrap the stream function so every
// outbound request sees sanitized messages.
if (transcriptPolicy.dropThinkingBlocks) {
if (transcriptPolicy.dropThinkingBlocks || transcriptPolicy.dropReasoningFromHistory) {
const inner = activeSession.agent.streamFn;
activeSession.agent.streamFn = (model, context, options) => {
const ctx = context as unknown as { messages?: unknown };
@@ -1681,7 +1681,12 @@ export async function runEmbeddedAttempt(
if (!Array.isArray(messages)) {
return inner(model, context, options);
}
const sanitized = dropThinkingBlocks(messages as unknown as AgentMessage[]) as unknown;
const reasoningSanitized = transcriptPolicy.dropReasoningFromHistory
? dropReasoningFromHistory(messages as unknown as AgentMessage[])
: (messages as unknown as AgentMessage[]);
const sanitized = transcriptPolicy.dropThinkingBlocks
? (dropThinkingBlocks(reasoningSanitized) as unknown)
: (reasoningSanitized as unknown);
if (sanitized === messages) {
return inner(model, context, options);
}

View File

@@ -5,6 +5,7 @@ import { castAgentMessage, castAgentMessages } from "../test-helpers/agent-messa
import {
OMITTED_ASSISTANT_REASONING_TEXT,
assessLastAssistantMessage,
dropReasoningFromHistory,
dropThinkingBlocks,
isAssistantMessageWithContent,
sanitizeThinkingForRecovery,
@@ -157,6 +158,105 @@ describe("dropThinkingBlocks", () => {
});
});
// Unit tests for dropReasoningFromHistory: prior-turn reasoning is removed,
// while the active tool-call continuation after the latest user turn is kept.
describe("dropReasoningFromHistory", () => {
// No thinking blocks -> the exact input array reference is returned (no copy).
it("returns the original reference when no thinking blocks are present", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "hello" }),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "world" }] }),
];
const result = dropReasoningFromHistory(messages);
expect(result).toBe(messages);
});
// A completed turn (followed by a later user turn) loses its thinking block.
it("strips assistant reasoning from prior completed turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "private" },
{ type: "text", text: "visible" },
],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(result).not.toBe(messages);
expect(assistant.content).toEqual([{ type: "text", text: "visible" }]);
});
// Stripping must never leave an assistant turn with empty content; the
// placeholder text is substituted instead.
it("uses omitted-reasoning text when a completed assistant turn is reasoning-only", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "first" }),
castAgentMessage({
role: "assistant",
content: [{ type: "thinking", thinking: "private" }],
}),
castAgentMessage({ role: "user", content: "second" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([{ type: "text", text: OMITTED_ASSISTANT_REASONING_TEXT }]);
});
// Tool-call turn still mid-continuation (tool result present, no later user
// turn) keeps its reasoning and the original reference is returned.
it("preserves reasoning for the active tool-call continuation after the latest user turn", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
];
const result = dropReasoningFromHistory(messages);
expect(result).toBe(messages);
});
// Once a later user turn starts, the earlier tool-call turn is historical and
// its reasoning is stripped (the tool call itself is preserved).
it("strips reasoning from old tool-call turns once a later user turn starts", () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "look up the answer" }),
castAgentMessage({
role: "assistant",
content: [
{ type: "thinking", thinking: "call the tool" },
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
],
}),
castAgentMessage({
role: "toolResult",
toolCallId: "call123456",
toolName: "lookup",
content: "42",
}),
castAgentMessage({ role: "assistant", content: [{ type: "text", text: "42" }] }),
castAgentMessage({ role: "user", content: "thanks" }),
];
const result = dropReasoningFromHistory(messages);
const assistant = result[1] as AssistantMessage;
expect(assistant.content).toEqual([
{ type: "toolCall", id: "call123456", name: "lookup", arguments: {} },
]);
});
});
describe("stripInvalidThinkingSignatures", () => {
it("returns the original reference when no invalid thinking signatures are present", () => {
const messages: AgentMessage[] = [

View File

@@ -29,6 +29,26 @@ function isThinkingBlock(block: AssistantContentBlock): boolean {
);
}
/**
 * Recognizes a tool-call content block in any of the wire spellings used by
 * the supported transcript formats ("toolCall", "tool_use", "function_call").
 * Non-object values are never tool calls.
 */
function isToolCallBlock(block: AssistantContentBlock): boolean {
  if (!block || typeof block !== "object") {
    return false;
  }
  switch ((block as { type?: unknown }).type) {
    case "toolCall":
    case "tool_use":
    case "function_call":
      return true;
    default:
      return false;
  }
}
/** True when any content block on the assistant message is a tool call. */
function hasAssistantToolCall(message: AssistantMessage): boolean {
  for (const block of message.content) {
    if (isToolCallBlock(block)) {
      return true;
    }
  }
  return false;
}
/** Narrow, shape-safe check for a tool-result transcript message. */
function isToolResultMessage(message: AgentMessage): boolean {
  if (!message || typeof message !== "object") {
    return false;
  }
  return (message as { role?: unknown }).role === "toolResult";
}
function isSignedThinkingBlock(block: AssistantContentBlock): boolean {
if (!isThinkingBlock(block)) {
return false;
@@ -177,6 +197,44 @@ export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
return touched ? out : messages;
}
/**
 * Decides whether the assistant message at `index` is the active tool-call
 * continuation whose reasoning must survive reasoning-stripping.
 *
 * A message qualifies only when all of the following hold:
 * - it sits at or after the latest user turn (`latestUserIndex`),
 * - it is an assistant message with content that includes a tool call,
 * - no other assistant turn precedes it within the same user turn, and
 * - a tool result follows it before the next user turn begins.
 */
function shouldPreserveCurrentToolTurnReasoning(
  messages: AgentMessage[],
  index: number,
  latestUserIndex: number,
): boolean {
  if (index < latestUserIndex) {
    return false;
  }
  const candidate = messages[index];
  if (!isAssistantMessageWithContent(candidate) || !hasAssistantToolCall(candidate)) {
    return false;
  }
  // Walk backwards: an earlier assistant turn before the enclosing user turn
  // means this is not the first assistant step of the current turn.
  let cursor = index - 1;
  while (cursor >= 0) {
    const role = (messages[cursor] as { role?: unknown })?.role;
    if (role === "user") {
      break;
    }
    if (role === "assistant") {
      return false;
    }
    cursor -= 1;
  }
  // Walk forwards: preserve only while a tool result arrives before the next
  // user turn; reaching a user turn (or the end) means the turn completed.
  for (let ahead = index + 1; ahead < messages.length; ahead += 1) {
    const next = messages[ahead];
    if (isToolResultMessage(next)) {
      return true;
    }
    if ((next as { role?: unknown })?.role === "user") {
      return false;
    }
  }
  return false;
}
function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
let touched = false;
const out: AgentMessage[] = [];
@@ -201,6 +259,43 @@ function stripAllThinkingBlocks(messages: AgentMessage[]): AgentMessage[] {
return touched ? out : messages;
}
/**
 * Strips assistant thinking/reasoning blocks from prior completed turns while
 * leaving the active tool-call continuation (the assistant turn at/after the
 * latest user message whose tool result is still being replayed) intact.
 *
 * Returns the original array reference when nothing was removed, so callers
 * can cheaply detect a no-op by identity comparison.
 */
export function dropReasoningFromHistory(messages: AgentMessage[]): AgentMessage[] {
  // Locate the latest user turn; reasoning at or after it may still be "current".
  let latestUserIndex = -1;
  for (let i = messages.length - 1; i >= 0 && latestUserIndex === -1; i -= 1) {
    if ((messages[i] as { role?: unknown })?.role === "user") {
      latestUserIndex = i;
    }
  }
  let touched = false;
  const sanitized = messages.map((message, index) => {
    if (!isAssistantMessageWithContent(message)) {
      return message;
    }
    if (shouldPreserveCurrentToolTurnReasoning(messages, index, latestUserIndex)) {
      return message;
    }
    const kept = message.content.filter((block) => !isThinkingBlock(block));
    if (kept.length === message.content.length) {
      return message;
    }
    touched = true;
    // A reasoning-only turn gets the omitted-reasoning placeholder so the
    // assistant message never replays with empty content.
    return {
      ...message,
      content: kept.length > 0 ? kept : buildOmittedAssistantReasoningContent(),
    };
  });
  return touched ? sanitized : messages;
}
export function assessLastAssistantMessage(message: AgentMessage): RecoveryAssessment {
if (!isAssistantMessageWithContent(message)) {
return "valid";

View File

@@ -193,6 +193,7 @@ export type AgentRuntimeTranscriptPolicy = {
};
sanitizeThinkingSignatures: boolean;
dropThinkingBlocks: boolean;
dropReasoningFromHistory?: boolean;
applyGoogleTurnOrdering: boolean;
validateGeminiTurns: boolean;
validateAnthropicTurns: boolean;

View File

@@ -281,6 +281,22 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
// Policy resolution: only Gemma 4 model ids opt in to historical reasoning
// stripping on strict OpenAI-compatible providers; Gemma 3 stays untouched.
it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-4-26b-a4b-it",
modelApi: "openai-completions",
});
expect(policy.dropReasoningFromHistory).toBe(true);
const gemma3Policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-3-27b-it",
modelApi: "openai-completions",
});
expect(gemma3Policy.dropReasoningFromHistory).toBe(false);
});
it("falls back to unowned transport defaults when no owning plugin exists", () => {
expectStrictOpenAiCompatibleReplayDefaults("custom-openai-proxy");
});

View File

@@ -5,7 +5,10 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ
import type { ProviderReplayPolicy } from "../plugins/types.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeProviderId } from "./model-selection.js";
import { isGoogleModelApi } from "./pi-embedded-helpers/google.js";
import {
isGemma4ModelRequiringReasoningStrip,
isGoogleModelApi,
} from "./pi-embedded-helpers/google.js";
import type { ToolCallIdMode } from "./tool-call-id.js";
export type TranscriptSanitizeMode = "full" | "images-only";
@@ -23,6 +26,7 @@ export type TranscriptPolicy = {
};
sanitizeThinkingSignatures: boolean;
dropThinkingBlocks: boolean;
dropReasoningFromHistory?: boolean;
applyGoogleTurnOrdering: boolean;
validateGeminiTurns: boolean;
validateAnthropicTurns: boolean;
@@ -54,6 +58,7 @@ const DEFAULT_TRANSCRIPT_POLICY: TranscriptPolicy = {
sanitizeThoughtSignatures: undefined,
sanitizeThinkingSignatures: false,
dropThinkingBlocks: false,
dropReasoningFromHistory: false,
applyGoogleTurnOrdering: false,
validateGeminiTurns: false,
validateAnthropicTurns: false,
@@ -114,6 +119,9 @@ function buildUnownedProviderTransportReplayFallback(params: {
...(isAnthropic && modelId.includes("claude")
? { dropThinkingBlocks: !shouldPreserveThinkingBlocks(modelId) }
: {}),
...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId)
? { dropReasoningFromHistory: true }
: {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible ? { validateAnthropicTurns: true } : {}),
@@ -151,6 +159,9 @@ function mergeTranscriptPolicy(
...(typeof policy.dropThinkingBlocks === "boolean"
? { dropThinkingBlocks: policy.dropThinkingBlocks }
: {}),
...(typeof policy.dropReasoningFromHistory === "boolean"
? { dropReasoningFromHistory: policy.dropReasoningFromHistory }
: {}),
...(typeof policy.applyAssistantFirstOrderingFix === "boolean"
? { applyGoogleTurnOrdering: policy.applyAssistantFirstOrderingFix }
: {}),

View File

@@ -183,12 +183,13 @@ describe("buildProviderReplayFamilyHooks", () => {
OPENAI_COMPATIBLE_REPLAY_HOOKS.buildReplayPolicy?.({
provider: "xai",
modelApi: "openai-completions",
modelId: "grok-4",
modelId: "google/gemma-4-26b-a4b-it",
} as never),
).toMatchObject({
sanitizeToolCallIds: true,
applyAssistantFirstOrderingFix: true,
validateGeminiTurns: true,
dropReasoningFromHistory: true,
});
const nativeIdsHooks = buildProviderReplayFamilyHooks({

View File

@@ -136,7 +136,10 @@ export function buildProviderReplayFamilyHooks(
const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds };
return {
buildReplayPolicy: (ctx: ProviderReplayPolicyContext) =>
buildOpenAICompatibleReplayPolicy(ctx.modelApi, policyOptions),
buildOpenAICompatibleReplayPolicy(ctx.modelApi, {
...policyOptions,
modelId: ctx.modelId,
}),
};
}
case "anthropic-by-model":

View File

@@ -35,6 +35,26 @@ describe("provider replay helpers", () => {
expect(policy).not.toHaveProperty("toolCallIdMode");
});
// Replay-policy builder: dropReasoningFromHistory is set only for Gemma 4 on
// the openai-completions API — not for Gemma 3, and not for openai-responses.
it("drops historical reasoning for Gemma 4 openai-completions replay", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
}),
).toMatchObject({
dropReasoningFromHistory: true,
});
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-3-27b-it",
}),
).not.toHaveProperty("dropReasoningFromHistory");
expect(
buildOpenAICompatibleReplayPolicy("openai-responses", {
modelId: "google/gemma-4-26b-a4b-it",
}),
).not.toHaveProperty("dropReasoningFromHistory");
});
it("omits tool-call id sanitization when opted out for openai-responses", () => {
const policy = buildOpenAICompatibleReplayPolicy("openai-responses", {
sanitizeToolCallIds: false,

View File

@@ -1,4 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { isGemma4ModelId } from "../shared/google-models.js";
import { sanitizeGoogleAssistantFirstOrdering } from "../shared/google-turn-ordering.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import type {
@@ -11,7 +12,7 @@ import type {
export function buildOpenAICompatibleReplayPolicy(
modelApi: string | null | undefined,
options: { sanitizeToolCallIds?: boolean } = {},
options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {},
): ProviderReplayPolicy | undefined {
if (
modelApi !== "openai-completions" &&
@@ -39,6 +40,9 @@ export function buildOpenAICompatibleReplayPolicy(
validateGeminiTurns: false,
validateAnthropicTurns: false,
}),
...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId)
? { dropReasoningFromHistory: true }
: {}),
};
}
@@ -131,7 +135,7 @@ export function buildHybridAnthropicOrOpenAIReplayPolicy(
});
}
return buildOpenAICompatibleReplayPolicy(ctx.modelApi);
return buildOpenAICompatibleReplayPolicy(ctx.modelApi, { modelId: ctx.modelId });
}
const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap";

View File

@@ -702,6 +702,7 @@ export type ProviderReplayPolicy = {
includeCamelCase?: boolean;
};
dropThinkingBlocks?: boolean;
dropReasoningFromHistory?: boolean;
repairToolUseResultPairing?: boolean;
applyAssistantFirstOrderingFix?: boolean;
validateGeminiTurns?: boolean;

View File

@@ -0,0 +1,6 @@
import { normalizeLowercaseStringOrEmpty } from "./string-coerce.js";
/**
 * Matches Gemma 4 model ids (e.g. "google/gemma-4-26b-a4b-it", "gemma_4",
 * "gemma4") after lowercase normalization, while rejecting other generations
 * such as "gemma-3-27b-it". The "4" must be followed by a delimiter or the
 * end of the string, so larger numbers like "gemma-40b" do not match.
 */
export function isGemma4ModelId(modelId?: string | null): boolean {
  const gemma4Pattern = /(?:^|[/_:-])gemma[-_]?4(?:$|[/_.:-])/;
  const normalized = normalizeLowercaseStringOrEmpty(modelId);
  return gemma4Pattern.test(normalized);
}