diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ee0eb4c44..58b3214e654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Agents/tool-loop: enable the unknown-tool stream guard by default. Previously `resolveUnknownToolGuardThreshold` returned `undefined` unless `tools.loopDetection.enabled` was explicitly set to `true`, which left the protection off in the default configuration. A hallucinated or removed tool (for example `himalaya` after it was dropped from `skills.allowBundled`) would then loop "Tool X not found" attempts until the full embedded-run timeout. The guard has no false-positive surface because it only triggers on tools that are objectively not registered in the run, so it now stays on regardless of `tools.loopDetection.enabled` and still accepts `tools.loopDetection.unknownToolThreshold` as a per-run override (default 10). (#67401) Thanks @xantorres. - TUI/streaming: add a client-side streaming watchdog to `tui-event-handlers` so the `streaming · Xm Ys` activity indicator resets to `idle` after 30s of delta silence on the active run. Guards against lost or late `state: "final"` chat events (WS reconnects, gateway restarts, etc.) leaving the TUI stuck on `streaming` indefinitely; a new system log line surfaces the reset so users know to send a new message to resync. The window is configurable via the new `streamingWatchdogMs` context option (set to `0` to disable), and the handler now exposes a `dispose()` that clears the pending timer on shutdown. (#67401) Thanks @xantorres. - Extensions/lmstudio: add exponential backoff to the inference-preload wrapper so an LM Studio model-load failure (for example the built-in memory guardrail rejecting a load because the swap is saturated) no longer produces a WARN line every ~2s for every chat request. 
The wrapper now records consecutive preload failures per `(baseUrl, modelKey, contextLength)` tuple with a 5s → 10s → 20s → … → 5min cooldown and skips the preload step entirely while a cooldown is active, letting chat requests proceed directly to the stream (the model is often already loaded via the LM Studio UI). The combined `preload failed` log line now reports consecutive-failure count and remaining cooldown so operators can act on the real issue instead of drowning in repeated warnings. (#67401) Thanks @xantorres. +- Agents/replay: re-run tool/result pairing after strict replay tool-call ID sanitization on outbound requests so Anthropic-compatible providers like MiniMax no longer receive malformed orphan tool-result IDs such as `...toolresult1` during compaction and retry flows. (#67620) Thanks @stainlu. ## 2026.4.15-beta.1 diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts new file mode 100644 index 00000000000..6620dda0d89 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.test.ts @@ -0,0 +1,107 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { describe, expect, it } from "vitest"; +import { sanitizeReplayToolCallIdsForStream } from "./attempt.tool-call-normalization.js"; + +describe("sanitizeReplayToolCallIdsForStream", () => { + it("drops orphaned tool results after strict id sanitization", () => { + const messages: AgentMessage[] = [ + { + role: "toolResult", + toolCallId: "call_function_av7cbkigmk7x1", + toolUseId: "call_function_av7cbkigmk7x1", + toolName: "read", + content: [{ type: "text", text: "stale" }], + isError: false, + } as never, + ]; + + expect( + sanitizeReplayToolCallIdsForStream({ + messages, + mode: "strict", + repairToolUseResultPairing: true, + }), + ).toEqual([]); + }); + + it("keeps matched assistant and tool-result ids aligned", () => { + const rawId = 
"call_function_av7cbkigmk7x1"; + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [{ type: "toolUse", id: rawId, name: "read", input: { path: "." } }], + } as never, + { + role: "toolResult", + toolCallId: rawId, + toolUseId: rawId, + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + } as never, + ]; + + const out = sanitizeReplayToolCallIdsForStream({ + messages, + mode: "strict", + repairToolUseResultPairing: true, + }); + + expect(out).toMatchObject([ + { + role: "assistant", + content: [{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" }], + }, + { + role: "toolResult", + toolCallId: "callfunctionav7cbkigmk7x1", + toolUseId: "callfunctionav7cbkigmk7x1", + toolName: "read", + }, + ]); + }); + + it("keeps real tool results for aborted assistant spans", () => { + const rawId = "call_function_av7cbkigmk7x1"; + const out = sanitizeReplayToolCallIdsForStream({ + messages: [ + { + role: "assistant", + stopReason: "aborted", + content: [{ type: "toolUse", id: rawId, name: "read", input: { path: "." 
} }], + } as never, + { + role: "toolResult", + toolCallId: rawId, + toolUseId: rawId, + toolName: "read", + content: [{ type: "text", text: "partial" }], + isError: false, + } as never, + { + role: "user", + content: [{ type: "text", text: "retry" }], + } as never, + ], + mode: "strict", + repairToolUseResultPairing: true, + }); + + expect(out).toMatchObject([ + { + role: "assistant", + stopReason: "aborted", + content: [{ type: "toolUse", id: "callfunctionav7cbkigmk7x1", name: "read" }], + }, + { + role: "toolResult", + toolCallId: "callfunctionav7cbkigmk7x1", + toolUseId: "callfunctionav7cbkigmk7x1", + toolName: "read", + }, + { + role: "user", + }, + ]); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index dc75e37bc17..307223e59ea 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -6,7 +6,11 @@ import { isRedactedSessionsSpawnAttachment, sanitizeToolUseResultPairing, } from "../../session-transcript-repair.js"; -import { extractToolCallsFromAssistant } from "../../tool-call-id.js"; +import { + extractToolCallsFromAssistant, + sanitizeToolCallIdsForCloudCodeAssist, + type ToolCallIdMode, +} from "../../tool-call-id.js"; import { normalizeToolName } from "../../tool-policy.js"; import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js"; import type { TranscriptPolicy } from "../../transcript-policy.js"; @@ -868,6 +872,25 @@ export function wrapStreamFnTrimToolCallNames( }; } +export function sanitizeReplayToolCallIdsForStream(params: { + messages: AgentMessage[]; + mode: ToolCallIdMode; + allowedToolNames?: Set<string>; + preserveNativeAnthropicToolUseIds?: boolean; + preserveReplaySafeThinkingToolCallIds?: boolean; + repairToolUseResultPairing?: boolean; +}): AgentMessage[] { + const sanitized = 
sanitizeToolCallIdsForCloudCodeAssist(params.messages, params.mode, { + preserveNativeAnthropicToolUseIds: params.preserveNativeAnthropicToolUseIds, + preserveReplaySafeThinkingToolCallIds: params.preserveReplaySafeThinkingToolCallIds, + allowedToolNames: params.allowedToolNames, + }); + if (!params.repairToolUseResultPairing) { + return sanitized; + } + return sanitizeToolUseResultPairing(sanitized); +} + export function wrapStreamFnSanitizeMalformedToolCalls( baseFn: StreamFn, allowedToolNames?: Set<string>, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index c48377ceafb..9598a01ec45 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -115,7 +115,6 @@ import { resolveSystemPromptOverride } from "../../system-prompt-override.js"; import { buildSystemPromptParams } from "../../system-prompt-params.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveAgentTimeoutMs } from "../../timeout.js"; -import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js"; import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js"; import { resolveTranscriptPolicy, @@ -225,6 +224,7 @@ import { wrapStreamFnRepairMalformedToolCallArguments, } from "./attempt.tool-call-argument-repair.js"; import { + sanitizeReplayToolCallIdsForStream, wrapStreamFnSanitizeMalformedToolCalls, wrapStreamFnTrimToolCallNames, } from "./attempt.tool-call-normalization.js"; @@ -1251,25 +1251,23 @@ export async function runEmbeddedAttempt( if (!Array.isArray(messages)) { return inner(model, context, options); } - const allowProviderOwnedThinkingReplay = shouldAllowProviderOwnedThinkingReplay({ - modelApi: (model as { api?: unknown })?.api as string | null | undefined, - policy: transcriptPolicy, - }); - const sanitized = sanitizeToolCallIdsForCloudCodeAssist( - messages as AgentMessage[], + const nextMessages = 
sanitizeReplayToolCallIdsForStream({ + messages: messages as AgentMessage[], mode, - { - preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds, - preserveReplaySafeThinkingToolCallIds: allowProviderOwnedThinkingReplay, - allowedToolNames, - }, - ); - if (sanitized === messages) { + allowedToolNames, + preserveNativeAnthropicToolUseIds: transcriptPolicy.preserveNativeAnthropicToolUseIds, + preserveReplaySafeThinkingToolCallIds: shouldAllowProviderOwnedThinkingReplay({ + modelApi: (model as { api?: unknown })?.api as string | null | undefined, + policy: transcriptPolicy, + }), + repairToolUseResultPairing: transcriptPolicy.repairToolUseResultPairing, + }); + if (nextMessages === messages) { return inner(model, context, options); } const nextContext = { ...(context as unknown as Record<string, unknown>), - messages: sanitized, + messages: nextMessages, } as unknown; return inner(model, nextContext as typeof context, options); }; diff --git a/src/plugins/provider-replay-helpers.test.ts b/src/plugins/provider-replay-helpers.test.ts index c257da9e216..bc1fc1af578 100644 --- a/src/plugins/provider-replay-helpers.test.ts +++ b/src/plugins/provider-replay-helpers.test.ts @@ -93,7 +93,6 @@ describe("provider replay helpers", () => { }); it("builds hybrid anthropic or openai replay policy", () => { - // Sonnet 4.6 preserves thinking blocks even when flag is set const sonnet46Policy = buildHybridAnthropicOrOpenAIReplayPolicy( { provider: "minimax", @@ -107,7 +106,6 @@ describe("provider replay helpers", () => { }); expect(sonnet46Policy).not.toHaveProperty("dropThinkingBlocks"); - // Legacy model still drops expect( buildHybridAnthropicOrOpenAIReplayPolicy( {