diff --git a/CHANGELOG.md b/CHANGELOG.md index 0468be16ab6..5a73d1f7449 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -117,6 +117,7 @@ Docs: https://docs.openclaw.ai - CLI/skills JSON: route `skills list --json`, `skills info --json`, and `skills check --json` output to stdout instead of stderr so machine-readable consumers receive JSON on the expected stream again. (#60914; fixes #57599; landed from contributor PR #57611 by @Aftabbs) Thanks @Aftabbs. - Agents/subagents: honor allowlist validation, auth-profile handoff, and session override state when a subagent retries after `LiveSessionModelSwitchError`. (#58178) Thanks @openperf. - Agents/exec: restore `host=node` routing for node-pinned and `host=auto` sessions, while still blocking sandboxed `auto` sessions from jumping to gateway. (#60788) Thanks @openperf. +- Agents/compaction: keep assistant tool calls and displaced tool results in the same compaction chunk so strict summarization providers stop rejecting orphaned tool pairs. (#58849) Thanks @openperf. ## 2026.4.2 diff --git a/src/agents/compaction.test.ts b/src/agents/compaction.test.ts index 3fb35e6f74e..0b33fc1bb07 100644 --- a/src/agents/compaction.test.ts +++ b/src/agents/compaction.test.ts @@ -24,6 +24,7 @@ function makeAssistantToolCall( timestamp: number, toolCallId: string, text = "x".repeat(4000), + stopReason: AssistantMessage["stopReason"] = "stop", ): AssistantMessage { return makeAgentAssistantMessage({ content: [ @@ -31,7 +32,7 @@ function makeAssistantToolCall( { type: "toolCall", id: toolCallId, name: "test_tool", arguments: {} }, ], model: "gpt-5.4", - stopReason: "stop", + stopReason, timestamp, }); } @@ -76,6 +77,112 @@ describe("splitMessagesByTokenShare", () => { const parts = splitMessagesByTokenShare(messages, 3); expect(parts.flat().map((msg) => msg.timestamp)).toEqual(messages.map((msg) => msg.timestamp)); }); + + it("keeps tool_use and matching toolResult in the same chunk", () => { + const messages: AgentMessage[] = [ + makeMessage(1, 4000), + makeAssistantToolCall(2, "call_split"), + makeToolResult(3, "call_split", "r".repeat(800)), + makeMessage(4, 4000), + ]; + + const parts = splitMessagesByTokenShare(messages, 2); + + const chunkWithToolUse = parts.find((chunk) => + chunk.some((m) => m.role === "assistant" && m.timestamp === 2), + ); + const chunkWithToolResult = parts.find((chunk) => + chunk.some((m) => m.role === "toolResult" && m.timestamp === 3), + ); + expect(chunkWithToolUse).toBeDefined(); + expect(chunkWithToolResult).toBeDefined(); + expect(chunkWithToolUse).toBe(chunkWithToolResult); + expect(parts.flat().length).toBe(messages.length); + }); + + it("keeps multiple toolResults with their assistant in the same chunk", () => { + const assistant = makeAgentAssistantMessage({ + content: [ + { type: "text", text: "x".repeat(4000) }, + { type: "toolCall", id: "call_a", name: "tool_a", arguments: {} }, + { type: "toolCall", id: "call_b", name: "tool_b", arguments: {} }, + ], + model: "gpt-5.2", + stopReason: "stop", + timestamp: 2, + }); + + const messages: AgentMessage[] = [ + makeMessage(1, 4000), + assistant, + makeToolResult(3, "call_a", "result_a".repeat(200)), + makeToolResult(4, "call_b", "result_b".repeat(200)), + makeMessage(5, 4000), + ]; + + const parts = splitMessagesByTokenShare(messages, 2); + + const chunkWithAssistant = parts.find((chunk) => + chunk.some((m) => m.role === "assistant" && m.timestamp === 2), + )!; + const resultTimestamps = chunkWithAssistant + .filter((m) => m.role === "toolResult") + .map((m) => m.timestamp); + expect(resultTimestamps).toContain(3); + expect(resultTimestamps).toContain(4); + expect(parts.flat().length).toBe(messages.length); + }); + + it("keeps displaced toolResults with their assistant chunk", () => { + const messages: AgentMessage[] = [ + makeMessage(1, 4000), + makeAssistantToolCall(2, "call_split"), + makeMessage(3, 800), + makeToolResult(4, "call_split", "r".repeat(800)), + makeMessage(5, 4000), + ]; + + const parts = splitMessagesByTokenShare(messages, 2); + + const chunkWithToolUse = parts.find((chunk) => + chunk.some((m) => m.role === "assistant" && m.timestamp === 2), + ); + const chunkWithToolResult = parts.find((chunk) => + chunk.some((m) => m.role === "toolResult" && m.timestamp === 4), + ); + + expect(chunkWithToolUse).toBeDefined(); + expect(chunkWithToolUse).toBe(chunkWithToolResult); + }); + + it("splits after a completed tool_call/result pair when over budget", () => { + const messages: AgentMessage[] = [ + makeAssistantToolCall(1, "call_x", "y".repeat(4000)), + makeToolResult(2, "call_x", "r".repeat(4000)), + makeMessage(3, 4000), + ]; + + const parts = splitMessagesByTokenShare(messages, 2); + + expect(parts.length).toBe(2); + const chunk1Roles = parts[0].map((m) => m.role); + expect(chunk1Roles).toContain("assistant"); + expect(chunk1Roles).toContain("toolResult"); + expect(parts.flat().length).toBe(messages.length); + }); + + it("does not block splits after aborted tool-call assistants", () => { + const messages: AgentMessage[] = [ + makeAssistantToolCall(1, "call_abort", "y".repeat(4000), "aborted"), + makeMessage(2, 4000), + makeMessage(3, 4000), + ]; + + const parts = splitMessagesByTokenShare(messages, 2); + + expect(parts.length).toBe(2); + expect(parts.flat().length).toBe(messages.length); + }); }); describe("pruneHistoryForContextShare", () => { @@ -120,17 +227,11 @@ describe("pruneHistoryForContextShare", () => { }); it("returns droppedMessagesList containing dropped messages", () => { - // Note: This test uses simple user messages with no tool calls. - // When orphaned tool_results exist, droppedMessages may exceed - // droppedMessagesList.length since orphans are counted but not - // added to the list (they lack context for summarization). const { messages, pruned } = pruneLargeSimpleHistory(); expect(pruned.droppedChunks).toBeGreaterThan(0); - // Without orphaned tool_results, counts match exactly expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages); - // All messages accounted for: kept + dropped = original const allIds = [ ...pruned.droppedMessagesList.map((m) => m.timestamp), ...pruned.messages.map((m) => m.timestamp), @@ -154,13 +255,8 @@ describe("pruneHistoryForContextShare", () => { }); it("removes orphaned tool_result messages when tool_use is dropped", () => { - // Scenario: assistant with tool_use is in chunk 1 (dropped), - // tool_result is in chunk 2 (kept) - orphaned tool_result should be removed - // to prevent "unexpected tool_use_id" errors from Anthropic's API const messages: AgentMessage[] = [ - // Chunk 1 (will be dropped) - contains tool_use makeAssistantToolCall(1, "call_123"), - // Chunk 2 (will be kept) - contains orphaned tool_result makeToolResult(2, "call_123", "result".repeat(500)), { role: "user", @@ -176,27 +272,18 @@ describe("pruneHistoryForContextShare", () => { parts: 2, }); - // The orphaned tool_result should NOT be in kept messages - // (this is the critical invariant that prevents API errors) const keptRoles = pruned.messages.map((m) => m.role); expect(keptRoles).not.toContain("toolResult"); - - // The orphan count should be reflected in droppedMessages - // (orphaned tool_results are dropped but not added to droppedMessagesList - // since they lack context for summarization) - expect(pruned.droppedMessages).toBeGreaterThan(pruned.droppedMessagesList.length); + expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length); }); it("keeps tool_result when its tool_use is also kept", () => { - // Scenario: both tool_use and tool_result are in the kept portion const messages: AgentMessage[] = [ - // Chunk 1 (will be dropped) - just user content { role: "user", content: "x".repeat(4000), timestamp: 1, }, - // Chunk 2 (will be kept) - contains both tool_use and tool_result makeAssistantToolCall(2, "call_456", "y".repeat(500)), makeToolResult(3, "call_456", "result"), ]; @@ -208,17 +295,13 @@ describe("pruneHistoryForContextShare", () => { parts: 2, }); - // Both assistant and toolResult should be in kept messages const keptRoles = pruned.messages.map((m) => m.role); expect(keptRoles).toContain("assistant"); expect(keptRoles).toContain("toolResult"); }); it("removes multiple orphaned tool_results from the same dropped tool_use", () => { - // Scenario: assistant with multiple tool_use blocks is dropped, - // all corresponding tool_results should be removed from kept messages const messages: AgentMessage[] = [ - // Chunk 1 (will be dropped) - contains multiple tool_use blocks makeAgentAssistantMessage({ content: [ { type: "text", text: "x".repeat(4000) }, @@ -229,7 +312,6 @@ describe("pruneHistoryForContextShare", () => { stopReason: "stop", timestamp: 1, }), - // Chunk 2 (will be kept) - contains orphaned tool_results makeToolResult(2, "call_a", "result_a"), makeToolResult(3, "call_b", "result_b"), { @@ -246,13 +328,8 @@ describe("pruneHistoryForContextShare", () => { parts: 2, }); - // No orphaned tool_results should be in kept messages const keptToolResults = pruned.messages.filter((m) => m.role === "toolResult"); expect(keptToolResults).toHaveLength(0); - - // The orphan count should reflect both dropped tool_results - // droppedMessages = 1 (assistant) + 2 (orphaned tool_results) = 3 - // droppedMessagesList only has the assistant message - expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length + 2); + expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length); }); }); diff --git a/src/agents/compaction.ts b/src/agents/compaction.ts index 2823b553ffa..e466716fe04 100644 --- a/src/agents/compaction.ts +++ b/src/agents/compaction.ts @@ -9,6 +9,7 @@ import { retryAsync } from "../infra/retry.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js"; import { repairToolUseResultPairing, stripToolResultDetails } from "./session-transcript-repair.js"; +import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js"; const log = createSubsystemLogger("compaction"); @@ -131,9 +132,13 @@ export function splitMessagesByTokenShare( let current: AgentMessage[] = []; let currentTokens = 0; + let pendingToolCallIds = new Set(); + for (const message of messages) { const messageTokens = estimateCompactionMessageTokens(message); + if ( + pendingToolCallIds.size === 0 && chunks.length < normalizedParts - 1 && current.length > 0 && currentTokens + messageTokens > targetTokens @@ -145,6 +150,31 @@ export function splitMessagesByTokenShare( current.push(message); currentTokens += messageTokens; + + if (message.role === "assistant") { + const toolCalls = extractToolCallsFromAssistant(message); + const stopReason = (message as { stopReason?: unknown }).stopReason; + pendingToolCallIds = + stopReason === "aborted" || stopReason === "error" + ? new Set() + : new Set(toolCalls.map((t) => t.id)); + } else if (message.role === "toolResult" && pendingToolCallIds.size > 0) { + const resultId = extractToolResultId(message); + if (!resultId) { + pendingToolCallIds = new Set(); + } else { + pendingToolCallIds.delete(resultId); + } + if ( + pendingToolCallIds.size === 0 && + chunks.length < normalizedParts - 1 && + currentTokens > targetTokens + ) { + chunks.push(current); + current = []; + currentTokens = 0; + } + } } if (current.length > 0) {