diff --git a/src/agents/session-tool-result-guard.test.ts b/src/agents/session-tool-result-guard.test.ts index d2cc1901d59..870e08223e3 100644 --- a/src/agents/session-tool-result-guard.test.ts +++ b/src/agents/session-tool-result-guard.test.ts @@ -370,6 +370,31 @@ describe("installSessionToolResultGuard", () => { expectPersistedRoles(sm, ["assistant", "toolResult"]); }); + it("does not synthesize older pending results before a new assistant tool-call turn", () => { + const sm = SessionManager.inMemory(); + const guard = installSessionToolResultGuard(sm); + + appendAssistantToolCall(sm, { id: "call_1", name: "read" }); + appendAssistantToolCall(sm, { id: "call_2", name: "exec" }); + sm.appendMessage( + asAppendMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "real output" }], + isError: false, + }), + ); + + const messages = expectPersistedRoles(sm, ["assistant", "assistant", "toolResult"]); + expect((messages[2] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_1", + ); + expect((messages[2] as { isError?: boolean }).isError).toBe(false); + expect(JSON.stringify(messages)).not.toContain("missing tool result"); + expect(guard.getPendingIds()).toStrictEqual(["call_2"]); + }); + it("clears pending when a sanitized assistant message is dropped and synthetic results are disabled", () => { const sm = SessionManager.inMemory(); const guard = installSessionToolResultGuard(sm, { diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index b0244844b26..ff50f82e43a 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -745,8 +745,15 @@ export function installSessionToolResultGuard( ) { flushPendingToolResults(); } - // If new tool calls arrive while older ones are pending, flush the old ones first. - if (pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length)) { + // If synthetic results are disabled, a new assistant tool-call turn is a safe + // boundary to drop older pending ids. When synthetic results are enabled, + // do not synthesize here: parallel tool-result appends can still be racing + // this assistant append, and transcript repair can move late real results + // back into strict provider order before the next replay. + if ( + !allowSyntheticToolResults && + pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length) + ) { flushPendingToolResults(); } diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index cfda4ac07a7..35452dd905d 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -193,6 +193,52 @@ describe("sanitizeToolUseResultPairing", () => { expect(result.moved).toBe(true); }); + it("moves late real results ahead of newer assistant tool calls instead of synthesizing", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_read", name: "read", arguments: {} }], + }, + { + role: "assistant", + content: [{ type: "toolCall", id: "call_exec", name: "exec", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_read", + toolName: "read", + content: [{ type: "text", text: "real read output" }], + isError: false, + }, + { + role: "toolResult", + toolCallId: "call_exec", + toolName: "exec", + content: [{ type: "text", text: "real exec output" }], + isError: false, + }, + ]); + + const result = repairToolUseResultPairing(input); + + expect(result.added).toHaveLength(0); + expect(result.messages.map((message) => message.role)).toEqual([ + "assistant", + "toolResult", + "assistant", + "toolResult", + ]); + expect((result.messages[1] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_read", + ); + expect((result.messages[1] as { isError?: boolean }).isError).toBe(false); + expect((result.messages[3] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_exec", + ); + expect(JSON.stringify(result.messages)).not.toContain("missing tool result"); + expect(result.moved).toBe(true); + }); + it("repairs blank tool result names from matching tool calls", () => { const input = castAgentMessages([ { diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index c5ac94e62c0..bbd743d3391 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -445,6 +445,29 @@ function assistantHasToolCalls(message: AgentMessage): boolean { return extractToolCallsFromAssistant(message).length > 0; } +function findLaterMatchingToolResult(params: { + messages: AgentMessage[]; + startIndex: number; + toolCallId: string; + toolName?: string; + toolCalls: Array<{ id: string; name?: string }>; + seenToolResultIds: Set; +}): Extract | undefined { + for (let index = params.startIndex; index < params.messages.length; index += 1) { + const candidate = params.messages[index]; + if (!candidate || typeof candidate !== "object" || candidate.role !== "toolResult") { + continue; + } + const normalizedLegacyResult = normalizeLegacyToolResultId(candidate, params.toolCalls); + const id = extractToolResultId(normalizedLegacyResult); + if (!id || id !== params.toolCallId || params.seenToolResultIds.has(id)) { + continue; + } + return normalizeToolResultName(normalizedLegacyResult, params.toolName); + } + return undefined; +} + export function repairToolUseResultPairing( messages: AgentMessage[], options?: ToolUseResultPairingOptions, @@ -540,12 +563,12 @@ export function repairToolUseResultPairing( toolCalls, ); const id = extractToolResultId(toolResult); + if (id && seenToolResultIds.has(id)) { + droppedDuplicateCount += 1; + changed = true; + continue; + } if (id && toolCallIds.has(id)) { - if (seenToolResultIds.has(id)) { - droppedDuplicateCount += 1; - changed = true; - continue; - } if (toolResult !== next) { changed = true; } @@ -612,14 +635,28 @@ export function repairToolUseResultPairing( if (existing) { pushToolResult(existing); } else { - const missing = makeMissingToolResult({ + const laterResult = findLaterMatchingToolResult({ + messages, + startIndex: j, toolCallId: call.id, toolName: call.name, - text: options?.missingToolResultText, + toolCalls, + seenToolResultIds, }); - added.push(missing); - changed = true; - pushToolResult(missing); + if (laterResult) { + moved = true; + changed = true; + pushToolResult(laterResult); + } else { + const missing = makeMissingToolResult({ + toolCallId: call.id, + toolName: call.name, + text: options?.missingToolResultText, + }); + added.push(missing); + changed = true; + pushToolResult(missing); + } } }