From 44c65de17a8c7bf40b911dbab05f4635b71df2ae Mon Sep 17 00:00:00 2001 From: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com> Date: Sun, 31 May 2026 07:00:44 -0700 Subject: [PATCH] fix(agents): avoid synthetic tool results during parallel races Fixes the session transcript race where a newer assistant tool-call turn could force pending older tool calls to be written as synthetic missing-result entries while real parallel tool results were still in flight. The guard no longer synthesizes at that racing boundary when synthetic repair is enabled, and transcript repair now moves late real results back beside their matching assistant tool-call turn before adding any placeholder. This keeps provider replay strict while preserving useful tool output. Regression coverage: focused guard and transcript-repair tests for late parallel results. Closes #88168. Follow-up lock-lifetime report tracked in #88647. Thanks @TurboTheTurtle for the fix and @jhartman00 for the report. Co-authored-by: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com> --- src/agents/session-tool-result-guard.test.ts | 25 +++++++++ src/agents/session-tool-result-guard.ts | 11 +++- src/agents/session-transcript-repair.test.ts | 46 ++++++++++++++++ src/agents/session-transcript-repair.ts | 57 ++++++++++++++++---- 4 files changed, 127 insertions(+), 12 deletions(-) diff --git a/src/agents/session-tool-result-guard.test.ts b/src/agents/session-tool-result-guard.test.ts index d2cc1901d59..870e08223e3 100644 --- a/src/agents/session-tool-result-guard.test.ts +++ b/src/agents/session-tool-result-guard.test.ts @@ -370,6 +370,31 @@ describe("installSessionToolResultGuard", () => { expectPersistedRoles(sm, ["assistant", "toolResult"]); }); + it("does not synthesize older pending results before a new assistant tool-call turn", () => { + const sm = SessionManager.inMemory(); + const guard = installSessionToolResultGuard(sm); + + appendAssistantToolCall(sm, { id: "call_1", name: "read" }); + appendAssistantToolCall(sm, { id: "call_2", name: "exec" }); + sm.appendMessage( + asAppendMessage({ + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "real output" }], + isError: false, + }), + ); + + const messages = expectPersistedRoles(sm, ["assistant", "assistant", "toolResult"]); + expect((messages[2] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_1", + ); + expect((messages[2] as { isError?: boolean }).isError).toBe(false); + expect(JSON.stringify(messages)).not.toContain("missing tool result"); + expect(guard.getPendingIds()).toStrictEqual(["call_2"]); + }); + it("clears pending when a sanitized assistant message is dropped and synthetic results are disabled", () => { const sm = SessionManager.inMemory(); const guard = installSessionToolResultGuard(sm, { diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index b0244844b26..ff50f82e43a 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -745,8 +745,15 @@ export function installSessionToolResultGuard( ) { flushPendingToolResults(); } - // If new tool calls arrive while older ones are pending, flush the old ones first. - if (pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length)) { + // If synthetic results are disabled, a new assistant tool-call turn is a safe + // boundary to drop older pending ids. When synthetic results are enabled, + // do not synthesize here: parallel tool-result appends can still be racing + // this assistant append, and transcript repair can move late real results + // back into strict provider order before the next replay. + if ( + !allowSyntheticToolResults && + pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length) + ) { flushPendingToolResults(); } diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index cfda4ac07a7..35452dd905d 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -193,6 +193,52 @@ describe("sanitizeToolUseResultPairing", () => { expect(result.moved).toBe(true); }); + it("moves late real results ahead of newer assistant tool calls instead of synthesizing", () => { + const input = castAgentMessages([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_read", name: "read", arguments: {} }], + }, + { + role: "assistant", + content: [{ type: "toolCall", id: "call_exec", name: "exec", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_read", + toolName: "read", + content: [{ type: "text", text: "real read output" }], + isError: false, + }, + { + role: "toolResult", + toolCallId: "call_exec", + toolName: "exec", + content: [{ type: "text", text: "real exec output" }], + isError: false, + }, + ]); + + const result = repairToolUseResultPairing(input); + + expect(result.added).toHaveLength(0); + expect(result.messages.map((message) => message.role)).toEqual([ + "assistant", + "toolResult", + "assistant", + "toolResult", + ]); + expect((result.messages[1] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_read", + ); + expect((result.messages[1] as { isError?: boolean }).isError).toBe(false); + expect((result.messages[3] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe( + "call_exec", + ); + expect(JSON.stringify(result.messages)).not.toContain("missing tool result"); + expect(result.moved).toBe(true); + }); + it("repairs blank tool result names from matching tool calls", () => { const input = castAgentMessages([ { diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index c5ac94e62c0..bbd743d3391 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -445,6 +445,29 @@ function assistantHasToolCalls(message: AgentMessage): boolean { return extractToolCallsFromAssistant(message).length > 0; } +function findLaterMatchingToolResult(params: { + messages: AgentMessage[]; + startIndex: number; + toolCallId: string; + toolName?: string; + toolCalls: Array<{ id: string; name?: string }>; + seenToolResultIds: Set; +}): Extract | undefined { + for (let index = params.startIndex; index < params.messages.length; index += 1) { + const candidate = params.messages[index]; + if (!candidate || typeof candidate !== "object" || candidate.role !== "toolResult") { + continue; + } + const normalizedLegacyResult = normalizeLegacyToolResultId(candidate, params.toolCalls); + const id = extractToolResultId(normalizedLegacyResult); + if (!id || id !== params.toolCallId || params.seenToolResultIds.has(id)) { + continue; + } + return normalizeToolResultName(normalizedLegacyResult, params.toolName); + } + return undefined; +} + export function repairToolUseResultPairing( messages: AgentMessage[], options?: ToolUseResultPairingOptions, @@ -540,12 +563,12 @@ export function repairToolUseResultPairing( toolCalls, ); const id = extractToolResultId(toolResult); + if (id && seenToolResultIds.has(id)) { + droppedDuplicateCount += 1; + changed = true; + continue; + } if (id && toolCallIds.has(id)) { - if (seenToolResultIds.has(id)) { - droppedDuplicateCount += 1; - changed = true; - continue; - } if (toolResult !== next) { changed = true; } @@ -612,14 +635,28 @@ export function repairToolUseResultPairing( if (existing) { pushToolResult(existing); } else { - const missing = makeMissingToolResult({ + const laterResult = findLaterMatchingToolResult({ + messages, + startIndex: j, toolCallId: call.id, toolName: call.name, - text: options?.missingToolResultText, + toolCalls, + seenToolResultIds, }); - added.push(missing); - changed = true; - pushToolResult(missing); + if (laterResult) { + moved = true; + changed = true; + pushToolResult(laterResult); + } else { + const missing = makeMissingToolResult({ + toolCallId: call.id, + toolName: call.name, + text: options?.missingToolResultText, + }); + added.push(missing); + changed = true; + pushToolResult(missing); + } } }