fix(agents): avoid synthetic tool results during parallel races

Fixes the session transcript race where a newer assistant tool-call turn could force pending older tool calls to be written as synthetic missing-result entries while real parallel tool results were still in flight. The guard no longer synthesizes at that racing boundary when synthetic repair is enabled, and transcript repair now moves late real results back beside their matching assistant tool-call turn before adding any placeholder. This keeps provider replay strict while preserving useful tool output. Regression coverage: focused guard and transcript-repair tests for late parallel results. Closes #88168. Follow-up lock-lifetime report tracked in #88647. Thanks @TurboTheTurtle for the fix and @jhartman00 for the report. Co-authored-by: Andy Ye <35905412+TurboTheTurtle@users.noreply.github.com>
2026-07-20 15:41:42 +00:00 · 2026-05-31 07:00:44 -07:00
parent 0833c68b1b
commit 44c65de17a
4 changed files with 127 additions and 12 deletions
--- a/src/agents/session-tool-result-guard.test.ts
+++ b/src/agents/session-tool-result-guard.test.ts
@@ -370,6 +370,31 @@ describe("installSessionToolResultGuard", () => {
    expectPersistedRoles(sm, ["assistant", "toolResult"]);
  });

+  it("does not synthesize older pending results before a new assistant tool-call turn", () => {
+    const sm = SessionManager.inMemory();
+    const guard = installSessionToolResultGuard(sm);
+
+    appendAssistantToolCall(sm, { id: "call_1", name: "read" });
+    appendAssistantToolCall(sm, { id: "call_2", name: "exec" });
+    sm.appendMessage(
+      asAppendMessage({
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "real output" }],
+        isError: false,
+      }),
+    );
+
+    const messages = expectPersistedRoles(sm, ["assistant", "assistant", "toolResult"]);
+    expect((messages[2] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe(
+      "call_1",
+    );
+    expect((messages[2] as { isError?: boolean }).isError).toBe(false);
+    expect(JSON.stringify(messages)).not.toContain("missing tool result");
+    expect(guard.getPendingIds()).toStrictEqual(["call_2"]);
+  });
+
  it("clears pending when a sanitized assistant message is dropped and synthetic results are disabled", () => {
    const sm = SessionManager.inMemory();
    const guard = installSessionToolResultGuard(sm, {
--- a/src/agents/session-tool-result-guard.ts
+++ b/src/agents/session-tool-result-guard.ts
@@ -745,8 +745,15 @@ export function installSessionToolResultGuard(
    ) {
      flushPendingToolResults();
    }
-    // If new tool calls arrive while older ones are pending, flush the old ones first.
-    if (pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length)) {
+    // If synthetic results are disabled, a new assistant tool-call turn is a safe
+    // boundary to drop older pending ids. When synthetic results are enabled,
+    // do not synthesize here: parallel tool-result appends can still be racing
+    // this assistant append, and transcript repair can move late real results
+    // back into strict provider order before the next replay.
+    if (
+      !allowSyntheticToolResults &&
+      pendingState.shouldFlushBeforeNewToolCalls(toolCalls.length)
+    ) {
      flushPendingToolResults();
    }

--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -193,6 +193,52 @@ describe("sanitizeToolUseResultPairing", () => {
    expect(result.moved).toBe(true);
  });

+  it("moves late real results ahead of newer assistant tool calls instead of synthesizing", () => {
+    const input = castAgentMessages([
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "call_read", name: "read", arguments: {} }],
+      },
+      {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "call_exec", name: "exec", arguments: {} }],
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_read",
+        toolName: "read",
+        content: [{ type: "text", text: "real read output" }],
+        isError: false,
+      },
+      {
+        role: "toolResult",
+        toolCallId: "call_exec",
+        toolName: "exec",
+        content: [{ type: "text", text: "real exec output" }],
+        isError: false,
+      },
+    ]);
+
+    const result = repairToolUseResultPairing(input);
+
+    expect(result.added).toHaveLength(0);
+    expect(result.messages.map((message) => message.role)).toEqual([
+      "assistant",
+      "toolResult",
+      "assistant",
+      "toolResult",
+    ]);
+    expect((result.messages[1] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe(
+      "call_read",
+    );
+    expect((result.messages[1] as { isError?: boolean }).isError).toBe(false);
+    expect((result.messages[3] as { toolCallId?: string; isError?: boolean }).toolCallId).toBe(
+      "call_exec",
+    );
+    expect(JSON.stringify(result.messages)).not.toContain("missing tool result");
+    expect(result.moved).toBe(true);
+  });
+
  it("repairs blank tool result names from matching tool calls", () => {
    const input = castAgentMessages([
      {
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -445,6 +445,29 @@ function assistantHasToolCalls(message: AgentMessage): boolean {
  return extractToolCallsFromAssistant(message).length > 0;
 }

+function findLaterMatchingToolResult(params: {
+  messages: AgentMessage[];
+  startIndex: number;
+  toolCallId: string;
+  toolName?: string;
+  toolCalls: Array<{ id: string; name?: string }>;
+  seenToolResultIds: Set<string>;
+}): Extract<AgentMessage, { role: "toolResult" }> | undefined {
+  for (let index = params.startIndex; index < params.messages.length; index += 1) {
+    const candidate = params.messages[index];
+    if (!candidate || typeof candidate !== "object" || candidate.role !== "toolResult") {
+      continue;
+    }
+    const normalizedLegacyResult = normalizeLegacyToolResultId(candidate, params.toolCalls);
+    const id = extractToolResultId(normalizedLegacyResult);
+    if (!id || id !== params.toolCallId || params.seenToolResultIds.has(id)) {
+      continue;
+    }
+    return normalizeToolResultName(normalizedLegacyResult, params.toolName);
+  }
+  return undefined;
+}
+
 export function repairToolUseResultPairing(
  messages: AgentMessage[],
  options?: ToolUseResultPairingOptions,
@@ -540,12 +563,12 @@ export function repairToolUseResultPairing(
          toolCalls,
        );
        const id = extractToolResultId(toolResult);
+        if (id && seenToolResultIds.has(id)) {
+          droppedDuplicateCount += 1;
+          changed = true;
+          continue;
+        }
        if (id && toolCallIds.has(id)) {
-          if (seenToolResultIds.has(id)) {
-            droppedDuplicateCount += 1;
-            changed = true;
-            continue;
-          }
          if (toolResult !== next) {
            changed = true;
          }
@@ -612,14 +635,28 @@ export function repairToolUseResultPairing(
      if (existing) {
        pushToolResult(existing);
      } else {
-        const missing = makeMissingToolResult({
+        const laterResult = findLaterMatchingToolResult({
+          messages,
+          startIndex: j,
          toolCallId: call.id,
          toolName: call.name,
-          text: options?.missingToolResultText,
+          toolCalls,
+          seenToolResultIds,
        });
-        added.push(missing);
-        changed = true;
-        pushToolResult(missing);
+        if (laterResult) {
+          moved = true;
+          changed = true;
+          pushToolResult(laterResult);
+        } else {
+          const missing = makeMissingToolResult({
+            toolCallId: call.id,
+            toolName: call.name,
+            text: options?.missingToolResultText,
+          });
+          added.push(missing);
+          changed = true;
+          pushToolResult(missing);
+        }
      }
    }