From 2584d0d415fa063b0e45b23b254d64d2545eab75 Mon Sep 17 00:00:00 2001 From: mrinalgaur2005 Date: Sun, 3 May 2026 05:17:46 +0530 Subject: [PATCH] fix(gateway): preserve every client tool call when agent calls multiple tools per turn Fixes #52288. Co-authored-by: Mrinal Gaur --- CHANGELOG.md | 1 + .../run.incomplete-turn.test.ts | 2 +- src/agents/pi-embedded-runner/run.ts | 16 +- src/agents/pi-embedded-runner/run/attempt.ts | 70 ++++++++- .../pi-embedded-runner/run/incomplete-turn.ts | 14 +- src/agents/pi-embedded-runner/run/types.ts | 10 +- .../sessions-yield.orchestration.test.ts | 40 ++++- src/agents/pi-tool-definition-adapter.ts | 67 +++++--- ...s.before-tool-call.integration.e2e.test.ts | 100 ++++++++++++ src/gateway/openresponses-http.test.ts | 146 ++++++++++++++++++ src/gateway/openresponses-http.ts | 91 ++++++----- 11 files changed, 468 insertions(+), 89 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b89f8d1ae74..f7ae39892f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/responses: emit every client tool call from `/v1/responses` JSON and SSE responses when the agent invokes multiple client tools in a single turn, so multi-tool plans, graph orchestration calls, and similar batched flows no longer drop every call but the last. Fixes #52288. Thanks @CharZhou and @bonelli. - Control UI/Gateway: avoid full session-list reloads for locally applied message-phase session updates, carry known session keys through transcript-file update events, and defer media provider listing when explicit generation model config is present. Refs #76236, #76203, #76188, #76107, and #76166. Thanks @BunsDev. - Install/update: prune the obsolete `plugin-runtime-deps` state directory during packaged postinstall so upgrades from pre-2026.5.2 releases reclaim old bundled-plugin dependency caches without touching external plugin installs. 
- Gateway: keep directly requested plugin tools invokable under restrictive tool profiles while preserving explicit deny lists and the HTTP safety deny list, preventing catalog/invoke mismatches that surface as "Tool not available". Thanks @BunsDev. diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index c101c1c3549..868bb8bc77c 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -1899,7 +1899,7 @@ describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { messagingToolSentTexts: [], messagingToolSentMediaUrls: [], }), - clientToolCall: null, + clientToolCalls: undefined, yieldDetected: false, didSendDeterministicApprovalPrompt: false, didSendViaMessagingTool: false, diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 99bf4e3f6f4..dcbec9ec8cc 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -2515,7 +2515,7 @@ export async function runEmbeddedPiAgent( attempt, incompleteTurnText: null, }); - const stopReason = attempt.clientToolCall + const stopReason = attempt.clientToolCalls ? "tool_calls" : attempt.yieldDetected ? "end_turn" @@ -2553,15 +2553,11 @@ export async function runEmbeddedPiAgent( // Propagate the LLM stop reason so callers (lifecycle events, // ACP bridge) can distinguish end_turn from max_tokens. stopReason, - pendingToolCalls: attempt.clientToolCall - ? 
[ { id: randomBytes(5).toString("hex").slice(0, 9), name: attempt.clientToolCall.name, arguments: JSON.stringify(attempt.clientToolCall.params), }, ] : undefined, + pendingToolCalls: attempt.clientToolCalls?.map((call) => ({ + id: randomBytes(5).toString("hex").slice(0, 9), + name: call.name, + arguments: JSON.stringify(call.params), + })), executionTrace: { winnerProvider: reportedModelRef.provider, winnerModel: reportedModelRef.model, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 12a9abc27d3..20a33f83bcc 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1523,8 +1523,28 @@ export async function runEmbeddedAttempt( sandboxEnabled: !!sandbox?.enabled, }); - // Add client tools (OpenResponses hosted tools) to customTools - let clientToolCallDetected: { name: string; params: Record<string, unknown> } | null = null; + // Add client tools (OpenResponses hosted tools) to customTools. + // Reserve slots synchronously at tool execution entry, before async + // before_tool_call hooks run, so parallel client-tool batches preserve + // assistant source order even when later hooks finish first. + const clientToolCallSlots: Array<{ + toolCallId: string; + name: string; + params?: Record<string, unknown>; + completed: boolean; + }> = []; + const clientToolCallSlotIndexes = new Map<string, number>(); + const reserveClientToolCallSlot = (toolCallId: string, toolName: string) => { + if (clientToolCallSlotIndexes.has(toolCallId)) { + return; + } + clientToolCallSlotIndexes.set(toolCallId, clientToolCallSlots.length); + clientToolCallSlots.push({ + toolCallId, + name: toolName, + completed: false, + }); + }; const clientToolLoopDetection = resolveToolLoopDetectionConfig({ cfg: params.config, agentId: sessionAgentId, @@ -1563,8 +1583,33 @@ const clientToolDefs = clientTools ? 
toClientToolDefinitions( clientTools, - (toolName, toolParams) => { - clientToolCallDetected = { name: toolName, params: toolParams }; + { + reserve: reserveClientToolCallSlot, + complete: (toolCallId, toolName, toolParams) => { + reserveClientToolCallSlot(toolCallId, toolName); + const slotIndex = clientToolCallSlotIndexes.get(toolCallId); + if (slotIndex === undefined) { + return; + } + const slot = clientToolCallSlots[slotIndex]; + if (!slot) { + return; + } + slot.name = toolName; + slot.params = toolParams; + slot.completed = true; + }, + discard: (toolCallId) => { + const slotIndex = clientToolCallSlotIndexes.get(toolCallId); + if (slotIndex === undefined) { + return; + } + const slot = clientToolCallSlots[slotIndex]; + if (slot) { + slot.completed = false; + slot.params = undefined; + } + }, }, { agentId: sessionAgentId, @@ -3526,6 +3571,17 @@ export async function runEmbeddedAttempt( }); trajectoryEndRecorded = true; + const completedClientToolCalls = clientToolCallSlots.flatMap((slot) => + slot.completed && slot.params + ? [ + { + name: slot.name, + params: slot.params, + }, + ] + : [], + ); + return { replayMetadata, itemLifecycle: getItemLifecycle(), @@ -3567,8 +3623,10 @@ export async function runEmbeddedAttempt( promptCache, compactionCount: getCompactionCount(), compactionTokensAfter: getLastCompactionTokensAfter(), - // Client tool call detected (OpenResponses hosted tools) - clientToolCall: clientToolCallDetected ?? undefined, + // Client tool calls detected (OpenResponses hosted tools). + // Stay `undefined` (not `[]`) when none were detected so downstream + // truthiness predicates keep working without a `.length` check. + clientToolCalls: completedClientToolCalls.length > 0 ? 
completedClientToolCalls : undefined, yieldDetected: yieldDetected || undefined, }; } finally { diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index 736193206c7..1c21cf5e8e2 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -34,7 +34,7 @@ type ReplayMetadataAttempt = Pick< type IncompleteTurnAttempt = Pick< EmbeddedRunAttemptResult, | "assistantTexts" - | "clientToolCall" + | "clientToolCalls" | "currentAttemptAssistant" | "yieldDetected" | "didSendDeterministicApprovalPrompt" @@ -52,7 +52,7 @@ type IncompleteTurnAttempt = Pick< type PlanningOnlyAttempt = Pick< EmbeddedRunAttemptResult, | "assistantTexts" - | "clientToolCall" + | "clientToolCalls" | "yieldDetected" | "didSendDeterministicApprovalPrompt" | "didSendViaMessagingTool" @@ -68,7 +68,7 @@ type PlanningOnlyAttempt = Pick< type SilentToolResultAttempt = Pick< EmbeddedRunAttemptResult, - | "clientToolCall" + | "clientToolCalls" | "yieldDetected" | "didSendDeterministicApprovalPrompt" | "lastToolError" @@ -224,7 +224,7 @@ export function resolveIncompleteTurnPayloadText(params: { params.payloadCount !== 0 || params.aborted || params.timedOut || - params.attempt.clientToolCall || + params.attempt.clientToolCalls || params.attempt.yieldDetected || params.attempt.didSendDeterministicApprovalPrompt || params.attempt.lastToolError @@ -339,7 +339,7 @@ export function resolveSilentToolResultReplyPayload(params: { params.aborted || params.timedOut || (params.attempt.toolMetas?.length ?? 
0) === 0 || - params.attempt.clientToolCall || + params.attempt.clientToolCalls || params.attempt.yieldDetected || params.attempt.didSendDeterministicApprovalPrompt || params.attempt.lastToolError || @@ -468,7 +468,7 @@ function shouldSkipPlanningOnlyRetry(params: { return Boolean( params.aborted || params.timedOut || - params.attempt.clientToolCall || + params.attempt.clientToolCalls || params.attempt.yieldDetected || params.attempt.didSendDeterministicApprovalPrompt || params.attempt.lastToolError || @@ -819,7 +819,7 @@ export function resolvePlanningOnlyRetryInstruction(params: { (typeof params.prompt === "string" && !isLikelyActionableUserPrompt(params.prompt)) || params.aborted || params.timedOut || - params.attempt.clientToolCall || + params.attempt.clientToolCalls || params.attempt.yieldDetected || params.attempt.didSendDeterministicApprovalPrompt || hasMessagingToolDeliveryEvidence(params.attempt) || diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index b3113ddadde..c5f514b74ea 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -112,8 +112,14 @@ export type EmbeddedRunAttemptResult = { promptCache?: ContextEnginePromptCacheInfo; compactionCount?: number; compactionTokensAfter?: number; - /** Client tool call detected (OpenResponses hosted tools). */ - clientToolCall?: { name: string; params: Record<string, unknown> }; + /** + * Client tool calls detected during this attempt (OpenResponses hosted + * tools), in the order the underlying LLM emitted them. Field is + * `undefined` when no client tools were called so existing truthiness + * checks across the runner pipeline (`attempt.clientToolCalls ? ...`) + * keep their meaning. When set, the array always has at least one entry. + */ + clientToolCalls?: Array<{ name: string; params: Record<string, unknown> }>; /** True when sessions_yield tool was called during this attempt. 
*/ yieldDetected?: boolean; replayMetadata: EmbeddedRunReplayMetadata; diff --git a/src/agents/pi-embedded-runner/sessions-yield.orchestration.test.ts b/src/agents/pi-embedded-runner/sessions-yield.orchestration.test.ts index 69a81d129fb..dd898a8ed18 100644 --- a/src/agents/pi-embedded-runner/sessions-yield.orchestration.test.ts +++ b/src/agents/pi-embedded-runner/sessions-yield.orchestration.test.ts @@ -56,13 +56,13 @@ describe("sessions_yield orchestration", () => { expect(queueEmbeddedPiMessage(sessionId, "subagent result")).toBe(false); }); - it("clientToolCall takes precedence over yieldDetected", async () => { - // Edge case: both flags set (shouldn't happen, but clientToolCall wins) + it("clientToolCalls takes precedence over yieldDetected", async () => { + // Edge case: both flags set (shouldn't happen, but clientToolCalls wins) mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ promptError: null, yieldDetected: true, - clientToolCall: { name: "hosted_tool", params: { arg: "value" } }, + clientToolCalls: [{ name: "hosted_tool", params: { arg: "value" } }], }), ); @@ -71,12 +71,44 @@ describe("sessions_yield orchestration", () => { runId: "run-yield-vs-client-tool", }); - // clientToolCall wins — tool_calls stopReason, pendingToolCalls populated + // clientToolCalls wins — tool_calls stopReason, pendingToolCalls populated expect(result.meta.stopReason).toBe("tool_calls"); expect(result.meta.pendingToolCalls).toHaveLength(1); expect(result.meta.pendingToolCalls![0].name).toBe("hosted_tool"); }); + it("preserves order across multiple client tool calls in one attempt (#52288)", async () => { + // Regression: a turn that invokes three client tools must surface all + // three through `pendingToolCalls`, in the order the LLM emitted them. + // Pre-fix this slot was a single variable that only kept the last call. 
+ mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + promptError: null, + clientToolCalls: [ + { name: "create_graph", params: { nodes: ["a", "b"] } }, + { name: "activate_graph", params: {} }, + { name: "get_status", params: {} }, + ], + }), + ); + + const result = await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + runId: "run-multi-client-tool", + }); + + expect(result.meta.stopReason).toBe("tool_calls"); + expect(result.meta.pendingToolCalls).toHaveLength(3); + expect(result.meta.pendingToolCalls!.map((c) => c.name)).toEqual([ + "create_graph", + "activate_graph", + "get_status", + ]); + expect(JSON.parse(result.meta.pendingToolCalls![0].arguments)).toEqual({ + nodes: ["a", "b"], + }); + }); + it("normal attempt without yield has no stopReason override", async () => { mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); diff --git a/src/agents/pi-tool-definition-adapter.ts b/src/agents/pi-tool-definition-adapter.ts index 5e937877975..56bfd49d99f 100644 --- a/src/agents/pi-tool-definition-adapter.ts +++ b/src/agents/pi-tool-definition-adapter.ts @@ -41,6 +41,14 @@ type ToolExecuteArgs = ToolDefinition["execute"] extends (...args: infer P) => u type ToolExecuteArgsAny = ToolExecuteArgs | ToolExecuteArgsLegacy | ToolExecuteArgsCurrent; const TOOL_ERROR_PARAM_PREVIEW_MAX_CHARS = 600; +export type ClientToolCallRecorder = + | ((toolName: string, params: Record<string, unknown>) => void) + | { + reserve?: (toolCallId: string, toolName: string) => void; + complete: (toolCallId: string, toolName: string, params: Record<string, unknown>) => void; + discard?: (toolCallId: string, toolName: string) => void; + }; + function isAbortSignal(value: unknown): value is AbortSignal { return typeof value === "object" && value !== null && "aborted" in value; } @@ -318,7 +326,7 @@ function coerceParamsRecord(value: unknown): Record<string, unknown> { // These tools are intercepted to return a "pending" result instead of executing export function toClientToolDefinitions( 
tools: ClientToolDefinition[], - onClientToolCall?: (toolName: string, params: Record<string, unknown>) => void, + onClientToolCall?: ClientToolCallRecorder, hookContext?: HookContext, ): ToolDefinition[] { return tools.map((tool) => { @@ -330,27 +338,44 @@ parameters: func.parameters as ToolDefinition["parameters"], execute: async (...args: ToolExecuteArgs): Promise<Record<string, unknown>> => { const { toolCallId, params } = splitToolExecuteArgs(args); - const initialParamsRecord = coerceParamsRecord(params); - const outcome = await runBeforeToolCallHook({ - toolName: func.name, - params: initialParamsRecord, - toolCallId, - ctx: hookContext, - }); - if (outcome.blocked) { - if (outcome.kind === "veto") { - return buildBlockedToolResult({ - reason: outcome.reason, - deniedReason: outcome.deniedReason, - }); - } - throw new Error(outcome.reason); + if (onClientToolCall && typeof onClientToolCall !== "function") { + onClientToolCall.reserve?.(toolCallId, func.name); } - const adjustedParams = outcome.params; - const paramsRecord = coerceParamsRecord(adjustedParams); - // Notify handler that a client tool was called - if (onClientToolCall) { - onClientToolCall(func.name, paramsRecord); + const initialParamsRecord = coerceParamsRecord(params); + try { + const outcome = await runBeforeToolCallHook({ + toolName: func.name, + params: initialParamsRecord, + toolCallId, + ctx: hookContext, + }); + if (outcome.blocked) { + if (onClientToolCall && typeof onClientToolCall !== "function") { + onClientToolCall.discard?.(toolCallId, func.name); + } + if (outcome.kind === "veto") { + return buildBlockedToolResult({ + reason: outcome.reason, + deniedReason: outcome.deniedReason, + }); + } + throw new Error(outcome.reason); + } + const adjustedParams = outcome.params; + const paramsRecord = coerceParamsRecord(adjustedParams); + // Notify handler that a client tool was called. 
+ if (onClientToolCall) { + if (typeof onClientToolCall === "function") { + onClientToolCall(func.name, paramsRecord); + } else { + onClientToolCall.complete(toolCallId, func.name, paramsRecord); + } + } + } catch (err) { + if (onClientToolCall && typeof onClientToolCall !== "function") { + onClientToolCall.discard?.(toolCallId, func.name); + } + throw err; } // Return a pending result - the client will execute this tool return jsonResult({ diff --git a/src/agents/pi-tools.before-tool-call.integration.e2e.test.ts b/src/agents/pi-tools.before-tool-call.integration.e2e.test.ts index 24f82402444..4a19b79ecdf 100644 --- a/src/agents/pi-tools.before-tool-call.integration.e2e.test.ts +++ b/src/agents/pi-tools.before-tool-call.integration.e2e.test.ts @@ -351,4 +351,104 @@ describe("before_tool_call hook integration for client tools", () => { extra: true, }); }); + + it("preserves client tool source order when hooks resolve out of order", async () => { + let releaseFirstHook!: () => void; + const firstHookGate = new Promise<void>((resolve) => { + releaseFirstHook = resolve; + }); + installBeforeToolCallHook({ + runBeforeToolCallImpl: async (event: unknown) => { + const toolName = (event as { toolName?: string }).toolName; + if (toolName === "first_tool") { + await firstHookGate; + } + return { params: { marker: toolName } }; + }, + }); + + const slots: Array<{ + toolCallId: string; + name: string; + params?: Record<string, unknown>; + completed: boolean; + }> = []; + const indexes = new Map<string, number>(); + const reserve = (toolCallId: string, name: string) => { + indexes.set(toolCallId, slots.length); + slots.push({ toolCallId, name, completed: false }); + }; + const complete = (toolCallId: string, name: string, params: Record<string, unknown>) => { + const index = indexes.get(toolCallId); + if (index === undefined) { + throw new Error(`missing reserved client tool slot for ${toolCallId}`); + } + const slot = slots[index]; + if (!slot) { + throw new Error(`missing client tool slot at ${index}`); + } + slot.name = name; 
slot.params = params; + slot.completed = true; + }; + const [firstTool, secondTool] = toClientToolDefinitions( + [ + { + type: "function", + function: { + name: "first_tool", + description: "First client tool", + parameters: { type: "object", properties: { value: { type: "string" } } }, + }, + }, + { + type: "function", + function: { + name: "second_tool", + description: "Second client tool", + parameters: { type: "object", properties: { value: { type: "string" } } }, + }, + }, + ], + { reserve, complete }, + { agentId: "main", sessionKey: "main" }, + ); + if (!firstTool || !secondTool) { + throw new Error("missing client tool definitions"); + } + const extensionContext = {} as Parameters<ToolDefinition["execute"]>[4]; + + const firstRun = firstTool.execute( + "client-call-1", + { value: "first" }, + undefined, + undefined, + extensionContext, + ); + const secondRun = secondTool.execute( + "client-call-2", + { value: "second" }, + undefined, + undefined, + extensionContext, + ); + + await secondRun; + expect(slots.map((slot) => ({ name: slot.name, completed: slot.completed }))).toEqual([ + { name: "first_tool", completed: false }, + { name: "second_tool", completed: true }, + ]); + + releaseFirstHook(); + await firstRun; + + expect(slots.filter((slot) => slot.completed).map((slot) => slot.name)).toEqual([ + "first_tool", + "second_tool", + ]); + expect(slots.map((slot) => slot.params)).toEqual([ + { value: "first", marker: "first_tool" }, + { value: "second", marker: "second_tool" }, + ]); + }); }); diff --git a/src/gateway/openresponses-http.test.ts b/src/gateway/openresponses-http.test.ts index a4aef589d3d..1d7bab86183 100644 --- a/src/gateway/openresponses-http.test.ts +++ b/src/gateway/openresponses-http.test.ts @@ -936,6 +936,152 @@ describe("OpenResponses HTTP API (e2e)", () => { expect(events.some((event) => event.data === "[DONE]")).toBe(true); }); + + it("returns every client tool call when an agent invokes multiple tools in one turn (#52288)", async () => { + // Pre-fix: the 
non-streaming `/v1/responses` handler read only + // `pendingToolCalls[0]`, so a turn that called three client tools + // collapsed to a single `function_call` item. Here we mock three pending + // calls and assert the response surfaces all three in arrival order + // alongside the assistant text. This locks in the contract for callers + // who run multi-tool agents (graph orchestration, planners, etc.). + const port = enabledPort; + agentCommand.mockClear(); + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "Calling all three tools now." }], + meta: { + stopReason: "tool_calls", + pendingToolCalls: [ + { id: "call_1", name: "create_graph", arguments: '{"nodes":["a","b"]}' }, + { id: "call_2", name: "activate_graph", arguments: "{}" }, + { id: "call_3", name: "get_status", arguments: "{}" }, + ], + }, + } as never); + + const res = await postResponses(port, { + stream: false, + model: "openclaw", + input: "call all three tools", + tools: [ + { type: "function", name: "create_graph", description: "Create graph" }, + { type: "function", name: "activate_graph", description: "Activate graph" }, + { type: "function", name: "get_status", description: "Get status" }, + ], + }); + + expect(res.status).toBe(200); + const json = (await res.json()) as { + status?: string; + output?: Array<Record<string, unknown>>; + }; + expect(json.status).toBe("incomplete"); + expect(json.output?.map((item) => item.type)).toEqual([ + "message", + "function_call", + "function_call", + "function_call", + ]); + expect(json.output?.slice(1).map((item) => item.name)).toEqual([ + "create_graph", + "activate_graph", + "get_status", + ]); + expect(json.output?.slice(1).map((item) => item.call_id)).toEqual([ + "call_1", + "call_2", + "call_3", + ]); + expect(json.output?.[1]?.arguments).toBe('{"nodes":["a","b"]}'); + await ensureResponseConsumed(res); + }); + + it("emits one SSE function_call per pending call at incrementing output_index (#52288)", async () => { + // Streaming counterpart to the non-streaming 
regression above. Pre-fix + // the streaming branch hard-coded `output_index: 1` and only emitted + // one `output_item.added`/`done` pair, so multi-tool turns silently + // dropped every call past the first. Verify that: + // - we get one `output_item.added` and one `output_item.done` for + // each pending call, + // - their `output_index` values count up monotonically from 1 (the + // assistant message owns index 0), and + // - the final `response.completed` payload contains the assistant + // message followed by all three function_call items in order. + const port = enabledPort; + agentCommand.mockClear(); + agentCommand.mockResolvedValueOnce({ + payloads: [{ text: "Calling all three tools now." }], + meta: { + stopReason: "tool_calls", + pendingToolCalls: [ + { id: "call_1", name: "create_graph", arguments: '{"nodes":["a","b"]}' }, + { id: "call_2", name: "activate_graph", arguments: "{}" }, + { id: "call_3", name: "get_status", arguments: "{}" }, + ], + }, + } as never); + + const res = await postResponses(port, { + stream: true, + model: "openclaw", + input: "call all three tools", + tools: [ + { type: "function", name: "create_graph", description: "Create graph" }, + { type: "function", name: "activate_graph", description: "Activate graph" }, + { type: "function", name: "get_status", description: "Get status" }, + ], + }); + + expect(res.status).toBe(200); + const text = await res.text(); + const events = parseSseEvents(text); + + type FunctionCallEvent = { + output_index: number; + item: { type: string; name?: string; call_id?: string; arguments?: string }; + }; + const addedFunctionCalls = events + .filter((e) => e.event === "response.output_item.added") + .map((e) => JSON.parse(e.data) as FunctionCallEvent) + .filter((evt) => evt.item.type === "function_call"); + expect(addedFunctionCalls.map((evt) => evt.item.name)).toEqual([ + "create_graph", + "activate_graph", + "get_status", + ]); + expect(addedFunctionCalls.map((evt) => 
evt.output_index)).toEqual([1, 2, 3]); + expect(addedFunctionCalls.map((evt) => evt.item.call_id)).toEqual([ + "call_1", + "call_2", + "call_3", + ]); + + const doneFunctionCalls = events + .filter((e) => e.event === "response.output_item.done") + .map((e) => JSON.parse(e.data) as FunctionCallEvent) + .filter((evt) => evt.item.type === "function_call"); + expect(doneFunctionCalls.map((evt) => evt.output_index)).toEqual([1, 2, 3]); + + const completed = events.find((event) => event.event === "response.completed"); + expect(completed).toBeTruthy(); + const response = ( + JSON.parse(completed?.data ?? "{}") as { + response?: { status?: string; output?: Array<Record<string, unknown>> }; + } + ).response; + expect(response?.status).toBe("incomplete"); + expect(response?.output?.map((item) => item.type)).toEqual([ + "message", + "function_call", + "function_call", + "function_call", + ]); + expect(response?.output?.slice(1).map((item) => item.name)).toEqual([ + "create_graph", + "activate_graph", + "get_status", + ]); + expect(events.some((event) => event.data === "[DONE]")).toBe(true); + }); + it("reuses the prior session when previous_response_id is provided", async () => { const port = enabledPort; agentCommand.mockClear(); diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index b340f1a5e70..cea2cc4d548 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -705,11 +705,12 @@ export async function handleOpenResponsesHttpRequest( const meta = (result as { meta?: unknown } | null)?.meta; const { stopReason, pendingToolCalls } = resolveStopReasonAndPendingToolCalls(meta); - // If agent called a client tool, return function_call (and any assistant text) to caller + // If the agent invoked client tools, return one `function_call` + // output item per call (in arrival order) plus any assistant text the + // model produced before the tool calls. 
Pre-#52288 only the first + // pending call was emitted, so multi-tool turns lost every call but + // the leading one. if (stopReason === "tool_calls" && pendingToolCalls && pendingToolCalls.length > 0) { - const functionCall = pendingToolCalls[0]; - const functionCallItemId = `call_${randomUUID()}`; - const assistantText = Array.isArray(payloads) && payloads.length > 0 ? payloads @@ -729,14 +730,16 @@ export async function handleOpenResponsesHttpRequest( }), ); } - output.push( - createFunctionCallOutputItem({ - id: functionCallItemId, - callId: functionCall.id, - name: functionCall.name, - arguments: functionCall.arguments, - }), - ); + for (const functionCall of pendingToolCalls) { + output.push( + createFunctionCallOutputItem({ + id: `call_${randomUUID()}`, + callId: functionCall.id, + name: functionCall.name, + arguments: functionCall.arguments, + }), + ); + } const response = createResponseResource({ id: responseId, @@ -998,7 +1001,6 @@ export async function handleOpenResponsesHttpRequest( pendingToolCalls && pendingToolCalls.length > 0 ) { - const functionCall = pendingToolCalls[0]; const usage = finalUsage ?? 
createEmptyUsage(); const finalText = accumulatedText || @@ -1036,36 +1038,49 @@ export async function handleOpenResponsesHttpRequest( item: completedItem, }); - const functionCallItemId = `call_${randomUUID()}`; - const functionCallItem = createFunctionCallOutputItem({ - id: functionCallItemId, - callId: functionCall.id, - name: functionCall.name, - arguments: functionCall.arguments, - }); - writeSseEvent(res, { - type: "response.output_item.added", - output_index: 1, - item: functionCallItem, - }); - const completedFunctionCallItem = createFunctionCallOutputItem({ - id: functionCallItemId, - callId: functionCall.id, - name: functionCall.name, - arguments: functionCall.arguments, - status: "completed", - }); - writeSseEvent(res, { - type: "response.output_item.done", - output_index: 1, - item: completedFunctionCallItem, - }); + // Emit one `function_call` output item per pending call, preserving + // arrival order. `output_index` continues past the assistant + // message at index 0 so the SSE stream keeps a single, monotonic + // index per response. Pre-#52288 the streaming path read only + // `pendingToolCalls[0]` and hard-coded `output_index: 1`, so a turn + // with multiple client tool calls dropped every call past the + // first. 
+ const functionCallItems: OutputItem[] = []; + let nextStreamOutputIndex = 1; + for (const functionCall of pendingToolCalls) { + const functionCallItemId = `call_${randomUUID()}`; + const functionCallItem = createFunctionCallOutputItem({ + id: functionCallItemId, + callId: functionCall.id, + name: functionCall.name, + arguments: functionCall.arguments, + }); + writeSseEvent(res, { + type: "response.output_item.added", + output_index: nextStreamOutputIndex, + item: functionCallItem, + }); + const completedFunctionCallItem = createFunctionCallOutputItem({ + id: functionCallItemId, + callId: functionCall.id, + name: functionCall.name, + arguments: functionCall.arguments, + status: "completed", + }); + writeSseEvent(res, { + type: "response.output_item.done", + output_index: nextStreamOutputIndex, + item: completedFunctionCallItem, + }); + functionCallItems.push(functionCallItem); + nextStreamOutputIndex += 1; + } const incompleteResponse = createResponseResource({ id: responseId, model, status: "incomplete", - output: [completedItem, functionCallItem], + output: [completedItem, ...functionCallItems], usage, }); closed = true;