From bd4a5bd9d4780e608c7d55f4eb67da6500bcff6c Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Mon, 23 Feb 2026 17:46:03 -0500 Subject: [PATCH] fix(gateway): flush throttled delta before emitChatFinal The 150ms throttle in emitChatDelta can suppress the last text chunk before emitChatFinal fires, causing streaming clients (e.g. ACP) to receive truncated responses. The final event carries the complete text, but clients that build responses incrementally from deltas miss the tail end. Flush one last unthrottled delta with the complete buffered text immediately before sending the final event. This ensures all streaming consumers have the full response without needing to reconcile deltas against the final payload. --- src/gateway/server-chat.ts | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/gateway/server-chat.ts b/src/gateway/server-chat.ts index d54d0a99eeb..64675c8b5b7 100644 --- a/src/gateway/server-chat.ts +++ b/src/gateway/server-chat.ts @@ -352,6 +352,27 @@ export function createAgentEventHandler({ const text = normalizedHeartbeatText.text.trim(); const shouldSuppressSilent = normalizedHeartbeatText.suppress || isSilentReplyText(text, SILENT_REPLY_TOKEN); + // Flush any throttled delta so streaming clients receive the complete text + // before the final event. The 150 ms throttle in emitChatDelta may have + // suppressed the most recent chunk, leaving the client with stale text. + if (text && !shouldSuppressSilent) { + const lastSent = chatRunState.deltaSentAt.get(clientRunId) ?? 0; + if (lastSent > 0) { + const flushPayload = { + runId: clientRunId, + sessionKey, + seq, + state: "delta" as const, + message: { + role: "assistant", + content: [{ type: "text", text }], + timestamp: Date.now(), + }, + }; + broadcast("chat", flushPayload, { dropIfSlow: true }); + nodeSendToSession(sessionKey, "chat", flushPayload); + } + } chatRunState.buffers.delete(clientRunId); chatRunState.deltaSentAt.delete(clientRunId); if (jobState === "done") {