From 1ff07245f33af8f27c4be76239e1b1416b2ee87e Mon Sep 17 00:00:00 2001 From: wangshu94 <53429538+wangshu94@users.noreply.github.com> Date: Fri, 24 Apr 2026 22:59:47 +0800 Subject: [PATCH] fix(gateway): surface chat.send lifecycle errors to clients (#69747) Merged via squash. Prepared head SHA: 75b403b2de3e8fe1704c679cdc4b178d5fc433ae Co-authored-by: wangshu94 <53429538+wangshu94@users.noreply.github.com> Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com> Reviewed-by: @hxy91819 --- CHANGELOG.md | 1 + src/gateway/server-chat.agent-events.test.ts | 38 +++++++++++++++++++ .../chat.directive-tags.test.ts | 2 + src/gateway/server-methods/chat.ts | 7 ++++ 4 files changed, 48 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63ec6666755..440c5037f14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: honor the private-network SSRF opt-in for OpenAI-compatible image generation endpoints, so trusted LocalAI/LAN `image_generate` routes work without disabling SSRF checks globally. Fixes #62879. Thanks @seitzbg. - Providers/OpenAI: stop advertising the removed `gpt-5.3-codex-spark` Codex model through fallback catalogs, and suppress stale rows with a GPT-5.5 recovery hint. - Plugins/QR: replace legacy `qrcode-terminal` QR rendering with bounded `qrcode-tui` helpers for plugin login/setup flows. (#65969) Thanks @vincentkoc. +- Gateway/chat: register `chat.send` runs in the chat run registry so lifecycle error events reach the client instead of being silently dropped, fixing the stuck 'waiting' state and `/abort` reporting no active run. (#69747) Thanks @wangshu94. - Voice-call/realtime: wait for OpenAI session configuration before greeting or forwarding buffered audio, and reject non-allowlisted Twilio callers before stream setup. (#43501) Thanks @forrestblount. 
- ACPX/Codex: stop materializing `auth.json` bridge files for Codex ACP, Codex app-server, and Codex CLI runs; Codex-owned runtimes now use their normal `CODEX_HOME`/`~/.codex` auth path directly. - Auto-reply/system events: route async exec-event completion replies through the persisted session delivery context, so long-running command results return to the originating channel instead of being dropped when live origin metadata is missing. (#70258) Thanks @wzfukui. diff --git a/src/gateway/server-chat.agent-events.test.ts b/src/gateway/server-chat.agent-events.test.ts index 0e5886aca49..70674a5fff5 100644 --- a/src/gateway/server-chat.agent-events.test.ts +++ b/src/gateway/server-chat.agent-events.test.ts @@ -1326,6 +1326,44 @@ describe("agent event handler", () => { expect(agentRunSeq.has("run-chat-send")).toBe(false); }); + it("emits lifecycle chat errors for active chat.send runs with a chat run link", () => { + vi.useFakeTimers(); + const { broadcast, chatRunState, clearAgentRunContext, agentRunSeq, handler } = createHarness({ + resolveSessionKeyForRun: () => "session-chat-send", + lifecycleErrorRetryGraceMs: 100, + isChatSendRunActive: (runId) => runId === "run-chat-send", + }); + chatRunState.registry.add("run-chat-send", { + sessionKey: "session-chat-send", + clientRunId: "run-chat-send", + }); + registerAgentRunContext("run-chat-send", { sessionKey: "session-chat-send" }); + + handler({ + runId: "run-chat-send", + seq: 1, + stream: "lifecycle", + ts: Date.now(), + data: { phase: "error", error: "chat.send failed" }, + }); + + vi.advanceTimersByTime(100); + + const chatErrors = chatBroadcastCalls(broadcast).filter( + ([, payload]) => (payload as { state?: string }).state === "error", + ); + expect(chatErrors).toHaveLength(1); + expect(chatErrors[0]?.[1]).toMatchObject({ + runId: "run-chat-send", + sessionKey: "session-chat-send", + state: "error", + errorMessage: "chat.send failed", + }); + expect(chatRunState.registry.peek("run-chat-send")).toBeUndefined(); 
+ expect(clearAgentRunContext).toHaveBeenCalledWith("run-chat-send"); + expect(agentRunSeq.has("run-chat-send")).toBe(false); + }); + it("suppresses chat and node session events for non-control-UI-visible runs", () => { const { broadcast, nodeSendToSession, handler } = createHarness({ resolveSessionKeyForRun: () => "session-hidden", diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index 5d4412318c7..b6af0120a4d 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -307,6 +307,7 @@ function createChatContext(): Pick< | "chatDeltaSentAt" | "chatDeltaLastBroadcastLen" | "chatAbortedRuns" + | "addChatRun" | "removeChatRun" | "dedupe" | "loadGatewayModelCatalog" @@ -322,6 +323,7 @@ function createChatContext(): Pick< chatDeltaSentAt: new Map(), chatDeltaLastBroadcastLen: new Map(), chatAbortedRuns: new Map(), + addChatRun: vi.fn(), removeChatRun: vi.fn(), dedupe: new Map(), loadGatewayModelCatalog: async () => diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index cea6b4707f4..5a8c7c608ee 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -2248,6 +2248,10 @@ export const chatHandlers: GatewayRequestHandlers = { ownerConnId: normalizeOptionalText(client?.connId), ownerDeviceId: normalizeOptionalText(client?.connect?.device?.id), }); + context.addChatRun(clientRunId, { + sessionKey, + clientRunId, + }); const ackPayload = { runId: clientRunId, status: "started" as const, @@ -2740,8 +2744,11 @@ export const chatHandlers: GatewayRequestHandlers = { }) .finally(() => { context.chatAbortControllers.delete(clientRunId); + context.removeChatRun(clientRunId, clientRunId, sessionKey); }); } catch (err) { + context.chatAbortControllers.delete(clientRunId); + context.removeChatRun(clientRunId, clientRunId, sessionKey); const error = 
errorShape(ErrorCodes.UNAVAILABLE, String(err)); const payload = { runId: clientRunId,