From bf0653846e66fe13b6fc2c3caf39d54d9db2b5d6 Mon Sep 17 00:00:00 2001
From: liuxiaopai-ai <73659136+liuxiaopai-ai@users.noreply.github.com>
Date: Tue, 3 Mar 2026 03:56:14 +0800
Subject: [PATCH] Gateway: suppress NO_REPLY lead-fragment chat leaks

---
 CHANGELOG.md                                 |  1 +
 src/gateway/server-chat.agent-events.test.ts | 46 ++++++++++++++++++++
 src/gateway/server-chat.ts                   | 19 +++++++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33e9f1b6772..08d6618ff48 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai
 - Webchat/stream finalization: persist streamed assistant text when final events omit `message`, while keeping final payload precedence and skipping empty stream buffers to prevent disappearing replies after tool turns. (#31920) Thanks @Sid-Qin.
 - Cron/store migration: normalize legacy cron jobs with string `schedule` and top-level `command`/`timeout` fields into canonical schedule/payload/session-target shape on load, preventing schedule-error loops on old persisted stores. (#31926) Thanks @bmendonca3.
 - Gateway/Heartbeat model reload: treat `models.*` and `agents.defaults.model` config updates as heartbeat hot-reload triggers so heartbeat picks up model changes without a full gateway restart. (#32046) Thanks @stakeswky.
+- Gateway/Webchat NO_REPLY streaming: suppress assistant lead-fragment deltas that are prefixes of `NO_REPLY` and keep final-message buffering in sync, preventing partial `NO` leaks on silent-response runs while preserving legitimate short replies. (#32073) Thanks @liuxiaopai-ai.
 - Slack/inbound debounce routing: isolate top-level non-DM message debounce keys by message timestamp to avoid cross-thread collisions, preserve DM batching, and flush pending top-level buffers before immediate non-debounce follow-ups to keep ordering stable. (#31951) Thanks @scoootscooob.
 - OpenRouter/x-ai compatibility: skip `reasoning.effort` injection for `x-ai/*` models (for example Grok) so OpenRouter requests no longer fail with invalid-arguments errors on unsupported reasoning params. (#32054) Thanks @scoootscooob.
 - Tools/fsPolicy propagation: honor `tools.fs.workspaceOnly` for image/pdf local-root allowlists so non-sandbox media paths outside workspace are rejected when workspace-only mode is enabled. (#31882) Thanks @justinhuangcode.
diff --git a/src/gateway/server-chat.agent-events.test.ts b/src/gateway/server-chat.agent-events.test.ts
index e2cc88aa4e8..e02ed25eb42 100644
--- a/src/gateway/server-chat.agent-events.test.ts
+++ b/src/gateway/server-chat.agent-events.test.ts
@@ -220,6 +220,52 @@ describe("agent event handler", () => {
     nowSpy?.mockRestore();
   });
 
+  it("suppresses NO_REPLY lead fragments and does not leak NO in final chat message", () => {
+    const harness = createHarness({ now: 2_100 });
+    const { broadcast, nodeSendToSession, chatRunState, handler, nowSpy } = harness;
+    chatRunState.registry.add("run-3", { sessionKey: "session-3", clientRunId: "client-3" });
+
+    // Feed successively longer assistant texts: three partial prefixes of the
+    // token, then the complete token itself.
+    const streamedTexts = ["NO", "NO_", "NO_RE", "NO_REPLY"];
+    for (const text of streamedTexts) {
+      handler({ runId: "run-3", seq: 1, stream: "assistant", ts: Date.now(), data: { text } });
+    }
+    emitLifecycleEnd(handler, "run-3");
+
+    // The single final chat payload must not carry a message, i.e. neither the
+    // partial "NO" nor the full token may surface in it.
+    const finalPayload = expectSingleFinalChatPayload(broadcast) as { message?: unknown };
+    expect(finalPayload.message).toBeUndefined();
+    // Exactly one chat call is expected on nodeSendToSession for this run
+    // (presumably the final event only; verify against sessionChatCalls).
+    expect(sessionChatCalls(nodeSendToSession)).toHaveLength(1);
+    nowSpy?.mockRestore();
+  });
+
+  it("keeps final short replies like 'No' even when lead-fragment deltas are suppressed", () => {
+    const harness = createHarness({ now: 2_200 });
+    const { broadcast, nodeSendToSession, chatRunState, handler, nowSpy } = harness;
+    chatRunState.registry.add("run-4", { sessionKey: "session-4", clientRunId: "client-4" });
+
+    // "No" upper-cases to a prefix of NO_REPLY, so it has the lead-fragment shape.
+    const shortReply = "No";
+    handler({
+      runId: "run-4",
+      seq: 1,
+      stream: "assistant",
+      ts: Date.now(),
+      data: { text: shortReply },
+    });
+    emitLifecycleEnd(handler, "run-4");
+
+    type FinalPayload = { message?: { content?: Array<{ text?: string }> } };
+    const finalPayload = expectSingleFinalChatPayload(broadcast) as FinalPayload;
+    expect(finalPayload.message?.content?.[0]?.text).toBe(shortReply);
+    expect(sessionChatCalls(nodeSendToSession)).toHaveLength(1);
+    nowSpy?.mockRestore();
+  });
+
   it("cleans up agent run sequence tracking when lifecycle completes", () => {
     const { agentRunSeq, chatRunState, handler, nowSpy } = createHarness({ now: 2_500 });
     chatRunState.registry.add("run-cleanup", {
diff --git a/src/gateway/server-chat.ts b/src/gateway/server-chat.ts
index 5ac16c4cbba..d54d0a99eeb 100644
--- a/src/gateway/server-chat.ts
+++ b/src/gateway/server-chat.ts
@@ -75,6 +75,20 @@ function normalizeHeartbeatChatFinalText(params: {
   return { suppress: false, text: stripped.text };
 }
 
+/**
+ * Reports whether `text`, once trimmed and upper-cased, consists only of
+ * A-Z/underscore characters and is a strict prefix of SILENT_REPLY_TOKEN.
+ */
+function isSilentReplyLeadFragment(text: string): boolean {
+  const candidate = text.trim().toUpperCase();
+  // The `+` quantifier also rejects an empty (whitespace-only) candidate.
+  const isTokenShaped = /^[A-Z_]+$/.test(candidate);
+  // The complete token is not a fragment, so it is excluded here.
+  const isProperPrefix =
+    candidate !== SILENT_REPLY_TOKEN && SILENT_REPLY_TOKEN.startsWith(candidate);
+  return isTokenShaped && isProperPrefix;
+}
+
 export type ChatRunEntry = {
   sessionKey: string;
   clientRunId: string;
@@ -288,10 +302,13 @@ export function createAgentEventHandler({
     if (!cleaned) {
       return;
     }
+    chatRunState.buffers.set(clientRunId, cleaned);
     if (isSilentReplyText(cleaned, SILENT_REPLY_TOKEN)) {
       return;
     }
-    chatRunState.buffers.set(clientRunId, cleaned);
+    if (isSilentReplyLeadFragment(cleaned)) {
+      return;
+    }
     if (shouldHideHeartbeatChatOutput(clientRunId, sourceRunId)) {
       return;
     }