fix(auto-reply): warn on substantive private message-tool finals

Warn operators when message_tool_only produces unusually substantive private final text without a delivered source reply. Keeps short/NO_REPLY silence quiet, avoids logging response bodies, and distinguishes unrelated side effects from source-reply delivery.
2026-07-20 01:51:37 +00:00 · 2026-05-31 21:35:58 +08:00
parent 81b9da0bb0
commit 75e0053cf9
7 changed files with 427 additions and 8 deletions
--- a/docs/tools/slash-commands.md
+++ b/docs/tools/slash-commands.md
@@ -212,11 +212,13 @@ plugins.
        - `/fast on|off` persists a session override; use the Sessions UI `inherit` option to clear it.
        - `/fast` is provider-specific: OpenAI/Codex map it to `service_tier=priority`; direct Anthropic requests map it to `service_tier=auto` or `standard_only`.
        - `/reasoning`, `/verbose`, and `/trace` are risky in group settings — they may reveal internal reasoning or plugin diagnostics. Keep them off in group chats.
+
      </Accordion>
      <Accordion title="Model switching details">
        - `/model` persists the new model immediately to the session.
        - If the agent is idle, the next run uses it right away.
        - If a run is active, the switch is marked pending and applied at the next clean retry point.
+
      </Accordion>
    </AccordionGroup>

@@ -468,6 +470,7 @@ See [BTW side questions](/tools/btw) for the full behavior.
    - **Native Slack commands:** `agent:<agentId>:slack:slash:<userId>` (prefix configurable via `channels.slack.slashCommand.sessionPrefix`)
    - **Native Telegram commands:** `telegram:slash:<userId>` (targets the chat session via `CommandTargetSessionKey`)
    - **`/stop`** targets the active chat session to abort the current run.
+
  </Accordion>
  <Accordion title="Slack specifics">
    `channels.slack.slashCommand` supports a single `/openclaw`-style command.
@@ -479,11 +482,13 @@ See [BTW side questions](/tools/btw) for the full behavior.
    - Command-only messages from allowlisted senders are handled immediately (bypass queue + model).
    - Inline shortcuts (`/help`, `/commands`, `/status`, `/whoami`) also work embedded in normal messages and are stripped before the model sees the remaining text.
    - Unauthorized command-only messages are silently ignored; inline `/...` tokens are treated as plain text.
+
  </Accordion>
  <Accordion title="Argument notes">
    - Commands accept an optional `:` between the command and args (`/think: high`, `/send: on`).
    - `/new <model>` accepts a model alias, `provider/model`, or a provider name (fuzzy match); if no match, the text is treated as the message body.
    - `/allowlist add|remove` requires `commands.config: true` and honors channel `configWrites`.
+
  </Accordion>
 </AccordionGroup>

--- a/qa/scenarios/channels/message-tool-stranded-final-reply.md
+++ b/qa/scenarios/channels/message-tool-stranded-final-reply.md
@@ -0,0 +1,92 @@
+# Message-tool-only private final reply warning
+
+```yaml qa-scenario
+id: message-tool-stranded-final-reply
+title: Message-tool-only private final reply warning
+surface: channel
+coverage:
+  primary:
+    - channels.direct-visible-replies
+  secondary:
+    - channels.qa-channel
+    - tools.message
+objective: Reproduce #85714 — under messages.visibleReplies=message_tool a long private final reply that never calls the message tool is kept private (no outbound), and the gateway emits the private-final WARN.
+gatewayConfigPatch:
+  messages:
+    visibleReplies: message_tool
+successCriteria:
+  - The mock provider returns a long normal final answer and does not plan the message tool.
+  - Under message_tool_only delivery the reply is kept private, so the direct conversation receives no outbound message.
+  - The gateway logs the private-final WARN from source-reply/private-final.
+docsRefs:
+  - docs/channels/qa-channel.md
+codeRefs:
+  - src/auto-reply/reply/agent-runner.ts
+  - src/auto-reply/reply/private-message-tool-final.ts
+  - src/auto-reply/reply/dispatch-from-config.ts
+execution:
+  kind: flow
+  summary: Send a direct message_tool_only turn whose model reply omits the message tool, and verify a substantive private final warns without outbound delivery.
+  config:
+    conversationId: qa-stranded-dm
+    promptSnippet: qa private final reply warning check
+    prompt: "qa private final reply warning check. Reply to me directly in two complete sentences with `QA-STRANDED-85714` in the first sentence and a short explanation in the second sentence. Do NOT call any tool. Do NOT use the message tool."
+    expectedMarker: QA-STRANDED-85714
+    privateFinalLogNeedle: "source-reply/private-final"
+```
+
+```yaml qa-flow
+steps:
+  - name: warns for substantive private final text when the model omits the message tool
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: reset
+      - set: logCursor
+        value:
+          expr: markGatewayLogCursor()
+      - set: requestCountBefore
+        value:
+          expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
+      - call: state.addInboundMessage
+        args:
+          - conversation:
+              id:
+                expr: config.conversationId
+              kind: direct
+            senderId: alice
+            senderName: Alice
+            text:
+              expr: config.prompt
+      - call: waitForNoOutbound
+        args:
+          - ref: state
+          - expr: liveTurnTimeoutMs(env, 30000)
+      - set: scenarioRequests
+        value:
+          expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBefore).filter((request) => String(request.allInputText ?? '').includes(config.promptSnippet)) : []"
+      - assert:
+          expr: "!env.mock || scenarioRequests.length > 0"
+          message: expected mock request evidence that the turn actually ran
+      - assert:
+          expr: "!env.mock || scenarioRequests.every((request) => request.plannedToolName !== 'message')"
+          message:
+            expr: "`model should not have planned the message tool, saw ${JSON.stringify(scenarioRequests.map((request) => request.plannedToolName ?? null))}`"
+      - set: privateFinalLog
+        value:
+          expr: "String(readGatewayLogs() ?? '').slice(logCursor)"
+      - set: privateFinalLine
+        value:
+          expr: "(privateFinalLog.split('\\n').find((line) => line.includes(config.privateFinalLogNeedle)) ?? '').trim()"
+      - assert:
+          expr: "privateFinalLog.includes(config.privateFinalLogNeedle)"
+          message:
+            expr: "`expected the gateway to log ${config.privateFinalLogNeedle} after a substantive private message_tool_only reply, but it was absent`"
+    detailsExpr: "`no-outbound private final; WARN logged=${privateFinalLog.includes(config.privateFinalLogNeedle)}; mock requests=${scenarioRequests.length}; gateway log: ${privateFinalLine}`"
+```
--- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
+++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
@@ -190,6 +190,14 @@ vi.mock("../../agents/subagent-registry.js", () => ({
  markSubagentRunTerminated: () => 0,
 }));

+// #85714: keep the real private-final decision but spy the WARN emitter so we
+// can assert it fires only through the substantive text suppression branch.
+const warnPrivateFinalSpy = vi.hoisted(() => vi.fn());
+vi.mock("./private-message-tool-final.js", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("./private-message-tool-final.js")>();
+  return { ...actual, warnPrivateMessageToolFinal: warnPrivateFinalSpy };
+});
+
 import { runReplyAgent } from "./agent-runner.js";

 type RunWithModelFallbackParams = {
@@ -244,6 +252,7 @@ beforeEach(() => {
  embeddedRunTesting.resetActiveEmbeddedRuns();
  replyRunRegistryTesting.resetReplyRunRegistry();
  runEmbeddedAgentMock.mockClear();
+  warnPrivateFinalSpy.mockClear();
  runCliAgentMock.mockClear();
  runWithModelFallbackMock.mockClear();
  runtimeErrorMock.mockClear();
@@ -2984,3 +2993,129 @@ describe("runReplyAgent mid-turn rate-limit fallback", () => {
    expect(payload?.text).toBeUndefined();
  });
 });
+
+describe("runReplyAgent private message_tool_only final warning (#85714)", () => {
+  async function runPrivateFinalCase(params: {
+    messagingToolSentTargets?: unknown[];
+    finalAssistantText?: string;
+    payloadText?: string;
+    successfulCronAdds?: number;
+  }) {
+    const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-stranded-"));
+    const storePath = path.join(tmp, "sessions.json");
+    const sessionKey = "stranded";
+    const sessionEntry = { sessionId: "session", updatedAt: Date.now(), totalTokens: 1_000 };
+    await fs.writeFile(storePath, JSON.stringify({ [sessionKey]: sessionEntry }, null, 2), "utf-8");
+
+    const finalAssistantText =
+      params.finalAssistantText ??
+      "Here is the answer the user asked for. It includes enough detail to read like a user-facing response rather than a short private note. This should have been sent with the message tool if the channel expected a visible reply.";
+    runEmbeddedAgentMock.mockResolvedValue({
+      // payloadText can differ from the assistant text to simulate metadata-only
+      // payloads (verbose notices, usage line) that must NOT trigger the warn —
+      // detection keys off the assistant final text, not the payload bundle.
+      payloads: [{ text: params.payloadText ?? finalAssistantText }],
+      meta: { agentMeta: {}, finalAssistantVisibleText: finalAssistantText },
+      ...(params.messagingToolSentTargets
+        ? { messagingToolSentTargets: params.messagingToolSentTargets }
+        : {}),
+      ...(params.successfulCronAdds === undefined
+        ? {}
+        : { successfulCronAdds: params.successfulCronAdds }),
+    });
+
+    const sessionCtx = {
+      Provider: "whatsapp",
+      OriginatingTo: "+15550001111",
+      AccountId: "primary",
+      MessageSid: "msg",
+      ChatType: "direct",
+    } as unknown as TemplateContext;
+    const followupRun = {
+      prompt: "hello",
+      summaryLine: "hello",
+      enqueuedAt: Date.now(),
+      run: {
+        agentId: "main",
+        agentDir: "/tmp/agent",
+        sessionId: "session",
+        sessionKey,
+        messageProvider: "whatsapp",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: tmp,
+        // Direct chat + visibleReplies=message_tool resolves to message_tool_only,
+        // so the final text is kept private (no automatic delivery).
+        config: { messages: { visibleReplies: "message_tool" } },
+        skillsSnapshot: {},
+        provider: "anthropic",
+        model: "claude",
+        thinkLevel: "low",
+        reasoningLevel: "on",
+        verboseLevel: "off",
+        elevatedLevel: "off",
+        bashElevated: { enabled: false, allowed: false, defaultLevel: "off" },
+        timeoutMs: 1_000,
+        blockReplyBreak: "message_end",
+      },
+    } as unknown as FollowupRun;
+
+    await runReplyAgent({
+      commandBody: "hello",
+      followupRun,
+      queueKey: sessionKey,
+      resolvedQueue: { mode: "interrupt" } as unknown as QueueSettings,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing: createMockTypingController(),
+      sessionCtx,
+      sessionEntry,
+      sessionStore: { [sessionKey]: sessionEntry },
+      sessionKey,
+      storePath,
+      defaultModel: "anthropic/claude-opus-4-6",
+      agentCfgContextTokens: 200_000,
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+  }
+
+  it("warns when a substantive private final reply never used the message tool", async () => {
+    await runPrivateFinalCase({});
+    expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1);
+    expect(warnPrivateFinalSpy.mock.calls[0]?.[0]).toMatchObject({ sessionKey: "stranded" });
+  });
+
+  it("does not warn for a short private final reply", async () => {
+    await runPrivateFinalCase({ finalAssistantText: "Nothing to send here." });
+    expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
+  });
+
+  it("does not warn when the message tool delivered this turn", async () => {
+    await runPrivateFinalCase({
+      messagingToolSentTargets: [{ tool: "message", provider: "whatsapp", to: "+15550001111" }],
+    });
+    expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
+  });
+
+  it("still warns when only an unrelated cron side effect succeeded", async () => {
+    await runPrivateFinalCase({ successfulCronAdds: 1 });
+    expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not warn on an intentional NO_REPLY turn even when metadata payloads remain", async () => {
+    // Assistant went silent (NO_REPLY), but a verbose/usage metadata payload
+    // survives in finalPayloads. The warn must key off the assistant text, not
+    // the payload bundle, so no private-final warning should fire.
+    await runPrivateFinalCase({
+      finalAssistantText: "no_reply",
+      payloadText: "Auto-compaction complete (count 1).",
+    });
+    expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
+  });
+});
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -94,6 +94,10 @@ import { resolveOriginMessageProvider, resolveOriginMessageTo } from "./origin-r
 import { sanitizePendingFinalDeliveryText } from "./pending-final-delivery.js";
 import { drainPendingToolTasks } from "./pending-tool-task-drain.js";
 import { readPostCompactionContext } from "./post-compaction-context.js";
+import {
+  shouldWarnAboutPrivateMessageToolFinal,
+  warnPrivateMessageToolFinal,
+} from "./private-message-tool-final.js";
 import { resolveActiveRunQueueAction } from "./queue-policy.js";
 import {
  enqueueFollowupRun,
@@ -230,15 +234,27 @@ function hasSuccessfulSideEffectDelivery(params: {
  messagingToolSentTargets?: unknown[];
  successfulCronAdds?: number;
  didSendDeterministicApprovalPrompt?: boolean;
+}): boolean {
+  return (
+    hasSuccessfulSourceReplyDelivery(params) ||
+    (params.successfulCronAdds ?? 0) > 0 ||
+    params.didSendDeterministicApprovalPrompt === true
+  );
+}
+
+function hasSuccessfulSourceReplyDelivery(params: {
+  blockReplyPipeline: { didStream: () => boolean; isAborted: () => boolean } | null;
+  directlySentBlockKeys?: Set<string>;
+  messagingToolSentTexts?: string[];
+  messagingToolSentMediaUrls?: string[];
+  messagingToolSentTargets?: unknown[];
 }): boolean {
  return (
    (params.blockReplyPipeline?.didStream() && !params.blockReplyPipeline.isAborted()) ||
    (params.directlySentBlockKeys?.size ?? 0) > 0 ||
    hasNonEmptyStringArray(params.messagingToolSentTexts) ||
    hasNonEmptyStringArray(params.messagingToolSentMediaUrls) ||
-    hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets) ||
-    (params.successfulCronAdds ?? 0) > 0 ||
-    params.didSendDeterministicApprovalPrompt === true
+    hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets)
  );
 }

@@ -1795,6 +1811,13 @@ export async function runReplyAgent(params: {
      successfulCronAdds: runResult.successfulCronAdds,
      didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt,
    });
+    const successfulSourceReplyDelivery = hasSuccessfulSourceReplyDelivery({
+      blockReplyPipeline,
+      directlySentBlockKeys,
+      messagingToolSentTexts: runResult.messagingToolSentTexts,
+      messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls,
+      messagingToolSentTargets: runResult.messagingToolSentTargets,
+    });
    const returnSilentFallbackFailureIfNeeded = async (): Promise<ReplyPayload | undefined> => {
      const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({
        fallbackTransition,
@@ -2276,9 +2299,30 @@ export async function runReplyAgent(params: {
        runtimePolicySessionKey,
        opts,
      });
-      const pendingText = sourceReplyPolicy.suppressDelivery
-        ? ""
-        : buildPendingFinalDeliveryText(finalPayloads);
+      const finalDeliveryText = buildPendingFinalDeliveryText(finalPayloads);
+      // #85714: warn only for unusually substantive private final text. In
+      // message_tool_only, no tool call can be intentional silence, and
+      // finalDeliveryText also includes verbose/status/usage metadata.
+      const assistantFinalText = rawAssistantText ?? "";
+      if (
+        shouldWarnAboutPrivateMessageToolFinal({
+          sourceReplyDeliveryMode: sourceReplyPolicy.sourceReplyDeliveryMode,
+          sendPolicyDenied: sourceReplyPolicy.sendPolicyDenied,
+          successfulSourceReplyDelivery,
+          finalText: assistantFinalText,
+        })
+      ) {
+        warnPrivateMessageToolFinal({
+          sessionKey,
+          channel:
+            sessionCtx.OriginatingChannel ??
+            sessionCtx.Surface ??
+            sessionCtx.Provider ??
+            activeSessionEntry?.channel,
+          finalTextLength: assistantFinalText.trim().length,
+        });
+      }
+      const pendingText = sourceReplyPolicy.suppressDelivery ? "" : finalDeliveryText;
      const agentId = followupRun.run.agentId;
      const heartbeatAgentCfg = agentId ? resolveAgentConfig(cfg, agentId)?.heartbeat : undefined;
      const heartbeatAckMaxChars = Math.max(
--- a/src/auto-reply/reply/private-message-tool-final.test.ts
+++ b/src/auto-reply/reply/private-message-tool-final.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, it } from "vitest";
+import { shouldWarnAboutPrivateMessageToolFinal } from "./private-message-tool-final.js";
+
+const base = {
+  sourceReplyDeliveryMode: "message_tool_only" as const,
+  sendPolicyDenied: false,
+  successfulSourceReplyDelivery: false,
+  finalText:
+    "Here is the answer the user asked for. It includes enough detail to look like a visible response rather than an internal no-op note.",
+};
+
+describe("shouldWarnAboutPrivateMessageToolFinal", () => {
+  it("flags a multi-sentence private final that was never delivered via the message tool (#85714)", () => {
+    expect(shouldWarnAboutPrivateMessageToolFinal(base)).toBe(true);
+  });
+
+  it("flags a long private final even without multiple sentence terminators", () => {
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({
+        ...base,
+        finalText: "x".repeat(280),
+      }),
+    ).toBe(true);
+  });
+
+  it("does not flag automatic delivery mode (final text is delivered normally)", () => {
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: "automatic" }),
+    ).toBe(false);
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: undefined }),
+    ).toBe(false);
+  });
+
+  it("does not flag when the message tool already delivered this turn", () => {
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({ ...base, successfulSourceReplyDelivery: true }),
+    ).toBe(false);
+  });
+
+  it("does not flag silent sentinel variants (intentional silence)", () => {
+    expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY" })).toBe(false);
+    expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "  no_reply  " })).toBe(
+      false,
+    );
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY\n\nNO_REPLY" }),
+    ).toBe(false);
+  });
+
+  it("does not flag a short private final", () => {
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({
+        ...base,
+        finalText: "Nothing to add here.",
+      }),
+    ).toBe(false);
+    expect(
+      shouldWarnAboutPrivateMessageToolFinal({
+        ...base,
+        finalText: "I do not need to send anything. Nothing else to add.",
+      }),
+    ).toBe(false);
+  });
+
+  it("does not flag empty or whitespace-only final text", () => {
+    expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "" })).toBe(false);
+    expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "   \n " })).toBe(false);
+  });
+
+  it("does not flag when delivery was intentionally denied by send policy", () => {
+    expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, sendPolicyDenied: true })).toBe(false);
+  });
+});
--- a/src/auto-reply/reply/private-message-tool-final.ts
+++ b/src/auto-reply/reply/private-message-tool-final.ts
@@ -0,0 +1,67 @@
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+import type { SourceReplyDeliveryMode } from "../get-reply-options.types.js";
+import { isSilentReplyText } from "../tokens.js";
+
+const privateFinalReplyLogger = createSubsystemLogger("source-reply/private-final");
+
+const LONG_PRIVATE_FINAL_MIN_CHARS = 280;
+const MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS = 120;
+const MULTI_SENTENCE_TERMINATOR_MIN_COUNT = 2;
+const SENTENCE_TERMINATOR_REGEX = /[.!?]+(?:\s|$)/g;
+
+/**
+ * `message_tool_only` allows the model to stay silent by simply not calling the
+ * message tool, so short private final text is not evidence of message loss.
+ * Warn only for unusually substantive private finals, which usually means the
+ * model wrote a user-facing answer but missed the configured delivery tool.
+ */
+export function shouldWarnAboutPrivateMessageToolFinal(params: {
+  sourceReplyDeliveryMode: SourceReplyDeliveryMode | undefined;
+  sendPolicyDenied: boolean;
+  successfulSourceReplyDelivery: boolean;
+  finalText: string;
+}): boolean {
+  if (params.sourceReplyDeliveryMode !== "message_tool_only") {
+    return false;
+  }
+  // A send-policy denial is an intentional block, and a successful source-reply
+  // delivery means the contract was honored. Other side effects do not count.
+  if (params.sendPolicyDenied || params.successfulSourceReplyDelivery) {
+    return false;
+  }
+  const trimmed = params.finalText.trim();
+  if (!trimmed || isSilentReplyText(trimmed)) {
+    return false;
+  }
+  if (trimmed.length >= LONG_PRIVATE_FINAL_MIN_CHARS) {
+    return true;
+  }
+  const sentenceTerminatorCount = countSentenceLikeTerminators(trimmed);
+  return (
+    trimmed.length >= MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS &&
+    sentenceTerminatorCount >= MULTI_SENTENCE_TERMINATOR_MIN_COUNT
+  );
+}
+
+/**
+ * Emit metadata-only operator signal. The body is intentionally omitted:
+ * `message_tool_only` keeps normal final text private by design.
+ */
+export function warnPrivateMessageToolFinal(params: {
+  sessionKey: string | undefined;
+  channel: string | undefined;
+  finalTextLength: number;
+}): void {
+  privateFinalReplyLogger.warn(
+    "agent produced a long private final reply without calling the configured delivery tool (message_tool_only); response kept private and not delivered to the source channel",
+    {
+      sessionKey: params.sessionKey,
+      channel: params.channel,
+      chars: params.finalTextLength,
+    },
+  );
+}
+
+function countSentenceLikeTerminators(text: string): number {
+  return Array.from(text.matchAll(SENTENCE_TERMINATOR_REGEX)).length;
+}
--- a/test/scripts/plugin-prerelease-test-plan.test.ts
+++ b/test/scripts/plugin-prerelease-test-plan.test.ts
@@ -160,7 +160,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
    expect(readFileSync("scripts/e2e/lib/clawhub-fixture-server.cjs", "utf8")).toContain(
      "X-ClawHub-Artifact-Sha256",
    );
-    expect(script).toContain("docker_e2e_docker_cmd stats --no-stream");
+    expect(script).toContain("docker_e2e_sample_stats_until_exit");
+    expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs");
    expect(sweepScript).toContain("scan_logs_for_unexpected_errors");
  });

@@ -182,7 +183,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
      weight: 3,
    });
    expect(script).toContain("OPENCLAW_ENTRY=/app/openclaw.mjs");
-    expect(script).toContain("docker_e2e_docker_cmd stats --no-stream");
+    expect(script).toContain("docker_e2e_sample_stats_until_exit");
+    expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs");
    expect(script).toContain("node scripts/e2e/kitchen-sink-rpc-walk.mjs");
    expect(script).not.toContain("--import tsx");
    expect(walkScript).toContain("commands.list");