From 75e0053cf969e4cf4e66b893edce73086b542ccd Mon Sep 17 00:00:00 2001 From: yaoyi1222 Date: Sun, 31 May 2026 21:35:58 +0800 Subject: [PATCH] fix(auto-reply): warn on substantive private message-tool finals Warn operators when message_tool_only produces unusually substantive private final text without a delivered source reply. Keeps short/NO_REPLY silence quiet, avoids logging response bodies, and distinguishes unrelated side effects from source-reply delivery. --- docs/tools/slash-commands.md | 5 + .../message-tool-stranded-final-reply.md | 92 ++++++++++++ .../agent-runner.misc.runreplyagent.test.ts | 135 ++++++++++++++++++ src/auto-reply/reply/agent-runner.ts | 56 +++++++- .../reply/private-message-tool-final.test.ts | 74 ++++++++++ .../reply/private-message-tool-final.ts | 67 +++++++++ .../plugin-prerelease-test-plan.test.ts | 6 +- 7 files changed, 427 insertions(+), 8 deletions(-) create mode 100644 qa/scenarios/channels/message-tool-stranded-final-reply.md create mode 100644 src/auto-reply/reply/private-message-tool-final.test.ts create mode 100644 src/auto-reply/reply/private-message-tool-final.ts diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index 713f0168dee..9103ae0065b 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -212,11 +212,13 @@ plugins. - `/fast on|off` persists a session override; use the Sessions UI `inherit` option to clear it. - `/fast` is provider-specific: OpenAI/Codex map it to `service_tier=priority`; direct Anthropic requests map it to `service_tier=auto` or `standard_only`. - `/reasoning`, `/verbose`, and `/trace` are risky in group settings — they may reveal internal reasoning or plugin diagnostics. Keep them off in group chats. + - `/model` persists the new model immediately to the session. - If the agent is idle, the next run uses it right away. - If a run is active, the switch is marked pending and applied at the next clean retry point. 
+ @@ -468,6 +470,7 @@ See [BTW side questions](/tools/btw) for the full behavior. - **Native Slack commands:** `agent:<agentId>:slack:slash:<userId>` (prefix configurable via `channels.slack.slashCommand.sessionPrefix`) - **Native Telegram commands:** `telegram:slash:<userId>` (targets the chat session via `CommandTargetSessionKey`) - **`/stop`** targets the active chat session to abort the current run. + `channels.slack.slashCommand` supports a single `/openclaw`-style command. @@ -479,11 +482,13 @@ See [BTW side questions](/tools/btw) for the full behavior. - Command-only messages from allowlisted senders are handled immediately (bypass queue + model). - Inline shortcuts (`/help`, `/commands`, `/status`, `/whoami`) also work embedded in normal messages and are stripped before the model sees the remaining text. - Unauthorized command-only messages are silently ignored; inline `/...` tokens are treated as plain text. + - Commands accept an optional `:` between the command and args (`/think: high`, `/send: on`). - `/new <model>` accepts a model alias, `provider/model`, or a provider name (fuzzy match); if no match, the text is treated as the message body. - `/allowlist add|remove` requires `commands.config: true` and honors channel `configWrites`. 
+ diff --git a/qa/scenarios/channels/message-tool-stranded-final-reply.md b/qa/scenarios/channels/message-tool-stranded-final-reply.md new file mode 100644 index 00000000000..c9c5496f431 --- /dev/null +++ b/qa/scenarios/channels/message-tool-stranded-final-reply.md @@ -0,0 +1,92 @@ +# Message-tool-only private final reply warning + +```yaml qa-scenario +id: message-tool-stranded-final-reply +title: Message-tool-only private final reply warning +surface: channel +coverage: + primary: + - channels.direct-visible-replies + secondary: + - channels.qa-channel + - tools.message +objective: Reproduce #85714 — under messages.visibleReplies=message_tool a long private final reply that never calls the message tool is kept private (no outbound), and the gateway emits the private-final WARN. +gatewayConfigPatch: + messages: + visibleReplies: message_tool +successCriteria: + - The mock provider returns a long normal final answer and does not plan the message tool. + - Under message_tool_only delivery the reply is kept private, so the direct conversation receives no outbound message. + - The gateway logs the private-final WARN from source-reply/private-final. +docsRefs: + - docs/channels/qa-channel.md +codeRefs: + - src/auto-reply/reply/agent-runner.ts + - src/auto-reply/reply/private-message-tool-final.ts + - src/auto-reply/reply/dispatch-from-config.ts +execution: + kind: flow + summary: Send a direct message_tool_only turn whose model reply omits the message tool, and verify a substantive private final warns without outbound delivery. + config: + conversationId: qa-stranded-dm + promptSnippet: qa private final reply warning check + prompt: "qa private final reply warning check. Reply to me directly in two complete sentences with `QA-STRANDED-85714` in the first sentence and a short explanation in the second sentence. Do NOT call any tool. Do NOT use the message tool." 
+ expectedMarker: QA-STRANDED-85714 + privateFinalLogNeedle: "source-reply/private-final" +``` + +```yaml qa-flow +steps: + - name: warns for substantive private final text when the model omits the message tool + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: reset + - set: logCursor + value: + expr: markGatewayLogCursor() + - set: requestCountBefore + value: + expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0" + - call: state.addInboundMessage + args: + - conversation: + id: + expr: config.conversationId + kind: direct + senderId: alice + senderName: Alice + text: + expr: config.prompt + - call: waitForNoOutbound + args: + - ref: state + - expr: liveTurnTimeoutMs(env, 30000) + - set: scenarioRequests + value: + expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBefore).filter((request) => String(request.allInputText ?? '').includes(config.promptSnippet)) : []" + - assert: + expr: "!env.mock || scenarioRequests.length > 0" + message: expected mock request evidence that the turn actually ran + - assert: + expr: "!env.mock || scenarioRequests.every((request) => request.plannedToolName !== 'message')" + message: + expr: "`model should not have planned the message tool, saw ${JSON.stringify(scenarioRequests.map((request) => request.plannedToolName ?? null))}`" + - set: privateFinalLog + value: + expr: "String(readGatewayLogs() ?? '').slice(logCursor)" + - set: privateFinalLine + value: + expr: "(privateFinalLog.split('\\n').find((line) => line.includes(config.privateFinalLogNeedle)) ?? 
'').trim()" + - assert: + expr: "privateFinalLog.includes(config.privateFinalLogNeedle)" + message: + expr: "`expected the gateway to log ${config.privateFinalLogNeedle} after a substantive private message_tool_only reply, but it was absent`" + detailsExpr: "`no-outbound private final; WARN logged=${privateFinalLog.includes(config.privateFinalLogNeedle)}; mock requests=${scenarioRequests.length}; gateway log: ${privateFinalLine}`" +``` diff --git a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts index a1196387c0d..b91b1d05259 100644 --- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts +++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts @@ -190,6 +190,14 @@ vi.mock("../../agents/subagent-registry.js", () => ({ markSubagentRunTerminated: () => 0, })); +// #85714: keep the real private-final decision but spy the WARN emitter so we +// can assert it fires only through the substantive text suppression branch. 
+const warnPrivateFinalSpy = vi.hoisted(() => vi.fn()); +vi.mock("./private-message-tool-final.js", async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, warnPrivateMessageToolFinal: warnPrivateFinalSpy }; +}); + import { runReplyAgent } from "./agent-runner.js"; type RunWithModelFallbackParams = { @@ -244,6 +252,7 @@ beforeEach(() => { embeddedRunTesting.resetActiveEmbeddedRuns(); replyRunRegistryTesting.resetReplyRunRegistry(); runEmbeddedAgentMock.mockClear(); + warnPrivateFinalSpy.mockClear(); runCliAgentMock.mockClear(); runWithModelFallbackMock.mockClear(); runtimeErrorMock.mockClear(); @@ -2984,3 +2993,129 @@ describe("runReplyAgent mid-turn rate-limit fallback", () => { expect(payload?.text).toBeUndefined(); }); }); + +describe("runReplyAgent private message_tool_only final warning (#85714)", () => { + async function runPrivateFinalCase(params: { + messagingToolSentTargets?: unknown[]; + finalAssistantText?: string; + payloadText?: string; + successfulCronAdds?: number; + }) { + const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-stranded-")); + const storePath = path.join(tmp, "sessions.json"); + const sessionKey = "stranded"; + const sessionEntry = { sessionId: "session", updatedAt: Date.now(), totalTokens: 1_000 }; + await fs.writeFile(storePath, JSON.stringify({ [sessionKey]: sessionEntry }, null, 2), "utf-8"); + + const finalAssistantText = + params.finalAssistantText ?? + "Here is the answer the user asked for. It includes enough detail to read like a user-facing response rather than a short private note. This should have been sent with the message tool if the channel expected a visible reply."; + runEmbeddedAgentMock.mockResolvedValue({ + // payloadText can differ from the assistant text to simulate metadata-only + // payloads (verbose notices, usage line) that must NOT trigger the warn — + // detection keys off the assistant final text, not the payload bundle. + payloads: [{ text: params.payloadText ?? 
finalAssistantText }], + meta: { agentMeta: {}, finalAssistantVisibleText: finalAssistantText }, + ...(params.messagingToolSentTargets + ? { messagingToolSentTargets: params.messagingToolSentTargets } + : {}), + ...(params.successfulCronAdds === undefined + ? {} + : { successfulCronAdds: params.successfulCronAdds }), + }); + + const sessionCtx = { + Provider: "whatsapp", + OriginatingTo: "+15550001111", + AccountId: "primary", + MessageSid: "msg", + ChatType: "direct", + } as unknown as TemplateContext; + const followupRun = { + prompt: "hello", + summaryLine: "hello", + enqueuedAt: Date.now(), + run: { + agentId: "main", + agentDir: "/tmp/agent", + sessionId: "session", + sessionKey, + messageProvider: "whatsapp", + sessionFile: "/tmp/session.jsonl", + workspaceDir: tmp, + // Direct chat + visibleReplies=message_tool resolves to message_tool_only, + // so the final text is kept private (no automatic delivery). + config: { messages: { visibleReplies: "message_tool" } }, + skillsSnapshot: {}, + provider: "anthropic", + model: "claude", + thinkLevel: "low", + reasoningLevel: "on", + verboseLevel: "off", + elevatedLevel: "off", + bashElevated: { enabled: false, allowed: false, defaultLevel: "off" }, + timeoutMs: 1_000, + blockReplyBreak: "message_end", + }, + } as unknown as FollowupRun; + + await runReplyAgent({ + commandBody: "hello", + followupRun, + queueKey: sessionKey, + resolvedQueue: { mode: "interrupt" } as unknown as QueueSettings, + shouldSteer: false, + shouldFollowup: false, + isActive: false, + isStreaming: false, + typing: createMockTypingController(), + sessionCtx, + sessionEntry, + sessionStore: { [sessionKey]: sessionEntry }, + sessionKey, + storePath, + defaultModel: "anthropic/claude-opus-4-6", + agentCfgContextTokens: 200_000, + resolvedVerboseLevel: "off", + isNewSession: false, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + shouldInjectGroupIntro: false, + typingMode: "instant", + }); + } + + it("warns when a 
substantive private final reply never used the message tool", async () => { + await runPrivateFinalCase({}); + expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1); + expect(warnPrivateFinalSpy.mock.calls[0]?.[0]).toMatchObject({ sessionKey: "stranded" }); + }); + + it("does not warn for a short private final reply", async () => { + await runPrivateFinalCase({ finalAssistantText: "Nothing to send here." }); + expect(warnPrivateFinalSpy).not.toHaveBeenCalled(); + }); + + it("does not warn when the message tool delivered this turn", async () => { + await runPrivateFinalCase({ + messagingToolSentTargets: [{ tool: "message", provider: "whatsapp", to: "+15550001111" }], + }); + expect(warnPrivateFinalSpy).not.toHaveBeenCalled(); + }); + + it("still warns when only an unrelated cron side effect succeeded", async () => { + await runPrivateFinalCase({ successfulCronAdds: 1 }); + expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1); + }); + + it("does not warn on an intentional NO_REPLY turn even when metadata payloads remain", async () => { + // Assistant went silent (NO_REPLY), but a verbose/usage metadata payload + // survives in finalPayloads. The warn must key off the assistant text, not + // the payload bundle, so no private-final warning should fire. 
+ await runPrivateFinalCase({ + finalAssistantText: "no_reply", + payloadText: "Auto-compaction complete (count 1).", + }); + expect(warnPrivateFinalSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 34956f2827b..11a2f181a77 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -94,6 +94,10 @@ import { resolveOriginMessageProvider, resolveOriginMessageTo } from "./origin-r import { sanitizePendingFinalDeliveryText } from "./pending-final-delivery.js"; import { drainPendingToolTasks } from "./pending-tool-task-drain.js"; import { readPostCompactionContext } from "./post-compaction-context.js"; +import { + shouldWarnAboutPrivateMessageToolFinal, + warnPrivateMessageToolFinal, +} from "./private-message-tool-final.js"; import { resolveActiveRunQueueAction } from "./queue-policy.js"; import { enqueueFollowupRun, @@ -230,15 +234,27 @@ function hasSuccessfulSideEffectDelivery(params: { messagingToolSentTargets?: unknown[]; successfulCronAdds?: number; didSendDeterministicApprovalPrompt?: boolean; +}): boolean { + return ( + hasSuccessfulSourceReplyDelivery(params) || + (params.successfulCronAdds ?? 0) > 0 || + params.didSendDeterministicApprovalPrompt === true + ); +} + +function hasSuccessfulSourceReplyDelivery(params: { + blockReplyPipeline: { didStream: () => boolean; isAborted: () => boolean } | null; + directlySentBlockKeys?: Set; + messagingToolSentTexts?: string[]; + messagingToolSentMediaUrls?: string[]; + messagingToolSentTargets?: unknown[]; }): boolean { return ( (params.blockReplyPipeline?.didStream() && !params.blockReplyPipeline.isAborted()) || (params.directlySentBlockKeys?.size ?? 0) > 0 || hasNonEmptyStringArray(params.messagingToolSentTexts) || hasNonEmptyStringArray(params.messagingToolSentMediaUrls) || - hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets) || - (params.successfulCronAdds ?? 
0) > 0 || - params.didSendDeterministicApprovalPrompt === true + hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets) ); } @@ -1795,6 +1811,13 @@ export async function runReplyAgent(params: { successfulCronAdds: runResult.successfulCronAdds, didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt, }); + const successfulSourceReplyDelivery = hasSuccessfulSourceReplyDelivery({ + blockReplyPipeline, + directlySentBlockKeys, + messagingToolSentTexts: runResult.messagingToolSentTexts, + messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls, + messagingToolSentTargets: runResult.messagingToolSentTargets, + }); const returnSilentFallbackFailureIfNeeded = async (): Promise => { const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({ fallbackTransition, @@ -2276,9 +2299,30 @@ export async function runReplyAgent(params: { runtimePolicySessionKey, opts, }); - const pendingText = sourceReplyPolicy.suppressDelivery - ? "" - : buildPendingFinalDeliveryText(finalPayloads); + const finalDeliveryText = buildPendingFinalDeliveryText(finalPayloads); + // #85714: warn only for unusually substantive private final text. In + // message_tool_only, no tool call can be intentional silence, and + // finalDeliveryText also includes verbose/status/usage metadata. + const assistantFinalText = rawAssistantText ?? ""; + if ( + shouldWarnAboutPrivateMessageToolFinal({ + sourceReplyDeliveryMode: sourceReplyPolicy.sourceReplyDeliveryMode, + sendPolicyDenied: sourceReplyPolicy.sendPolicyDenied, + successfulSourceReplyDelivery, + finalText: assistantFinalText, + }) + ) { + warnPrivateMessageToolFinal({ + sessionKey, + channel: + sessionCtx.OriginatingChannel ?? + sessionCtx.Surface ?? + sessionCtx.Provider ?? + activeSessionEntry?.channel, + finalTextLength: assistantFinalText.trim().length, + }); + } + const pendingText = sourceReplyPolicy.suppressDelivery ? 
"" : finalDeliveryText; const agentId = followupRun.run.agentId; const heartbeatAgentCfg = agentId ? resolveAgentConfig(cfg, agentId)?.heartbeat : undefined; const heartbeatAckMaxChars = Math.max( diff --git a/src/auto-reply/reply/private-message-tool-final.test.ts b/src/auto-reply/reply/private-message-tool-final.test.ts new file mode 100644 index 00000000000..ded842878a7 --- /dev/null +++ b/src/auto-reply/reply/private-message-tool-final.test.ts @@ -0,0 +1,74 @@ +import { describe, expect, it } from "vitest"; +import { shouldWarnAboutPrivateMessageToolFinal } from "./private-message-tool-final.js"; + +const base = { + sourceReplyDeliveryMode: "message_tool_only" as const, + sendPolicyDenied: false, + successfulSourceReplyDelivery: false, + finalText: + "Here is the answer the user asked for. It includes enough detail to look like a visible response rather than an internal no-op note.", +}; + +describe("shouldWarnAboutPrivateMessageToolFinal", () => { + it("flags a multi-sentence private final that was never delivered via the message tool (#85714)", () => { + expect(shouldWarnAboutPrivateMessageToolFinal(base)).toBe(true); + }); + + it("flags a long private final even without multiple sentence terminators", () => { + expect( + shouldWarnAboutPrivateMessageToolFinal({ + ...base, + finalText: "x".repeat(280), + }), + ).toBe(true); + }); + + it("does not flag automatic delivery mode (final text is delivered normally)", () => { + expect( + shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: "automatic" }), + ).toBe(false); + expect( + shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: undefined }), + ).toBe(false); + }); + + it("does not flag when the message tool already delivered this turn", () => { + expect( + shouldWarnAboutPrivateMessageToolFinal({ ...base, successfulSourceReplyDelivery: true }), + ).toBe(false); + }); + + it("does not flag silent sentinel variants (intentional silence)", () => { + 
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY" })).toBe(false); + expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: " no_reply " })).toBe( + false, + ); + expect( + shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY\n\nNO_REPLY" }), + ).toBe(false); + }); + + it("does not flag a short private final", () => { + expect( + shouldWarnAboutPrivateMessageToolFinal({ + ...base, + finalText: "Nothing to add here.", + }), + ).toBe(false); + expect( + shouldWarnAboutPrivateMessageToolFinal({ + ...base, + finalText: "I do not need to send anything. Nothing else to add.", + }), + ).toBe(false); + }); + + it("does not flag empty or whitespace-only final text", () => { + expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "" })).toBe(false); + expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: " \n " })).toBe(false); + }); + + it("does not flag when delivery was intentionally denied by send policy", () => { + expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, sendPolicyDenied: true })).toBe(false); + }); +}); diff --git a/src/auto-reply/reply/private-message-tool-final.ts b/src/auto-reply/reply/private-message-tool-final.ts new file mode 100644 index 00000000000..5541ab5d73a --- /dev/null +++ b/src/auto-reply/reply/private-message-tool-final.ts @@ -0,0 +1,67 @@ +import { createSubsystemLogger } from "../../logging/subsystem.js"; +import type { SourceReplyDeliveryMode } from "../get-reply-options.types.js"; +import { isSilentReplyText } from "../tokens.js"; + +const privateFinalReplyLogger = createSubsystemLogger("source-reply/private-final"); + +const LONG_PRIVATE_FINAL_MIN_CHARS = 280; +const MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS = 120; +const MULTI_SENTENCE_TERMINATOR_MIN_COUNT = 2; +const SENTENCE_TERMINATOR_REGEX = /[.!?]+(?:\s|$)/g; + +/** + * `message_tool_only` allows the model to stay silent by simply not calling the + * message tool, so short private final text 
is not evidence of message loss. + * Warn only for unusually substantive private finals, which usually means the + * model wrote a user-facing answer but missed the configured delivery tool. + */ +export function shouldWarnAboutPrivateMessageToolFinal(params: { + sourceReplyDeliveryMode: SourceReplyDeliveryMode | undefined; + sendPolicyDenied: boolean; + successfulSourceReplyDelivery: boolean; + finalText: string; +}): boolean { + if (params.sourceReplyDeliveryMode !== "message_tool_only") { + return false; + } + // A send-policy denial is an intentional block, and a successful source-reply + // delivery means the contract was honored. Other side effects do not count. + if (params.sendPolicyDenied || params.successfulSourceReplyDelivery) { + return false; + } + const trimmed = params.finalText.trim(); + if (!trimmed || isSilentReplyText(trimmed)) { + return false; + } + if (trimmed.length >= LONG_PRIVATE_FINAL_MIN_CHARS) { + return true; + } + const sentenceTerminatorCount = countSentenceLikeTerminators(trimmed); + return ( + trimmed.length >= MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS && + sentenceTerminatorCount >= MULTI_SENTENCE_TERMINATOR_MIN_COUNT + ); +} + +/** + * Emit metadata-only operator signal. The body is intentionally omitted: + * `message_tool_only` keeps normal final text private by design. 
+ */ +export function warnPrivateMessageToolFinal(params: { + sessionKey: string | undefined; + channel: string | undefined; + finalTextLength: number; +}): void { + privateFinalReplyLogger.warn( + "agent produced a long private final reply without calling the configured delivery tool (message_tool_only); response kept private and not delivered to the source channel", + { + sessionKey: params.sessionKey, + channel: params.channel, + chars: params.finalTextLength, + }, + ); +} + +function countSentenceLikeTerminators(text: string): number { + return Array.from(text.matchAll(SENTENCE_TERMINATOR_REGEX)).length; +} diff --git a/test/scripts/plugin-prerelease-test-plan.test.ts b/test/scripts/plugin-prerelease-test-plan.test.ts index 91575cce0d0..f36cc9d3264 100644 --- a/test/scripts/plugin-prerelease-test-plan.test.ts +++ b/test/scripts/plugin-prerelease-test-plan.test.ts @@ -160,7 +160,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { expect(readFileSync("scripts/e2e/lib/clawhub-fixture-server.cjs", "utf8")).toContain( "X-ClawHub-Artifact-Sha256", ); - expect(script).toContain("docker_e2e_docker_cmd stats --no-stream"); + expect(script).toContain("docker_e2e_sample_stats_until_exit"); + expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs"); expect(sweepScript).toContain("scan_logs_for_unexpected_errors"); }); @@ -182,7 +183,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { weight: 3, }); expect(script).toContain("OPENCLAW_ENTRY=/app/openclaw.mjs"); - expect(script).toContain("docker_e2e_docker_cmd stats --no-stream"); + expect(script).toContain("docker_e2e_sample_stats_until_exit"); + expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs"); expect(script).toContain("node scripts/e2e/kitchen-sink-rpc-walk.mjs"); expect(script).not.toContain("--import tsx"); expect(walkScript).toContain("commands.list");