mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-03 04:36:21 +00:00
fix(auto-reply): warn on substantive private message-tool finals
Warn operators when message_tool_only produces unusually substantive private final text without a delivered source reply. Keeps short/NO_REPLY silence quiet, avoids logging response bodies, and distinguishes unrelated side effects from source-reply delivery.
This commit is contained in:
@@ -212,11 +212,13 @@ plugins.
|
||||
- `/fast on|off` persists a session override; use the Sessions UI `inherit` option to clear it.
|
||||
- `/fast` is provider-specific: OpenAI/Codex map it to `service_tier=priority`; direct Anthropic requests map it to `service_tier=auto` or `standard_only`.
|
||||
- `/reasoning`, `/verbose`, and `/trace` are risky in group settings — they may reveal internal reasoning or plugin diagnostics. Keep them off in group chats.
|
||||
|
||||
</Accordion>
|
||||
<Accordion title="Model switching details">
|
||||
- `/model` persists the new model immediately to the session.
|
||||
- If the agent is idle, the next run uses it right away.
|
||||
- If a run is active, the switch is marked pending and applied at the next clean retry point.
|
||||
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
@@ -468,6 +470,7 @@ See [BTW side questions](/tools/btw) for the full behavior.
|
||||
- **Native Slack commands:** `agent:<agentId>:slack:slash:<userId>` (prefix configurable via `channels.slack.slashCommand.sessionPrefix`)
|
||||
- **Native Telegram commands:** `telegram:slash:<userId>` (targets the chat session via `CommandTargetSessionKey`)
|
||||
- **`/stop`** targets the active chat session to abort the current run.
|
||||
|
||||
</Accordion>
|
||||
<Accordion title="Slack specifics">
|
||||
`channels.slack.slashCommand` supports a single `/openclaw`-style command.
|
||||
@@ -479,11 +482,13 @@ See [BTW side questions](/tools/btw) for the full behavior.
|
||||
- Command-only messages from allowlisted senders are handled immediately (bypass queue + model).
|
||||
- Inline shortcuts (`/help`, `/commands`, `/status`, `/whoami`) also work embedded in normal messages and are stripped before the model sees the remaining text.
|
||||
- Unauthorized command-only messages are silently ignored; inline `/...` tokens are treated as plain text.
|
||||
|
||||
</Accordion>
|
||||
<Accordion title="Argument notes">
|
||||
- Commands accept an optional `:` between the command and args (`/think: high`, `/send: on`).
|
||||
- `/new <model>` accepts a model alias, `provider/model`, or a provider name (fuzzy match); if no match, the text is treated as the message body.
|
||||
- `/allowlist add|remove` requires `commands.config: true` and honors channel `configWrites`.
|
||||
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
|
||||
92
qa/scenarios/channels/message-tool-stranded-final-reply.md
Normal file
92
qa/scenarios/channels/message-tool-stranded-final-reply.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# Message-tool-only private final reply warning
|
||||
|
||||
```yaml qa-scenario
|
||||
id: message-tool-stranded-final-reply
|
||||
title: Message-tool-only private final reply warning
|
||||
surface: channel
|
||||
coverage:
|
||||
primary:
|
||||
- channels.direct-visible-replies
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
- tools.message
|
||||
objective: Reproduce #85714 — under messages.visibleReplies=message_tool a long private final reply that never calls the message tool is kept private (no outbound), and the gateway emits the private-final WARN.
|
||||
gatewayConfigPatch:
|
||||
messages:
|
||||
visibleReplies: message_tool
|
||||
successCriteria:
|
||||
- The mock provider returns a long normal final answer and does not plan the message tool.
|
||||
- Under message_tool_only delivery the reply is kept private, so the direct conversation receives no outbound message.
|
||||
- The gateway logs the private-final WARN from source-reply/private-final.
|
||||
docsRefs:
|
||||
- docs/channels/qa-channel.md
|
||||
codeRefs:
|
||||
- src/auto-reply/reply/agent-runner.ts
|
||||
- src/auto-reply/reply/private-message-tool-final.ts
|
||||
- src/auto-reply/reply/dispatch-from-config.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Send a direct message_tool_only turn whose model reply omits the message tool, and verify a substantive private final warns without outbound delivery.
|
||||
config:
|
||||
conversationId: qa-stranded-dm
|
||||
promptSnippet: qa private final reply warning check
|
||||
prompt: "qa private final reply warning check. Reply to me directly in two complete sentences with `QA-STRANDED-85714` in the first sentence and a short explanation in the second sentence. Do NOT call any tool. Do NOT use the message tool."
|
||||
expectedMarker: QA-STRANDED-85714
|
||||
privateFinalLogNeedle: "source-reply/private-final"
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: warns for substantive private final text when the model omits the message tool
|
||||
actions:
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: waitForQaChannelReady
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: reset
|
||||
- set: logCursor
|
||||
value:
|
||||
expr: markGatewayLogCursor()
|
||||
- set: requestCountBefore
|
||||
value:
|
||||
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).length : 0"
|
||||
- call: state.addInboundMessage
|
||||
args:
|
||||
- conversation:
|
||||
id:
|
||||
expr: config.conversationId
|
||||
kind: direct
|
||||
senderId: alice
|
||||
senderName: Alice
|
||||
text:
|
||||
expr: config.prompt
|
||||
- call: waitForNoOutbound
|
||||
args:
|
||||
- ref: state
|
||||
- expr: liveTurnTimeoutMs(env, 30000)
|
||||
- set: scenarioRequests
|
||||
value:
|
||||
expr: "env.mock ? (await fetchJson(`${env.mock.baseUrl}/debug/requests`)).slice(requestCountBefore).filter((request) => String(request.allInputText ?? '').includes(config.promptSnippet)) : []"
|
||||
- assert:
|
||||
expr: "!env.mock || scenarioRequests.length > 0"
|
||||
message: expected mock request evidence that the turn actually ran
|
||||
- assert:
|
||||
expr: "!env.mock || scenarioRequests.every((request) => request.plannedToolName !== 'message')"
|
||||
message:
|
||||
expr: "`model should not have planned the message tool, saw ${JSON.stringify(scenarioRequests.map((request) => request.plannedToolName ?? null))}`"
|
||||
- set: privateFinalLog
|
||||
value:
|
||||
expr: "String(readGatewayLogs() ?? '').slice(logCursor)"
|
||||
- set: privateFinalLine
|
||||
value:
|
||||
expr: "(privateFinalLog.split('\\n').find((line) => line.includes(config.privateFinalLogNeedle)) ?? '').trim()"
|
||||
- assert:
|
||||
expr: "privateFinalLog.includes(config.privateFinalLogNeedle)"
|
||||
message:
|
||||
expr: "`expected the gateway to log ${config.privateFinalLogNeedle} after a substantive private message_tool_only reply, but it was absent`"
|
||||
detailsExpr: "`no-outbound private final; WARN logged=${privateFinalLog.includes(config.privateFinalLogNeedle)}; mock requests=${scenarioRequests.length}; gateway log: ${privateFinalLine}`"
|
||||
```
|
||||
@@ -190,6 +190,14 @@ vi.mock("../../agents/subagent-registry.js", () => ({
|
||||
markSubagentRunTerminated: () => 0,
|
||||
}));
|
||||
|
||||
// #85714: keep the real private-final decision but spy the WARN emitter so we
|
||||
// can assert it fires only through the substantive text suppression branch.
|
||||
const warnPrivateFinalSpy = vi.hoisted(() => vi.fn());
|
||||
vi.mock("./private-message-tool-final.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("./private-message-tool-final.js")>();
|
||||
return { ...actual, warnPrivateMessageToolFinal: warnPrivateFinalSpy };
|
||||
});
|
||||
|
||||
import { runReplyAgent } from "./agent-runner.js";
|
||||
|
||||
type RunWithModelFallbackParams = {
|
||||
@@ -244,6 +252,7 @@ beforeEach(() => {
|
||||
embeddedRunTesting.resetActiveEmbeddedRuns();
|
||||
replyRunRegistryTesting.resetReplyRunRegistry();
|
||||
runEmbeddedAgentMock.mockClear();
|
||||
warnPrivateFinalSpy.mockClear();
|
||||
runCliAgentMock.mockClear();
|
||||
runWithModelFallbackMock.mockClear();
|
||||
runtimeErrorMock.mockClear();
|
||||
@@ -2984,3 +2993,129 @@ describe("runReplyAgent mid-turn rate-limit fallback", () => {
|
||||
expect(payload?.text).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("runReplyAgent private message_tool_only final warning (#85714)", () => {
|
||||
async function runPrivateFinalCase(params: {
|
||||
messagingToolSentTargets?: unknown[];
|
||||
finalAssistantText?: string;
|
||||
payloadText?: string;
|
||||
successfulCronAdds?: number;
|
||||
}) {
|
||||
const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-stranded-"));
|
||||
const storePath = path.join(tmp, "sessions.json");
|
||||
const sessionKey = "stranded";
|
||||
const sessionEntry = { sessionId: "session", updatedAt: Date.now(), totalTokens: 1_000 };
|
||||
await fs.writeFile(storePath, JSON.stringify({ [sessionKey]: sessionEntry }, null, 2), "utf-8");
|
||||
|
||||
const finalAssistantText =
|
||||
params.finalAssistantText ??
|
||||
"Here is the answer the user asked for. It includes enough detail to read like a user-facing response rather than a short private note. This should have been sent with the message tool if the channel expected a visible reply.";
|
||||
runEmbeddedAgentMock.mockResolvedValue({
|
||||
// payloadText can differ from the assistant text to simulate metadata-only
|
||||
// payloads (verbose notices, usage line) that must NOT trigger the warn —
|
||||
// detection keys off the assistant final text, not the payload bundle.
|
||||
payloads: [{ text: params.payloadText ?? finalAssistantText }],
|
||||
meta: { agentMeta: {}, finalAssistantVisibleText: finalAssistantText },
|
||||
...(params.messagingToolSentTargets
|
||||
? { messagingToolSentTargets: params.messagingToolSentTargets }
|
||||
: {}),
|
||||
...(params.successfulCronAdds === undefined
|
||||
? {}
|
||||
: { successfulCronAdds: params.successfulCronAdds }),
|
||||
});
|
||||
|
||||
const sessionCtx = {
|
||||
Provider: "whatsapp",
|
||||
OriginatingTo: "+15550001111",
|
||||
AccountId: "primary",
|
||||
MessageSid: "msg",
|
||||
ChatType: "direct",
|
||||
} as unknown as TemplateContext;
|
||||
const followupRun = {
|
||||
prompt: "hello",
|
||||
summaryLine: "hello",
|
||||
enqueuedAt: Date.now(),
|
||||
run: {
|
||||
agentId: "main",
|
||||
agentDir: "/tmp/agent",
|
||||
sessionId: "session",
|
||||
sessionKey,
|
||||
messageProvider: "whatsapp",
|
||||
sessionFile: "/tmp/session.jsonl",
|
||||
workspaceDir: tmp,
|
||||
// Direct chat + visibleReplies=message_tool resolves to message_tool_only,
|
||||
// so the final text is kept private (no automatic delivery).
|
||||
config: { messages: { visibleReplies: "message_tool" } },
|
||||
skillsSnapshot: {},
|
||||
provider: "anthropic",
|
||||
model: "claude",
|
||||
thinkLevel: "low",
|
||||
reasoningLevel: "on",
|
||||
verboseLevel: "off",
|
||||
elevatedLevel: "off",
|
||||
bashElevated: { enabled: false, allowed: false, defaultLevel: "off" },
|
||||
timeoutMs: 1_000,
|
||||
blockReplyBreak: "message_end",
|
||||
},
|
||||
} as unknown as FollowupRun;
|
||||
|
||||
await runReplyAgent({
|
||||
commandBody: "hello",
|
||||
followupRun,
|
||||
queueKey: sessionKey,
|
||||
resolvedQueue: { mode: "interrupt" } as unknown as QueueSettings,
|
||||
shouldSteer: false,
|
||||
shouldFollowup: false,
|
||||
isActive: false,
|
||||
isStreaming: false,
|
||||
typing: createMockTypingController(),
|
||||
sessionCtx,
|
||||
sessionEntry,
|
||||
sessionStore: { [sessionKey]: sessionEntry },
|
||||
sessionKey,
|
||||
storePath,
|
||||
defaultModel: "anthropic/claude-opus-4-6",
|
||||
agentCfgContextTokens: 200_000,
|
||||
resolvedVerboseLevel: "off",
|
||||
isNewSession: false,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
shouldInjectGroupIntro: false,
|
||||
typingMode: "instant",
|
||||
});
|
||||
}
|
||||
|
||||
it("warns when a substantive private final reply never used the message tool", async () => {
|
||||
await runPrivateFinalCase({});
|
||||
expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1);
|
||||
expect(warnPrivateFinalSpy.mock.calls[0]?.[0]).toMatchObject({ sessionKey: "stranded" });
|
||||
});
|
||||
|
||||
it("does not warn for a short private final reply", async () => {
|
||||
await runPrivateFinalCase({ finalAssistantText: "Nothing to send here." });
|
||||
expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("does not warn when the message tool delivered this turn", async () => {
|
||||
await runPrivateFinalCase({
|
||||
messagingToolSentTargets: [{ tool: "message", provider: "whatsapp", to: "+15550001111" }],
|
||||
});
|
||||
expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("still warns when only an unrelated cron side effect succeeded", async () => {
|
||||
await runPrivateFinalCase({ successfulCronAdds: 1 });
|
||||
expect(warnPrivateFinalSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("does not warn on an intentional NO_REPLY turn even when metadata payloads remain", async () => {
|
||||
// Assistant went silent (NO_REPLY), but a verbose/usage metadata payload
|
||||
// survives in finalPayloads. The warn must key off the assistant text, not
|
||||
// the payload bundle, so no private-final warning should fire.
|
||||
await runPrivateFinalCase({
|
||||
finalAssistantText: "no_reply",
|
||||
payloadText: "Auto-compaction complete (count 1).",
|
||||
});
|
||||
expect(warnPrivateFinalSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -94,6 +94,10 @@ import { resolveOriginMessageProvider, resolveOriginMessageTo } from "./origin-r
|
||||
import { sanitizePendingFinalDeliveryText } from "./pending-final-delivery.js";
|
||||
import { drainPendingToolTasks } from "./pending-tool-task-drain.js";
|
||||
import { readPostCompactionContext } from "./post-compaction-context.js";
|
||||
import {
|
||||
shouldWarnAboutPrivateMessageToolFinal,
|
||||
warnPrivateMessageToolFinal,
|
||||
} from "./private-message-tool-final.js";
|
||||
import { resolveActiveRunQueueAction } from "./queue-policy.js";
|
||||
import {
|
||||
enqueueFollowupRun,
|
||||
@@ -230,15 +234,27 @@ function hasSuccessfulSideEffectDelivery(params: {
|
||||
messagingToolSentTargets?: unknown[];
|
||||
successfulCronAdds?: number;
|
||||
didSendDeterministicApprovalPrompt?: boolean;
|
||||
}): boolean {
|
||||
return (
|
||||
hasSuccessfulSourceReplyDelivery(params) ||
|
||||
(params.successfulCronAdds ?? 0) > 0 ||
|
||||
params.didSendDeterministicApprovalPrompt === true
|
||||
);
|
||||
}
|
||||
|
||||
function hasSuccessfulSourceReplyDelivery(params: {
|
||||
blockReplyPipeline: { didStream: () => boolean; isAborted: () => boolean } | null;
|
||||
directlySentBlockKeys?: Set<string>;
|
||||
messagingToolSentTexts?: string[];
|
||||
messagingToolSentMediaUrls?: string[];
|
||||
messagingToolSentTargets?: unknown[];
|
||||
}): boolean {
|
||||
return (
|
||||
(params.blockReplyPipeline?.didStream() && !params.blockReplyPipeline.isAborted()) ||
|
||||
(params.directlySentBlockKeys?.size ?? 0) > 0 ||
|
||||
hasNonEmptyStringArray(params.messagingToolSentTexts) ||
|
||||
hasNonEmptyStringArray(params.messagingToolSentMediaUrls) ||
|
||||
hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets) ||
|
||||
(params.successfulCronAdds ?? 0) > 0 ||
|
||||
params.didSendDeterministicApprovalPrompt === true
|
||||
hasCommittedMessagingTargetDeliveryEvidence(params.messagingToolSentTargets)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1795,6 +1811,13 @@ export async function runReplyAgent(params: {
|
||||
successfulCronAdds: runResult.successfulCronAdds,
|
||||
didSendDeterministicApprovalPrompt: runResult.didSendDeterministicApprovalPrompt,
|
||||
});
|
||||
const successfulSourceReplyDelivery = hasSuccessfulSourceReplyDelivery({
|
||||
blockReplyPipeline,
|
||||
directlySentBlockKeys,
|
||||
messagingToolSentTexts: runResult.messagingToolSentTexts,
|
||||
messagingToolSentMediaUrls: runResult.messagingToolSentMediaUrls,
|
||||
messagingToolSentTargets: runResult.messagingToolSentTargets,
|
||||
});
|
||||
const returnSilentFallbackFailureIfNeeded = async (): Promise<ReplyPayload | undefined> => {
|
||||
const silentFallbackFailurePayload = buildSilentFallbackFailurePayload({
|
||||
fallbackTransition,
|
||||
@@ -2276,9 +2299,30 @@ export async function runReplyAgent(params: {
|
||||
runtimePolicySessionKey,
|
||||
opts,
|
||||
});
|
||||
const pendingText = sourceReplyPolicy.suppressDelivery
|
||||
? ""
|
||||
: buildPendingFinalDeliveryText(finalPayloads);
|
||||
const finalDeliveryText = buildPendingFinalDeliveryText(finalPayloads);
|
||||
// #85714: warn only for unusually substantive private final text. In
|
||||
// message_tool_only, no tool call can be intentional silence, and
|
||||
// finalDeliveryText also includes verbose/status/usage metadata.
|
||||
const assistantFinalText = rawAssistantText ?? "";
|
||||
if (
|
||||
shouldWarnAboutPrivateMessageToolFinal({
|
||||
sourceReplyDeliveryMode: sourceReplyPolicy.sourceReplyDeliveryMode,
|
||||
sendPolicyDenied: sourceReplyPolicy.sendPolicyDenied,
|
||||
successfulSourceReplyDelivery,
|
||||
finalText: assistantFinalText,
|
||||
})
|
||||
) {
|
||||
warnPrivateMessageToolFinal({
|
||||
sessionKey,
|
||||
channel:
|
||||
sessionCtx.OriginatingChannel ??
|
||||
sessionCtx.Surface ??
|
||||
sessionCtx.Provider ??
|
||||
activeSessionEntry?.channel,
|
||||
finalTextLength: assistantFinalText.trim().length,
|
||||
});
|
||||
}
|
||||
const pendingText = sourceReplyPolicy.suppressDelivery ? "" : finalDeliveryText;
|
||||
const agentId = followupRun.run.agentId;
|
||||
const heartbeatAgentCfg = agentId ? resolveAgentConfig(cfg, agentId)?.heartbeat : undefined;
|
||||
const heartbeatAckMaxChars = Math.max(
|
||||
|
||||
74
src/auto-reply/reply/private-message-tool-final.test.ts
Normal file
74
src/auto-reply/reply/private-message-tool-final.test.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { shouldWarnAboutPrivateMessageToolFinal } from "./private-message-tool-final.js";
|
||||
|
||||
const base = {
|
||||
sourceReplyDeliveryMode: "message_tool_only" as const,
|
||||
sendPolicyDenied: false,
|
||||
successfulSourceReplyDelivery: false,
|
||||
finalText:
|
||||
"Here is the answer the user asked for. It includes enough detail to look like a visible response rather than an internal no-op note.",
|
||||
};
|
||||
|
||||
describe("shouldWarnAboutPrivateMessageToolFinal", () => {
|
||||
it("flags a multi-sentence private final that was never delivered via the message tool (#85714)", () => {
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal(base)).toBe(true);
|
||||
});
|
||||
|
||||
it("flags a long private final even without multiple sentence terminators", () => {
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({
|
||||
...base,
|
||||
finalText: "x".repeat(280),
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("does not flag automatic delivery mode (final text is delivered normally)", () => {
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: "automatic" }),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({ ...base, sourceReplyDeliveryMode: undefined }),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not flag when the message tool already delivered this turn", () => {
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({ ...base, successfulSourceReplyDelivery: true }),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not flag silent sentinel variants (intentional silence)", () => {
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY" })).toBe(false);
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: " no_reply " })).toBe(
|
||||
false,
|
||||
);
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "NO_REPLY\n\nNO_REPLY" }),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not flag a short private final", () => {
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({
|
||||
...base,
|
||||
finalText: "Nothing to add here.",
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
shouldWarnAboutPrivateMessageToolFinal({
|
||||
...base,
|
||||
finalText: "I do not need to send anything. Nothing else to add.",
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not flag empty or whitespace-only final text", () => {
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: "" })).toBe(false);
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, finalText: " \n " })).toBe(false);
|
||||
});
|
||||
|
||||
it("does not flag when delivery was intentionally denied by send policy", () => {
|
||||
expect(shouldWarnAboutPrivateMessageToolFinal({ ...base, sendPolicyDenied: true })).toBe(false);
|
||||
});
|
||||
});
|
||||
67
src/auto-reply/reply/private-message-tool-final.ts
Normal file
67
src/auto-reply/reply/private-message-tool-final.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import type { SourceReplyDeliveryMode } from "../get-reply-options.types.js";
|
||||
import { isSilentReplyText } from "../tokens.js";
|
||||
|
||||
const privateFinalReplyLogger = createSubsystemLogger("source-reply/private-final");
|
||||
|
||||
const LONG_PRIVATE_FINAL_MIN_CHARS = 280;
|
||||
const MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS = 120;
|
||||
const MULTI_SENTENCE_TERMINATOR_MIN_COUNT = 2;
|
||||
const SENTENCE_TERMINATOR_REGEX = /[.!?]+(?:\s|$)/g;
|
||||
|
||||
/**
|
||||
* `message_tool_only` allows the model to stay silent by simply not calling the
|
||||
* message tool, so short private final text is not evidence of message loss.
|
||||
* Warn only for unusually substantive private finals, which usually means the
|
||||
* model wrote a user-facing answer but missed the configured delivery tool.
|
||||
*/
|
||||
export function shouldWarnAboutPrivateMessageToolFinal(params: {
|
||||
sourceReplyDeliveryMode: SourceReplyDeliveryMode | undefined;
|
||||
sendPolicyDenied: boolean;
|
||||
successfulSourceReplyDelivery: boolean;
|
||||
finalText: string;
|
||||
}): boolean {
|
||||
if (params.sourceReplyDeliveryMode !== "message_tool_only") {
|
||||
return false;
|
||||
}
|
||||
// A send-policy denial is an intentional block, and a successful source-reply
|
||||
// delivery means the contract was honored. Other side effects do not count.
|
||||
if (params.sendPolicyDenied || params.successfulSourceReplyDelivery) {
|
||||
return false;
|
||||
}
|
||||
const trimmed = params.finalText.trim();
|
||||
if (!trimmed || isSilentReplyText(trimmed)) {
|
||||
return false;
|
||||
}
|
||||
if (trimmed.length >= LONG_PRIVATE_FINAL_MIN_CHARS) {
|
||||
return true;
|
||||
}
|
||||
const sentenceTerminatorCount = countSentenceLikeTerminators(trimmed);
|
||||
return (
|
||||
trimmed.length >= MULTI_SENTENCE_PRIVATE_FINAL_MIN_CHARS &&
|
||||
sentenceTerminatorCount >= MULTI_SENTENCE_TERMINATOR_MIN_COUNT
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit metadata-only operator signal. The body is intentionally omitted:
|
||||
* `message_tool_only` keeps normal final text private by design.
|
||||
*/
|
||||
export function warnPrivateMessageToolFinal(params: {
|
||||
sessionKey: string | undefined;
|
||||
channel: string | undefined;
|
||||
finalTextLength: number;
|
||||
}): void {
|
||||
privateFinalReplyLogger.warn(
|
||||
"agent produced a long private final reply without calling the configured delivery tool (message_tool_only); response kept private and not delivered to the source channel",
|
||||
{
|
||||
sessionKey: params.sessionKey,
|
||||
channel: params.channel,
|
||||
chars: params.finalTextLength,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
function countSentenceLikeTerminators(text: string): number {
|
||||
return Array.from(text.matchAll(SENTENCE_TERMINATOR_REGEX)).length;
|
||||
}
|
||||
@@ -160,7 +160,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
|
||||
expect(readFileSync("scripts/e2e/lib/clawhub-fixture-server.cjs", "utf8")).toContain(
|
||||
"X-ClawHub-Artifact-Sha256",
|
||||
);
|
||||
expect(script).toContain("docker_e2e_docker_cmd stats --no-stream");
|
||||
expect(script).toContain("docker_e2e_sample_stats_until_exit");
|
||||
expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs");
|
||||
expect(sweepScript).toContain("scan_logs_for_unexpected_errors");
|
||||
});
|
||||
|
||||
@@ -182,7 +183,8 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
|
||||
weight: 3,
|
||||
});
|
||||
expect(script).toContain("OPENCLAW_ENTRY=/app/openclaw.mjs");
|
||||
expect(script).toContain("docker_e2e_docker_cmd stats --no-stream");
|
||||
expect(script).toContain("docker_e2e_sample_stats_until_exit");
|
||||
expect(script).toContain("scripts/e2e/lib/docker-stats/assert-resource-ceiling.mjs");
|
||||
expect(script).toContain("node scripts/e2e/kitchen-sink-rpc-walk.mjs");
|
||||
expect(script).not.toContain("--import tsx");
|
||||
expect(walkScript).toContain("commands.list");
|
||||
|
||||
Reference in New Issue
Block a user