From dcff28d28524eda07f3e10cab3e34ce9fecccea2 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sun, 26 Apr 2026 23:29:19 -0700 Subject: [PATCH] fix(telegram): hide acknowledged failed-tool warnings from chat (#72410) * fix(telegram): hide acknowledged failed-tool warnings from chat * fix(clownfish): address review for ghcrawl-207034-agentic-merge (1) * fix(clownfish): address review for ghcrawl-207034-agentic-merge (1) --- CHANGELOG.md | 1 + .../telegram/src/bot-message-dispatch.test.ts | 27 +++++++++ .../run/payloads.errors.test.ts | 59 ++++++++++++++++++- src/agents/pi-embedded-runner/run/payloads.ts | 56 +++++++++++++++++- 4 files changed, 140 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8816741333..51a17bc3d5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai - Codex harness: normalize cached input tokens before session/context accounting so prompt cache reads are not double-counted in `/status`, `session_status`, or persisted `sessionEntry.totalTokens`. Fixes #69298. Thanks @richardmqq. - Hooks/session-memory: use the host local timezone for memory filenames, fallback timestamp slugs, and markdown headers instead of UTC dates. Fixes #46703. (#46721) Thanks @Astro-Han. - Feishu: extract quoted/replied interactive-card text across schema 1.0, schema 2.0, i18n, template-variable, and post-format fallback shapes without carrying broad generated/config churn from related parser experiments. (#38776, #60383, #42218, #45936) Thanks @lishuaigit, @lskun, @just2gooo, and @Br1an67. +- Telegram/agents: hide raw failed write/edit warning messages in Telegram when the assistant already explicitly acknowledges the failed action, while keeping warnings when the reply claims success or omits the failure; #39406 remains the broader configurable delivery-policy follow-up. Fixes #51065; covers #39631. Thanks @Bartok9 and @Bortlesboat. - Exec approvals: accept a symlinked `OPENCLAW_HOME` as the trusted approvals root while still rejecting symlinked `.openclaw` path components below it. (#64663) Thanks @FunJim. - Logging: add top-level `hostname`, flattened `message`, and available `agent_id`, `session_id`, and `channel` fields to file-log JSONL records for multi-agent filtering without removing existing structured log arguments. Fixes #51075. Thanks @stevengonsalvez. - ACP: route server logs to stderr before Gateway config/bootstrap work so ACP stdout remains JSON-RPC only for IDE integrations. Fixes #49060. Thanks @Hollychou924. diff --git a/extensions/telegram/src/bot-message-dispatch.test.ts b/extensions/telegram/src/bot-message-dispatch.test.ts index d30e37dc66e..e0098bb55ca 100644 --- a/extensions/telegram/src/bot-message-dispatch.test.ts +++ b/extensions/telegram/src/bot-message-dispatch.test.ts @@ -2887,6 +2887,33 @@ describe("dispatchTelegramMessage draft streaming", () => { ); }); + it("finalizes explicit failed-action replies without a standalone warning delivery", async () => { + const draftStream = createDraftStream(999); + createTelegramDraftStream.mockReturnValue(draftStream); + dispatchReplyWithBufferedBlockDispatcher.mockImplementation( + async ({ dispatcherOptions, replyOptions }) => { + await replyOptions?.onPartialReply?.({ text: "Let me update that file." }); + await dispatcherOptions.deliver( + { text: "I couldn't update the file, so no changes were applied." }, + { kind: "final" }, + ); + return { queuedFinal: true }; + }, + ); + deliverReplies.mockResolvedValue({ delivered: true }); + + await dispatchWithContext({ context: createContext(), streamMode: "block" }); + + expect(editMessageTelegram).toHaveBeenCalledWith( + 123, + 999, + "I couldn't update the file, so no changes were applied.", + expect.any(Object), + ); + expect(deliverReplies).not.toHaveBeenCalled(); + expect(draftStream.clear).not.toHaveBeenCalled(); + }); + it("clears preview for error-only finals", async () => { const draftStream = createDraftStream(999); createTelegramDraftStream.mockReturnValue(draftStream); diff --git a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts index 525d6f94433..b3238b11b07 100644 --- a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts @@ -328,7 +328,7 @@ describe("buildEmbeddedRunPayloads", () => { expectSingleToolErrorPayload(payloads, { title, absentDetail }); }); - it("shows mutating tool errors even when assistant output exists", () => { + it("shows mutating tool errors when assistant output claims success", () => { const payloads = buildPayloads({ assistantTexts: ["Done."], lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, @@ -342,6 +342,63 @@ describe("buildEmbeddedRunPayloads", () => { expect(payloads[1]?.text).not.toContain("missing"); }); + it("shows mutating tool errors when assistant output does not acknowledge the failure", () => { + const payloads = buildPayloads({ + assistantTexts: ["No issues found. The update is complete."], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { toolName: "edit", error: "file missing" }, + }); + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe("No issues found. The update is complete."); + expect(payloads[1]?.isError).toBe(true); + expect(payloads[1]?.text).toContain("Edit"); + expect(payloads[1]?.text).not.toContain("missing"); + }); + + it("shows mutating tool errors when assistant says it did not find issues in the file", () => { + const text = "I did not find any issues in the file. The update is complete."; + const payloads = buildPayloads({ + assistantTexts: [text], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { toolName: "edit", error: "file missing" }, + }); + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe(text); + expect(payloads[1]?.isError).toBe(true); + expect(payloads[1]?.text).toContain("Edit"); + expect(payloads[1]?.text).not.toContain("missing"); + }); + + it.each([ + "I did not need to update the file; it is already correct.", + "I did not have to edit the file because it was already correct.", + ])("shows mutating tool errors when assistant output uses no-op phrasing: %s", (text) => { + const payloads = buildPayloads({ + assistantTexts: [text], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { toolName: "edit", error: "file missing" }, + }); + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe(text); + expect(payloads[1]?.isError).toBe(true); + expect(payloads[1]?.text).toContain("Edit"); + expect(payloads[1]?.text).not.toContain("missing"); + }); + + it("suppresses mutating tool errors when assistant output explicitly acknowledges the failed action", () => { + const text = "I couldn't update the file, so no changes were applied."; + const payloads = buildPayloads({ + assistantTexts: [text], + lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage, + lastToolError: { toolName: "edit", error: "file missing" }, + }); + + expectSinglePayloadSummary(payloads, { text }); + }); + it("does not treat session_status read failures as mutating when explicitly flagged", () => { const payloads = buildPayloads({ assistantTexts: ["Status loaded."], diff --git a/src/agents/pi-embedded-runner/run/payloads.ts b/src/agents/pi-embedded-runner/run/payloads.ts index e1746e3066b..886930a3fda 100644 --- a/src/agents/pi-embedded-runner/run/payloads.ts +++ b/src/agents/pi-embedded-runner/run/payloads.ts @@ -44,11 +44,54 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [ "requires", ] as const; +const MUTATING_FAILURE_ACTION_PATTERN = + "(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)"; + +const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp( + `\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`, + "u", +); +const MUTATING_FAILURE_ACTION_THEN_FAILURE_PATTERN = new RegExp( + `\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b.{0,100}\\b(?:failed|failure|errored)\\b`, + "u", +); +const MUTATING_FAILURE_FAILURE_THEN_ACTION_PATTERN = new RegExp( + `\\b(?:failed|failure)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`, + "u", +); +const MUTATING_FAILURE_ERROR_WHILE_ACTION_PATTERN = new RegExp( + `\\b(?:hit|encountered|ran into)\\b.{0,60}\\berror\\b.{0,100}\\b(?:while|trying to|when)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`, + "u", +); +const DID_NOT_FAIL_PATTERN = /\b(?:did not|didn't)\s+fail\b/u; +const NEGATED_FAILURE_PATTERN = /\b(?:no|not|without)\s+(?:failures?|errors?)\b/u; + function isRecoverableToolError(error: string | undefined): boolean { const errorLower = normalizeOptionalLowercaseString(error) ?? ""; return RECOVERABLE_TOOL_ERROR_KEYWORDS.some((keyword) => errorLower.includes(keyword)); } +function hasExplicitMutatingToolFailureAcknowledgement(text: string): boolean { + const normalizedText = normalizeTextForComparison(text); + if (!normalizedText) { + return false; + } + if (DID_NOT_FAIL_PATTERN.test(normalizedText)) { + return false; + } + if (MUTATING_FAILURE_INABILITY_PATTERN.test(normalizedText)) { + return true; + } + if (NEGATED_FAILURE_PATTERN.test(normalizedText)) { + return false; + } + return ( + MUTATING_FAILURE_ACTION_THEN_FAILURE_PATTERN.test(normalizedText) || + MUTATING_FAILURE_FAILURE_THEN_ACTION_PATTERN.test(normalizedText) || + MUTATING_FAILURE_ERROR_WHILE_ACTION_PATTERN.test(normalizedText) + ); +} + function isVerboseToolDetailEnabled(level?: VerboseLevel): boolean { return level === "on" || level === "full"; } @@ -84,6 +127,7 @@ function shouldIncludeToolErrorDetails(params: { function resolveToolErrorWarningPolicy(params: { lastToolError: ToolErrorSummary; hasUserFacingReply: boolean; + hasUserFacingFailureAcknowledgement: boolean; suppressToolErrors: boolean; suppressToolErrorWarnings?: boolean; isCronTrigger?: boolean; @@ -107,7 +151,10 @@ function resolveToolErrorWarningPolicy(params: { const isMutatingToolError = params.lastToolError.mutatingAction ?? isLikelyMutatingToolName(params.lastToolError.toolName); if (isMutatingToolError) { - return { showWarning: true, includeDetails }; + return { + showWarning: !params.hasUserFacingFailureAcknowledgement, + includeDetails, + }; } if (params.suppressToolErrors) { return { showWarning: false, includeDetails }; @@ -316,6 +363,7 @@ export function buildEmbeddedRunPayloads(params: { ).filter((text) => !shouldSuppressRawErrorText(text)); let hasUserFacingAssistantReply = false; + let hasUserFacingFailureAcknowledgement = false; for (const text of answerTexts) { const { text: cleanedText, @@ -337,12 +385,16 @@ export function buildEmbeddedRunPayloads(params: { replyToCurrent, }); hasUserFacingAssistantReply = true; + if (cleanedText && hasExplicitMutatingToolFailureAcknowledgement(cleanedText)) { + hasUserFacingFailureAcknowledgement = true; + } } if (params.lastToolError) { const warningPolicy = resolveToolErrorWarningPolicy({ lastToolError: params.lastToolError, hasUserFacingReply: hasUserFacingAssistantReply, + hasUserFacingFailureAcknowledgement, suppressToolErrors: Boolean(params.config?.messages?.suppressToolErrors), suppressToolErrorWarnings: params.suppressToolErrorWarnings, isCronTrigger: params.isCronTrigger, @@ -350,7 +402,7 @@ export function buildEmbeddedRunPayloads(params: { verboseLevel: params.verboseLevel, }); - // Always surface mutating tool failures so we do not silently confirm actions that did not happen. + // Surface mutating failures unless the assistant explicitly acknowledged the failed action. // Otherwise, keep the previous behavior and only surface non-recoverable failures when no reply exists. if (warningPolicy.showWarning) { const toolSummary = formatToolAggregate(