diff --git a/CHANGELOG.md b/CHANGELOG.md index e284071985d..4c69b7e6303 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai - Discord: preserve disabled presentation buttons when adapting and rendering Discord message controls. (#84188) Thanks @100menotu001. - Twitch: add a test-only client-manager registry reset helper so non-isolated Twitch tests can clear cached managers between cases. Fixes #83887. (#84244) Thanks @hclsys. - Cron: use structured embedded-run denial metadata for isolated scheduled tasks so blocked exec requests fail the job without treating ordinary assistant prose as a denial. (#84067) Thanks @abnershang. +- Cron: keep recovered tool warnings diagnostic for successful scheduled runs so final cron output is delivered instead of being replaced by a post-processing warning. (#84045) Thanks @abnershang. - Plugins/perf: thread explicit plugin discovery results through `loadBundledCapabilityRuntimeRegistry`, `resolveBundledPluginSources`, and `listChannelCatalogEntries` so callers that already hold a discovery result skip redundant filesystem walks. Thanks @SebTardif. - harden update restart script creation [AI]. (#84088) Thanks @pgondhi987. - Docker: keep the bundled Codex plugin in official release image keep lists so the default OpenAI agent harness remains available after Docker pruning. Fixes #83613. (#83626) Thanks @YuanHanzhong. diff --git a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts index 2368683ff93..123001deed9 100644 --- a/src/agents/pi-embedded-runner/run/payloads.errors.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.errors.test.ts @@ -1,5 +1,6 @@ import type { AssistantMessage } from "@earendil-works/pi-ai"; import { describe, expect, it } from "vitest"; +import { getReplyPayloadMetadata } from "../../../auto-reply/reply-payload.js"; import { formatBillingErrorMessage } from "../../pi-embedded-helpers.js"; import { makeAssistantMessageFixture } from "../../test-helpers/assistant-message-fixtures.js"; import { @@ -457,6 +458,9 @@ describe("buildEmbeddedRunPayloads", () => { expect(payloads[1]?.isError).toBe(true); expect(payloads[1]?.text).toContain("Write"); expect(payloads[1]?.text).not.toContain("missing"); + expect(getReplyPayloadMetadata(payloads[1] as object)?.nonTerminalToolErrorWarning).toBe( + undefined, + ); }); it("shows exec tool errors when assistant output claims success", () => { @@ -474,6 +478,9 @@ describe("buildEmbeddedRunPayloads", () => { expect(payloads[1]?.isError).toBe(true); expect(payloads[1]?.text).toContain("Exec"); expect(payloads[1]?.text).not.toContain("python: command not found"); + expect(getReplyPayloadMetadata(payloads[1] as object)?.nonTerminalToolErrorWarning).toBe( + undefined, + ); }); it("shows mutating tool errors when assistant output does not acknowledge the failure", () => { diff --git a/src/agents/pi-embedded-runner/run/payloads.test.ts b/src/agents/pi-embedded-runner/run/payloads.test.ts index 2b2bd708f63..2ed3c3b0d81 100644 --- a/src/agents/pi-embedded-runner/run/payloads.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.test.ts @@ -351,6 +351,28 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => { }); }); + it("marks middleware tool-error warnings after assistant output as non-terminal", () => { + const payloads = buildPayloads({ + assistantTexts: ["Queued 3 topics."], + lastToolError: { + toolName: "exec", + error: "Tool output unavailable due to post-processing error", + middlewareError: true, + }, + verboseLevel: "off", + }); + + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe("Queued 3 topics."); + expect(payloads[1]).toMatchObject({ + isError: true, + }); + expect(payloads[1]?.text).toContain("Exec failed"); + expect(getReplyPayloadMetadata(payloads[1] as object)).toMatchObject({ + nonTerminalToolErrorWarning: true, + }); + }); + it("surfaces concise bash tool errors when verbose mode is off", () => { const payloads = buildPayloads({ lastToolError: { toolName: "bash", error: "command failed" }, diff --git a/src/agents/pi-embedded-runner/run/payloads.ts b/src/agents/pi-embedded-runner/run/payloads.ts index 8a5e2826bcc..f6dee1ad01f 100644 --- a/src/agents/pi-embedded-runner/run/payloads.ts +++ b/src/agents/pi-embedded-runner/run/payloads.ts @@ -136,6 +136,10 @@ function shouldIncludeToolErrorDetails(params: { ); } +function shouldMarkNonTerminalToolErrorWarning(lastToolError: ToolErrorSummary): boolean { + return lastToolError.middlewareError === true; +} + function resolveToolErrorWarningPolicy(params: { lastToolError: ToolErrorSummary; hasUserFacingReply: boolean; @@ -221,6 +225,7 @@ export function buildEmbeddedRunPayloads(params: { presentation?: ReplyPayload["presentation"]; interactive?: ReplyPayload["interactive"]; channelData?: Record; + nonTerminalToolErrorWarning?: boolean; sourceReplyMirror?: { idempotencyKey?: string; }; @@ -509,6 +514,9 @@ export function buildEmbeddedRunPayloads(params: { replyItems.push({ text: warningText, isError: true, + nonTerminalToolErrorWarning: + hasUserFacingAssistantReply && + shouldMarkNonTerminalToolErrorWarning(params.lastToolError), }); } } @@ -530,6 +538,11 @@ export function buildEmbeddedRunPayloads(params: { if (item.isError !== undefined) { payload.isError = item.isError; } + if (item.nonTerminalToolErrorWarning) { + setReplyPayloadMetadata(payload, { + nonTerminalToolErrorWarning: true, + }); + } if (item.replyToId) { payload.replyToId = item.replyToId; } diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts index ce7698c5938..9c9370b2178 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts @@ -281,6 +281,47 @@ describe("handleToolExecutionEnd cron.add commitment tracking", () => { }); describe("handleToolExecutionEnd mutating failure recovery", () => { + it("marks middleware failures on the last tool error", async () => { + const { ctx } = createTestContext(); + + await handleToolExecutionStart( + ctx as never, + { + type: "tool_execution_start", + toolName: "exec", + toolCallId: "tool-exec-middleware-error", + args: { cmd: "echo ok" }, + } as never, + ); + + await handleToolExecutionEnd( + ctx as never, + { + type: "tool_execution_end", + toolName: "exec", + toolCallId: "tool-exec-middleware-error", + isError: false, + result: { + content: [ + { + type: "text", + text: "Tool output unavailable due to post-processing error.", + }, + ], + details: { + status: "error", + middlewareError: true, + }, + }, + } as never, + ); + + expect(ctx.state.lastToolError).toMatchObject({ + toolName: "exec", + middlewareError: true, + }); + }); + it("clears edit failure when the retry succeeds through common file path aliases", async () => { const { ctx } = createTestContext(); diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts index eb99c079dd9..787ed877aa9 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts @@ -68,6 +68,19 @@ const beforeToolCallModuleLoader = createLazyImportLoader( const LIVE_EXEC_OUTPUT_MAX_CHARS = 8000; const LIVE_EXEC_UPDATE_MIN_INTERVAL_MS = 250; +function isMiddlewareToolResultError(result: unknown): boolean { + if (!result || typeof result !== "object") { + return false; + } + const details = (result as { details?: unknown }).details; + return Boolean( + details && + typeof details === "object" && + !Array.isArray(details) && + (details as { middlewareError?: unknown }).middlewareError === true, + ); +} + function loadExecApprovalReply(): Promise { return execApprovalReplyModuleLoader.load(); } @@ -942,6 +955,7 @@ export async function handleToolExecutionEnd( ...(errorCode ? { errorCode } : {}), error: errorMessage, timedOut: isToolResultTimedOut(sanitizedResult) || undefined, + middlewareError: isMiddlewareToolResultError(sanitizedResult) || undefined, mutatingAction: callSummary?.mutatingAction, actionFingerprint: callSummary?.actionFingerprint, fileTarget: callSummary?.fileTarget, diff --git a/src/agents/tool-error-summary.ts b/src/agents/tool-error-summary.ts index 56e08e4fda0..1acfbc3936c 100644 --- a/src/agents/tool-error-summary.ts +++ b/src/agents/tool-error-summary.ts @@ -7,6 +7,7 @@ export type ToolErrorSummary = { errorCode?: string; error?: string; timedOut?: boolean; + middlewareError?: boolean; mutatingAction?: boolean; actionFingerprint?: string; fileTarget?: FileTarget; diff --git a/src/auto-reply/reply-payload.ts b/src/auto-reply/reply-payload.ts index fe2b2125aae..24017cd06db 100644 --- a/src/auto-reply/reply-payload.ts +++ b/src/auto-reply/reply-payload.ts @@ -163,6 +163,8 @@ export type ReplyPayloadMetadata = { idempotencyKey?: string; }; beforeAgentRunBlocked?: boolean; + /** Warning synthesized from an observed tool error after the run produced assistant output. */ + nonTerminalToolErrorWarning?: boolean; }; const replyPayloadMetadata = new WeakMap(); diff --git a/src/cron/isolated-agent.helpers.test.ts b/src/cron/isolated-agent.helpers.test.ts index 0de4ad78509..a23af58b53b 100644 --- a/src/cron/isolated-agent.helpers.test.ts +++ b/src/cron/isolated-agent.helpers.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "vitest"; +import { setReplyPayloadMetadata } from "../auto-reply/reply-payload.js"; import { resolveCronPayloadOutcome } from "./isolated-agent/helpers.js"; describe("resolveCronPayloadOutcome", () => { @@ -39,6 +40,51 @@ describe("resolveCronPayloadOutcome", () => { expect(result.summary).toBe("Write completed successfully."); }); + it("keeps non-terminal tool warnings diagnostic when final assistant output succeeded", () => { + const toolWarning = setReplyPayloadMetadata( + { + text: "⚠️ Exec failed", + isError: true, + }, + { nonTerminalToolErrorWarning: true }, + ); + + const result = resolveCronPayloadOutcome({ + payloads: [{ text: "Queued 3 topics." }, toolWarning], + finalAssistantVisibleText: "Queued 3 topics.", + preferFinalAssistantVisibleText: true, + }); + + expect(result.hasFatalErrorPayload).toBe(false); + expect(result.embeddedRunError).toBeUndefined(); + expect(result.summary).toBe("Queued 3 topics."); + expect(result.outputText).toBe("Queued 3 topics."); + expect(result.deliveryPayloads).toEqual([{ text: "Queued 3 topics." }]); + }); + + it("keeps marked middleware warnings diagnostic after structured cron output", () => { + const mediaPayload = { mediaUrl: "file:///tmp/cron-report.png" }; + const toolWarning = setReplyPayloadMetadata( + { + text: "⚠️ Exec failed", + isError: true, + }, + { nonTerminalToolErrorWarning: true }, + ); + + const result = resolveCronPayloadOutcome({ + payloads: [mediaPayload, toolWarning], + }); + + expect(result.hasFatalErrorPayload).toBe(false); + expect(result.embeddedRunError).toBeUndefined(); + expect(result.summary).toBeUndefined(); + expect(result.outputText).toBeUndefined(); + expect(result.synthesizedText).toBeUndefined(); + expect(result.deliveryPayloads).toEqual([mediaPayload]); + expect(result.deliveryPayloadHasStructuredContent).toBe(true); + }); + it("treats trailing message delivery warnings as non-fatal when final assistant text exists", () => { const result = resolveCronPayloadOutcome({ payloads: [{ text: "Draft output" }, { text: "⚠️ ✉️ Message failed", isError: true }], diff --git a/src/cron/isolated-agent/helpers.ts b/src/cron/isolated-agent/helpers.ts index 498f7853f3c..1b9ca745365 100644 --- a/src/cron/isolated-agent/helpers.ts +++ b/src/cron/isolated-agent/helpers.ts @@ -1,5 +1,6 @@ import { hasOutboundReplyContent } from "openclaw/plugin-sdk/reply-payload"; import { DEFAULT_HEARTBEAT_ACK_MAX_CHARS } from "../../auto-reply/heartbeat.js"; +import { getReplyPayloadMetadata } from "../../auto-reply/reply-payload.js"; import type { ReplyPayload } from "../../auto-reply/reply-payload.js"; import { normalizeOptionalString } from "../../shared/string-coerce.js"; import { truncateUtf16Safe } from "../../utils.js"; @@ -97,6 +98,9 @@ export function pickSummaryFromPayloads( } } for (let i = payloads.length - 1; i >= 0; i--) { + if (isNonTerminalToolErrorWarning(payloads[i])) { + continue; + } const summary = pickSummaryFromOutput(payloads[i]?.text); if (summary) { return summary; @@ -118,6 +122,9 @@ export function pickLastNonEmptyTextFromPayloads( } } for (let i = payloads.length - 1; i >= 0; i--) { + if (isNonTerminalToolErrorWarning(payloads[i])) { + continue; + } const clean = (payloads[i]?.text ?? "").trim(); if (clean) { return clean; @@ -195,6 +202,17 @@ function isCronMessagePresentationWarning(text: string | undefined): boolean { ); } +function isNonTerminalToolErrorWarning(payload: object | undefined): boolean { + return Boolean(payload && getReplyPayloadMetadata(payload)?.nonTerminalToolErrorWarning); +} + +function isSuccessfulCronPayload(payload: DeliveryPayload | undefined): boolean { + return ( + payload?.isError !== true && + (isDeliverablePayload(payload) || payloadHasStructuredDeliveryContent(payload)) + ); +} + export function resolveCronPayloadOutcome(params: { payloads: DeliveryPayload[]; runLevelError?: unknown; @@ -202,7 +220,8 @@ export function resolveCronPayloadOutcome(params: { finalAssistantVisibleText?: string | undefined; preferFinalAssistantVisibleText?: boolean; }): CronPayloadOutcome { - const firstText = params.payloads[0]?.text ?? ""; + const firstText = + params.payloads.find((payload) => !isNonTerminalToolErrorWarning(payload))?.text ?? ""; const fallbackSummary = pickSummaryFromPayloads(params.payloads) ?? pickSummaryFromOutput(firstText); const fallbackOutputText = pickLastNonEmptyTextFromPayloads(params.payloads); @@ -223,15 +242,22 @@ export function resolveCronPayloadOutcome(params: { const hasSuccessfulPayloadAfterLastError = !params.runLevelError && lastErrorPayloadIndex >= 0 && - params.payloads - .slice(lastErrorPayloadIndex + 1) - .some((payload) => payload?.isError !== true && Boolean(payload?.text?.trim())); + params.payloads.slice(lastErrorPayloadIndex + 1).some(isSuccessfulCronPayload); const hasSuccessfulPayloadBeforeLastError = !params.runLevelError && lastErrorPayloadIndex > 0 && - params.payloads - .slice(0, lastErrorPayloadIndex) - .some((payload) => payload?.isError !== true && Boolean(payload?.text?.trim())); + params.payloads.slice(0, lastErrorPayloadIndex).some(isSuccessfulCronPayload); + const lastErrorPayload = + lastErrorPayloadIndex >= 0 ? params.payloads[lastErrorPayloadIndex] : undefined; + const hasRecoveringTerminalOutput = + normalizedFinalAssistantVisibleText !== undefined || + hasSuccessfulPayloadAfterLastError || + hasSuccessfulPayloadBeforeLastError; + const hasNonTerminalToolErrorWarning = + !params.runLevelError && + params.failureSignal?.fatalForCron !== true && + hasRecoveringTerminalOutput && + isNonTerminalToolErrorWarning(lastErrorPayload); const hasPendingPresentationWarning = !params.runLevelError && params.failureSignal?.fatalForCron !== true && @@ -239,7 +265,10 @@ export function resolveCronPayloadOutcome(params: { isCronMessagePresentationWarning(lastErrorPayloadText) && (normalizedFinalAssistantVisibleText !== undefined || hasSuccessfulPayloadBeforeLastError); const hasFatalStructuredErrorPayload = - hasErrorPayload && !hasSuccessfulPayloadAfterLastError && !hasPendingPresentationWarning; + hasErrorPayload && + !hasSuccessfulPayloadAfterLastError && + !hasPendingPresentationWarning && + !hasNonTerminalToolErrorWarning; const hasStructuredDeliveryPayloads = selectedDeliveryPayloads.some((payload) => payloadHasStructuredDeliveryContent(payload), ); diff --git a/src/cron/run-diagnostics.test.ts b/src/cron/run-diagnostics.test.ts index 86d6a89ca45..a6b9ad7a4cc 100644 --- a/src/cron/run-diagnostics.test.ts +++ b/src/cron/run-diagnostics.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "vitest"; +import { setReplyPayloadMetadata } from "../auto-reply/reply-payload.js"; import { createCronRunDiagnosticsFromAgentResult, createCronRunDiagnosticsFromError, @@ -149,6 +150,35 @@ describe("cron run diagnostics", () => { ).toBeUndefined(); }); + it("keeps non-terminal tool warnings as warning diagnostics for successful runs", () => { + const toolWarning = setReplyPayloadMetadata( + { + toolName: "exec", + text: "⚠️ Exec failed", + isError: true, + }, + { nonTerminalToolErrorWarning: true }, + ); + + const diagnostics = createCronRunDiagnosticsFromAgentResult( + { + payloads: [{ text: "Queued 3 topics." }, toolWarning], + }, + { finalStatus: "ok", nowMs: () => 700 }, + ); + + expect(diagnostics?.entries).toEqual([ + { + ts: 700, + source: "tool", + severity: "warn", + message: "⚠️ Exec failed", + toolName: "exec", + }, + ]); + expect(diagnostics?.summary).toBe("⚠️ Exec failed"); + }); + it("captures silent failed exec details with a fallback message", () => { const diagnostics = createCronRunDiagnosticsFromAgentResult( { diff --git a/src/cron/run-diagnostics.ts b/src/cron/run-diagnostics.ts index 6c4eca75c57..180d9b25304 100644 --- a/src/cron/run-diagnostics.ts +++ b/src/cron/run-diagnostics.ts @@ -1,3 +1,4 @@ +import { getReplyPayloadMetadata } from "../auto-reply/reply-payload.js"; import { redactSensitiveText } from "../logging/redact.js"; import { normalizeOptionalString } from "../shared/string-coerce.js"; import type { @@ -272,10 +273,13 @@ export function createCronRunDiagnosticsFromToolPayload( }); const isError = payload.isError === true; const text = typeof payload.text === "string" ? payload.text : undefined; + const isNonTerminalToolWarning = + opts?.finalStatus === "ok" && + getReplyPayloadMetadata(payload)?.nonTerminalToolErrorWarning === true; const textDiagnostics = isError && text ? createCronRunDiagnosticsFromError("tool", text, { - severity: "error", + severity: isNonTerminalToolWarning ? "warn" : "error", nowMs: opts?.nowMs, toolName, })