From b0c70786fd6377e65b19e922f48628c4f4ea3aeb Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 04:33:36 +0100 Subject: [PATCH] fix(cron): preserve structured denial failures --- CHANGELOG.md | 2 +- docs/automation/cron-jobs.md | 2 +- docs/cli/cron.md | 9 +- .../pi-embedded-runner/failure-signal.test.ts | 95 +++++++++++++++++++ .../pi-embedded-runner/failure-signal.ts | 46 +++++++++ src/agents/pi-embedded-runner/run.ts | 10 ++ src/agents/pi-embedded-runner/types.ts | 10 ++ .../pi-embedded-subscribe.tools.test.ts | 21 ++++ src/agents/pi-embedded-subscribe.tools.ts | 73 ++++++++++---- src/cron/isolated-agent.helpers.test.ts | 45 +++++++++ src/cron/isolated-agent/helpers.ts | 71 +++++++++++--- src/cron/isolated-agent/run-executor.ts | 3 + .../isolated-agent/run.interim-retry.test.ts | 66 +++++++++++++ src/cron/isolated-agent/run.test-harness.ts | 37 ++++++-- src/cron/isolated-agent/run.ts | 4 +- 15 files changed, 450 insertions(+), 44 deletions(-) create mode 100644 src/agents/pi-embedded-runner/failure-signal.test.ts create mode 100644 src/agents/pi-embedded-runner/failure-signal.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b86c400cc..b7b7ea42f37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Docs: https://docs.openclaw.ai ### Fixes -- Cron: classify isolated runs as errors when final output narrates known execution-denial markers such as `SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, or approval-binding refusal phrases, so blocked commands no longer appear green in cron history. Fixes #67172; carries forward #67186. Thanks @oc-gh-dr, @hclsys, and @1yihui. +- Cron: classify isolated runs as errors from structured embedded-run execution-denial metadata, with final-output marker fallback for `SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusals, so blocked commands no longer appear green in cron history. Fixes #67172; carries forward #67186. Thanks @oc-gh-dr, @hclsys, and @1yihui. - Gateway/install: add a validated `--wrapper`/`OPENCLAW_WRAPPER` service install path that persists executable LaunchAgent/systemd wrappers across forced reinstalls, updates, and doctor repairs instead of falling back to raw node/bun `ProgramArguments`. Fixes #69400. (#72445) Thanks @willtmc. - macOS Gateway: write launchd services with a state-dir `WorkingDirectory`, use a durable state-dir temp path instead of freezing macOS session `TMPDIR`, create that temp directory before bootstrap, and label abort-shaped launchd exits as `SIGABRT/abort` in status output. Fixes #53679 and #70223; refs #71848. Thanks @dlturock, @stammi922, and @palladius. - Exec approvals: accept runtime-owned `source: "allow-always"` and `commandText` allowlist metadata in gateway and node approval-set payloads so Control UI round-trips no longer fail with `unexpected property 'source'`. Fixes #60000; carries forward #60064. Thanks @sd1471123, @sharkqwy, and @luoyanglang. diff --git a/docs/automation/cron-jobs.md b/docs/automation/cron-jobs.md index fcc57d630ad..19c6d2cf6a5 100644 --- a/docs/automation/cron-jobs.md +++ b/docs/automation/cron-jobs.md @@ -47,7 +47,7 @@ Cron is the Gateway's built-in scheduler. It persists jobs, wakes the agent at t - One-shot jobs (`--at`) auto-delete after success by default. - Isolated cron runs best-effort close tracked browser tabs/processes for their `cron:` session when the run completes, so detached browser automation does not leave orphaned processes behind. - Isolated cron runs also guard against stale acknowledgement replies. If the first result is just an interim status update (`on it`, `pulling everything together`, and similar hints) and no descendant subagent run is still responsible for the final answer, OpenClaw re-prompts once for the actual result before delivery. -- Isolated cron runs classify known execution-denial markers in the final summary/output as failures, including host markers such as `SYSTEM_RUN_DENIED` and `INVALID_REQUEST`, so a blocked command is not reported as a green run. +- Isolated cron runs prefer structured execution-denial metadata from the embedded run, then fall back to known final summary/output markers such as `SYSTEM_RUN_DENIED` and `INVALID_REQUEST`, so a blocked command is not reported as a green run. diff --git a/docs/cli/cron.md b/docs/cli/cron.md index 1398d95cb37..a0a11173073 100644 --- a/docs/cli/cron.md +++ b/docs/cli/cron.md @@ -57,10 +57,11 @@ Note: if an isolated cron run returns only the silent token (`NO_REPLY` / `no_reply`), cron suppresses direct outbound delivery and the fallback queued summary path as well, so nothing is posted back to chat. -Note: isolated cron runs treat known denial markers in final output, such as -`SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusal phrases, as -errors. `cron list` and run history then surface the matched token in the error -reason instead of reporting a blocked command as `ok`. +Note: isolated cron runs prefer structured execution-denial metadata from the +embedded run, then fall back to known denial markers in final output, such as +`SYSTEM_RUN_DENIED`, `INVALID_REQUEST`, and approval-binding refusal phrases. +`cron list` and run history surface the denial reason instead of reporting a +blocked command as `ok`. Note: `cron add|edit --model ...` uses that selected allowed model for the job. If the model is not allowed, cron warns and falls back to the job's agent/default diff --git a/src/agents/pi-embedded-runner/failure-signal.test.ts b/src/agents/pi-embedded-runner/failure-signal.test.ts new file mode 100644 index 00000000000..25fb5c0ad50 --- /dev/null +++ b/src/agents/pi-embedded-runner/failure-signal.test.ts @@ -0,0 +1,95 @@ +import { describe, expect, it } from "vitest"; +import { resolveEmbeddedRunFailureSignal } from "./failure-signal.js"; + +describe("resolveEmbeddedRunFailureSignal", () => { + it("classifies cron exec denials from tool error metadata", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "exec", + error: "SYSTEM_RUN_DENIED: approval required", + }, + }), + ).toEqual({ + kind: "execution_denied", + source: "tool", + toolName: "exec", + code: "SYSTEM_RUN_DENIED", + message: "SYSTEM_RUN_DENIED: approval required", + fatalForCron: true, + }); + }); + + it("classifies invalid request denials from tool error metadata", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "bash", + error: "INVALID_REQUEST: approval denied", + }, + })?.code, + ).toBe("INVALID_REQUEST"); + }); + + it("does not mark non-cron runs", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "user", + lastToolError: { + toolName: "exec", + error: "SYSTEM_RUN_DENIED: approval required", + }, + }), + ).toBeUndefined(); + }); + + it("does not mark ordinary tool failures as cron-denial failures", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "exec", + error: "/bin/bash: line 1: python: command not found", + }, + }), + ).toBeUndefined(); + }); + + it("does not mark non-exec validation errors as execution denials", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "browser", + error: "INVALID_REQUEST: url required", + }, + }), + ).toBeUndefined(); + }); + + it("does not mark non-exec tool output that merely mentions host denial tokens", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "web_fetch", + error: "The fetched page says SYSTEM_RUN_DENIED in its troubleshooting section.", + }, + }), + ).toBeUndefined(); + }); + + it("infers approval-binding denials even when the host code is omitted", () => { + expect( + resolveEmbeddedRunFailureSignal({ + trigger: "cron", + lastToolError: { + toolName: "exec", + error: "Approval cannot safely bind this interpreter/runtime command", + }, + })?.code, + ).toBe("SYSTEM_RUN_DENIED"); + }); +}); diff --git a/src/agents/pi-embedded-runner/failure-signal.ts b/src/agents/pi-embedded-runner/failure-signal.ts new file mode 100644 index 00000000000..30bfe3e0aa8 --- /dev/null +++ b/src/agents/pi-embedded-runner/failure-signal.ts @@ -0,0 +1,46 @@ +import { normalizeOptionalString } from "../../shared/string-coerce.js"; +import { isExecLikeToolName, type ToolErrorSummary } from "../tool-error-summary.js"; +import type { EmbeddedRunFailureSignal } from "./types.js"; + +const FAILURE_SIGNAL_CODES = ["SYSTEM_RUN_DENIED", "INVALID_REQUEST"] as const; + +function resolveFailureSignalCode(message: string): EmbeddedRunFailureSignal["code"] | undefined { + for (const code of FAILURE_SIGNAL_CODES) { + if (message.includes(code)) { + return code; + } + } + if (message.toLowerCase().includes("approval cannot safely bind")) { + return "SYSTEM_RUN_DENIED"; + } + return undefined; +} + +export function resolveEmbeddedRunFailureSignal(params: { + trigger?: string | undefined; + lastToolError?: ToolErrorSummary | undefined; +}): EmbeddedRunFailureSignal | undefined { + if (params.trigger !== "cron") { + return undefined; + } + const lastToolError = params.lastToolError; + if (!lastToolError || !isExecLikeToolName(lastToolError.toolName)) { + return undefined; + } + const message = normalizeOptionalString(lastToolError.error); + if (!message) { + return undefined; + } + const code = resolveFailureSignalCode(message); + if (!code) { + return undefined; + } + return { + kind: "execution_denied", + source: "tool", + ...(lastToolError.toolName ? { toolName: lastToolError.toolName } : {}), + code, + message, + fatalForCron: true, + }; +} diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 4aede7500ee..3adbcc24131 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -84,6 +84,7 @@ import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js import { runPostCompactionSideEffects } from "./compaction-hooks.js"; import { buildEmbeddedCompactionRuntimeContext } from "./compaction-runtime-context.js"; import { runContextEngineMaintenance } from "./context-engine-maintenance.js"; +import { resolveEmbeddedRunFailureSignal } from "./failure-signal.js"; import { resolveGlobalLane, resolveSessionLane } from "./lanes.js"; import { log } from "./logger.js"; import { resolveModelAsync } from "./model.js"; @@ -1853,6 +1854,10 @@ export async function runEmbeddedPiAgent( toolMetas: attempt.toolMetas, hadFailure: Boolean(attempt.lastToolError), }); + const failureSignal = resolveEmbeddedRunFailureSignal({ + trigger: params.trigger, + lastToolError: attempt.lastToolError, + }); // Timeout aborts can leave the run without any assistant payloads. // Emit an explicit timeout error instead of silently completing, so @@ -1893,6 +1898,7 @@ export async function runEmbeddedPiAgent( replayInvalid, livenessState, toolSummary: attemptToolSummary, + ...(failureSignal ? { failureSignal } : {}), agentHarnessResultClassification: attempt.agentHarnessResultClassification, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, @@ -2070,6 +2076,7 @@ export async function runEmbeddedPiAgent( replayInvalid, livenessState, toolSummary: attemptToolSummary, + ...(failureSignal ? { failureSignal } : {}), agentHarnessResultClassification: attempt.agentHarnessResultClassification, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, @@ -2119,6 +2126,7 @@ export async function runEmbeddedPiAgent( replayInvalid, livenessState, toolSummary: attemptToolSummary, + ...(failureSignal ? { failureSignal } : {}), agentHarnessResultClassification: attempt.agentHarnessResultClassification, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, @@ -2227,6 +2235,7 @@ export async function runEmbeddedPiAgent( replayInvalid, livenessState, toolSummary: attemptToolSummary, + ...(failureSignal ? { failureSignal } : {}), agentHarnessResultClassification: attempt.agentHarnessResultClassification, }, didSendViaMessagingTool: attempt.didSendViaMessagingTool, @@ -2334,6 +2343,7 @@ export async function runEmbeddedPiAgent( ...(params.blockReplyBreak ? { blockStreaming: params.blockReplyBreak } : {}), }, toolSummary: attemptToolSummary, + ...(failureSignal ? { failureSignal } : {}), completion: { ...(stopReason ? { stopReason } : {}), ...(stopReason ? { finishReason: stopReason } : {}), diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index b9dd3e73ab3..fbc25212951 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -103,6 +103,15 @@ export type ContextManagementTrace = { export type EmbeddedRunLivenessState = "working" | "paused" | "blocked" | "abandoned"; +export type EmbeddedRunFailureSignal = { + kind: "execution_denied"; + source: "tool"; + toolName?: string; + code: "SYSTEM_RUN_DENIED" | "INVALID_REQUEST"; + message: string; + fatalForCron: true; +}; + export type EmbeddedPiRunMeta = { durationMs: number; agentMeta?: EmbeddedPiAgentMeta; @@ -124,6 +133,7 @@ export type EmbeddedPiRunMeta = { | "retry_limit"; message: string; }; + failureSignal?: EmbeddedRunFailureSignal; /** Stop reason for the agent run (e.g., "completed", "tool_calls"). */ stopReason?: string; /** Pending tool calls when stopReason is "tool_calls". */ diff --git a/src/agents/pi-embedded-subscribe.tools.test.ts b/src/agents/pi-embedded-subscribe.tools.test.ts index 2ab5a09738a..6407e23360a 100644 --- a/src/agents/pi-embedded-subscribe.tools.test.ts +++ b/src/agents/pi-embedded-subscribe.tools.test.ts @@ -12,4 +12,25 @@ describe("extractToolErrorMessage", () => { expect(extractToolErrorMessage({ details: { status: "failed" } })).toBe("failed"); expect(extractToolErrorMessage({ details: { status: "timeout" } })).toBe("timeout"); }); + + it("prefers node-host aggregated denial text over generic failed status", () => { + expect( + extractToolErrorMessage({ + content: [{ type: "text", text: "SYSTEM_RUN_DENIED: approval required" }], + details: { + status: "failed", + aggregated: "SYSTEM_RUN_DENIED: approval required", + }, + }), + ).toBe("SYSTEM_RUN_DENIED: approval required"); + }); + + it("uses result text before generic failed status when details omit aggregated output", () => { + expect( + extractToolErrorMessage({ + content: [{ type: "text", text: "SYSTEM_RUN_DENIED: approval required" }], + details: { status: "failed" }, + }), + ).toBe("SYSTEM_RUN_DENIED: approval required"); + }); }); diff --git a/src/agents/pi-embedded-subscribe.tools.ts b/src/agents/pi-embedded-subscribe.tools.ts index e796b24321a..ebe91ecf0e9 100644 --- a/src/agents/pi-embedded-subscribe.tools.ts +++ b/src/agents/pi-embedded-subscribe.tools.ts @@ -75,10 +75,7 @@ function extractErrorField(value: unknown): string | undefined { return undefined; } const record = value as Record; - const direct = - readErrorCandidate(record.error) ?? - readErrorCandidate(record.message) ?? - readErrorCandidate(record.reason); + const direct = extractDirectErrorField(record); if (direct) { return direct; } @@ -89,6 +86,34 @@ function extractErrorField(value: unknown): string | undefined { return normalizeToolErrorText(status); } +function extractDirectErrorField(value: unknown): string | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + const record = value as Record; + return ( + readErrorCandidate(record.error) ?? + readErrorCandidate(record.message) ?? + readErrorCandidate(record.reason) + ); +} + +function extractAggregatedErrorField(value: unknown): string | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + const record = value as Record; + return readErrorCandidate(record.aggregated); +} + +function isHostDenialToolText(text: string): boolean { + const normalized = text.trim(); + if (normalized.includes("SYSTEM_RUN_DENIED") || normalized.includes("INVALID_REQUEST")) { + return true; + } + return normalized.toLowerCase().includes("approval cannot safely bind"); +} + export function sanitizeToolResult(result: unknown): unknown { if (!result || typeof result !== "object") { return result; @@ -388,28 +413,42 @@ export function extractToolErrorMessage(result: unknown): string | undefined { return undefined; } const record = result as Record; - const fromDetails = extractErrorField(record.details); + const fromDetails = extractDirectErrorField(record.details); if (fromDetails) { return fromDetails; } - const fromRoot = extractErrorField(record); + const fromDetailsAggregated = extractAggregatedErrorField(record.details); + if (fromDetailsAggregated) { + return fromDetailsAggregated; + } + const fromRoot = extractDirectErrorField(record); if (fromRoot) { return fromRoot; } const text = extractToolResultText(result); - if (!text) { - return undefined; - } - try { - const parsed = JSON.parse(text) as unknown; - const fromJson = extractErrorField(parsed); - if (fromJson) { - return fromJson; + if (text) { + try { + const parsed = JSON.parse(text) as unknown; + const fromJson = extractErrorField(parsed); + if (fromJson) { + return fromJson; + } + } catch { + // Fall through to status/text fallback. + } + if (isHostDenialToolText(text)) { + return normalizeToolErrorText(text); } - } catch { - // Fall through to first-line text fallback. } - return normalizeToolErrorText(text); + const fromDetailsStatus = extractErrorField(record.details); + if (fromDetailsStatus) { + return fromDetailsStatus; + } + const fromRootStatus = extractErrorField(record); + if (fromRootStatus) { + return fromRootStatus; + } + return text ? normalizeToolErrorText(text) : undefined; } function resolveMessageToolTarget(args: Record): string | undefined { diff --git a/src/cron/isolated-agent.helpers.test.ts b/src/cron/isolated-agent.helpers.test.ts index 4f8e1fe217c..876ebe59d0f 100644 --- a/src/cron/isolated-agent.helpers.test.ts +++ b/src/cron/isolated-agent.helpers.test.ts @@ -190,6 +190,51 @@ describe("resolveCronPayloadOutcome", () => { ); }); + it("prefers typed failure signals over denial-token fallback", () => { + const result = resolveCronPayloadOutcome({ + payloads: [{ text: "On it, retrying now." }], + failureSignal: { + kind: "execution_denied", + source: "tool", + toolName: "exec", + code: "SYSTEM_RUN_DENIED", + message: "SYSTEM_RUN_DENIED: approval required", + fatalForCron: true, + }, + }); + + expect(result.hasFatalErrorPayload).toBe(true); + expect(result.embeddedRunError).toBe( + "cron classifier: execution_denied failure from exec (SYSTEM_RUN_DENIED): SYSTEM_RUN_DENIED: approval required", + ); + expect(result.summary).toBe("SYSTEM_RUN_DENIED: approval required"); + expect(result.outputText).toBe("SYSTEM_RUN_DENIED: approval required"); + expect(result.synthesizedText).toBe("SYSTEM_RUN_DENIED: approval required"); + expect(result.deliveryPayload).toEqual({ + text: "SYSTEM_RUN_DENIED: approval required", + isError: true, + }); + expect(result.deliveryPayloads).toEqual([ + { text: "SYSTEM_RUN_DENIED: approval required", isError: true }, + ]); + expect(result.deliveryPayloadHasStructuredContent).toBe(false); + }); + + it("ignores non-fatal failure signal metadata", () => { + const result = resolveCronPayloadOutcome({ + payloads: [{ text: "ordinary success" }], + failureSignal: { + kind: "execution_denied", + source: "tool", + message: "SYSTEM_RUN_DENIED: approval required", + fatalForCron: false, + }, + }); + + expect(result.hasFatalErrorPayload).toBe(false); + expect(result.embeddedRunError).toBeUndefined(); + }); + it("keeps structured error payload reasons ahead of denial-token reasons", () => { const result = resolveCronPayloadOutcome({ payloads: [ diff --git a/src/cron/isolated-agent/helpers.ts b/src/cron/isolated-agent/helpers.ts index 08d8a8b30b5..3901f53a00c 100644 --- a/src/cron/isolated-agent/helpers.ts +++ b/src/cron/isolated-agent/helpers.ts @@ -26,6 +26,20 @@ type CronDenialSignal = { field: string; }; +type CronFailureSignal = { + kind?: string; + source?: string; + toolName?: string; + code?: string; + message?: string; + fatalForCron?: boolean; +}; + +type NormalizedCronFailureSignal = CronFailureSignal & { + message: string; + fatalForCron: true; +}; + const CRON_DENIAL_EXACT_TOKENS = ["SYSTEM_RUN_DENIED", "INVALID_REQUEST"] as const; const CRON_DENIAL_CASE_INSENSITIVE_TOKENS = [ "approval cannot safely bind", @@ -75,6 +89,25 @@ function formatCronDenialSignal(signal: CronDenialSignal): string { return `cron classifier: denial token "${signal.token}" detected in ${signal.field}`; } +function normalizeCronFailureSignal( + signal: CronFailureSignal | undefined, +): NormalizedCronFailureSignal | undefined { + const message = normalizeOptionalString(signal?.message); + if (signal?.fatalForCron !== true || !message) { + return undefined; + } + return { ...signal, message, fatalForCron: true }; +} + +function formatCronFailureSignal(signal: NormalizedCronFailureSignal): string { + const kind = normalizeOptionalString(signal.kind) ?? "run"; + const code = normalizeOptionalString(signal.code); + const source = normalizeOptionalString(signal.toolName) ?? normalizeOptionalString(signal.source); + return `cron classifier: ${kind} failure${source ? ` from ${source}` : ""}${ + code ? ` (${code})` : "" + }: ${signal.message}`; +} + export function pickSummaryFromOutput(text: string | undefined) { const clean = (text ?? "").trim(); if (!clean) { @@ -191,7 +224,8 @@ export function resolveHeartbeatAckMaxChars(agentCfg?: { heartbeat?: { ackMaxCha export function resolveCronPayloadOutcome(params: { payloads: DeliveryPayload[]; runLevelError?: unknown; - finalAssistantVisibleText?: string; + failureSignal?: CronFailureSignal | undefined; + finalAssistantVisibleText?: string | undefined; preferFinalAssistantVisibleText?: boolean; }): CronPayloadOutcome { const firstText = params.payloads[0]?.text ?? ""; @@ -254,19 +288,34 @@ export function resolveCronPayloadOutcome(params: { text: payload?.text, })), ]); - const hasFatalErrorPayload = hasFatalStructuredErrorPayload || denialSignal !== undefined; + const failureSignal = normalizeCronFailureSignal(params.failureSignal); + const hasFatalErrorPayload = + hasFatalStructuredErrorPayload || failureSignal !== undefined || denialSignal !== undefined; + const shouldUseFailureSignalPayload = + failureSignal !== undefined && !hasFatalStructuredErrorPayload; + const failureSignalDeliveryPayload = shouldUseFailureSignalPayload + ? ({ text: failureSignal.message, isError: true } satisfies DeliveryPayload) + : undefined; return { - summary, - outputText, - synthesizedText, - deliveryPayload, - deliveryPayloads: resolvedDeliveryPayloads, - deliveryPayloadHasStructuredContent, + summary: shouldUseFailureSignalPayload + ? (pickSummaryFromOutput(failureSignal.message) ?? summary) + : summary, + outputText: shouldUseFailureSignalPayload ? failureSignal.message : outputText, + synthesizedText: shouldUseFailureSignalPayload ? failureSignal.message : synthesizedText, + deliveryPayload: failureSignalDeliveryPayload ?? deliveryPayload, + deliveryPayloads: failureSignalDeliveryPayload + ? [failureSignalDeliveryPayload] + : resolvedDeliveryPayloads, + deliveryPayloadHasStructuredContent: failureSignalDeliveryPayload + ? false + : deliveryPayloadHasStructuredContent, hasFatalErrorPayload, embeddedRunError: hasFatalStructuredErrorPayload ? (lastErrorPayloadText ?? "cron isolated run returned an error payload") - : denialSignal - ? formatCronDenialSignal(denialSignal) - : undefined, + : failureSignal + ? formatCronFailureSignal(failureSignal) + : denialSignal + ? formatCronDenialSignal(denialSignal) + : undefined, }; } diff --git a/src/cron/isolated-agent/run-executor.ts b/src/cron/isolated-agent/run-executor.ts index 18e48796eb4..bbcc1d5ba79 100644 --- a/src/cron/isolated-agent/run-executor.ts +++ b/src/cron/isolated-agent/run-executor.ts @@ -359,10 +359,12 @@ export async function executeCronRun(params: { const interimPayloads = runResult.payloads ?? []; const { deliveryPayloadHasStructuredContent: interimPayloadHasStructuredContent, + hasFatalErrorPayload: interimHasFatalErrorPayload, outputText: interimOutputText, } = resolveCronPayloadOutcome({ payloads: interimPayloads, runLevelError: runResult.meta?.error, + failureSignal: runResult.meta?.failureSignal, finalAssistantVisibleText: runResult.meta?.finalAssistantVisibleText, preferFinalAssistantVisibleText: ( await resolveCronChannelOutputPolicy(params.resolvedDelivery.channel) @@ -371,6 +373,7 @@ export async function executeCronRun(params: { const interimText = interimOutputText?.trim() ?? ""; const shouldRetryInterimAck = !runResult.meta?.error && + !interimHasFatalErrorPayload && !runResult.didSendViaMessagingTool && !interimPayloadHasStructuredContent && !interimPayloads.some((payload) => payload?.isError === true) && diff --git a/src/cron/isolated-agent/run.interim-retry.test.ts b/src/cron/isolated-agent/run.interim-retry.test.ts index 2c799869a44..b4c75fd3c0d 100644 --- a/src/cron/isolated-agent/run.interim-retry.test.ts +++ b/src/cron/isolated-agent/run.interim-retry.test.ts @@ -5,10 +5,13 @@ import { } from "./run.suite-helpers.js"; import { countActiveDescendantRunsMock, + dispatchCronDeliveryMock, + isHeartbeatOnlyResponseMock, listDescendantRunsForRequesterMock, loadRunCronIsolatedAgentTurn, mockRunCronFallbackPassthrough, pickLastNonEmptyTextFromPayloadsMock, + resolveCronDeliveryPlanMock, runEmbeddedPiAgentMock, runWithModelFallbackMock, } from "./run.test-harness.js"; @@ -74,6 +77,69 @@ describe("runCronIsolatedAgentTurn — interim ack retry", () => { await runTurnAndExpectOk(1, 1); }); + it("does not retry over a fatal structured failure signal", async () => { + usePayloadTextExtraction(); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "On it, retrying now." }], + meta: { + agentMeta: { usage: { input: 10, output: 20 } }, + failureSignal: { + kind: "execution_denied", + source: "tool", + toolName: "exec", + code: "SYSTEM_RUN_DENIED", + message: "SYSTEM_RUN_DENIED: approval required", + fatalForCron: true, + }, + }, + }); + + mockRunCronFallbackPassthrough(); + const result = await runCronIsolatedAgentTurn(makeIsolatedAgentTurnParams()); + + expect(result.status).toBe("error"); + expect(result.error).toBe("SYSTEM_RUN_DENIED: approval required"); + expect(runWithModelFallbackMock).toHaveBeenCalledTimes(1); + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(1); + }); + + it("delivers synthesized fatal failure signals even when the original payloads are empty", async () => { + usePayloadTextExtraction(); + resolveCronDeliveryPlanMock.mockReturnValue({ + requested: true, + mode: "announce", + channel: "messagechat", + to: "123", + }); + isHeartbeatOnlyResponseMock.mockReturnValue(true); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [], + meta: { + agentMeta: { usage: { input: 10, output: 20 } }, + failureSignal: { + kind: "execution_denied", + source: "tool", + toolName: "exec", + code: "SYSTEM_RUN_DENIED", + message: "SYSTEM_RUN_DENIED: approval required", + fatalForCron: true, + }, + }, + }); + + mockRunCronFallbackPassthrough(); + const result = await runCronIsolatedAgentTurn(makeIsolatedAgentTurnParams()); + + expect(result.status).toBe("error"); + expect(result.error).toBe("SYSTEM_RUN_DENIED: approval required"); + expect(dispatchCronDeliveryMock).toHaveBeenCalledWith( + expect.objectContaining({ + skipHeartbeatDelivery: false, + deliveryPayloads: [{ text: "SYSTEM_RUN_DENIED: approval required", isError: true }], + }), + ); + }); + it("does not retry when descendants were spawned in this run even if they already settled", async () => { usePayloadTextExtraction(); runEmbeddedPiAgentMock.mockResolvedValueOnce({ diff --git a/src/cron/isolated-agent/run.test-harness.ts b/src/cron/isolated-agent/run.test-harness.ts index 44cc62b5ece..747727c3ecf 100644 --- a/src/cron/isolated-agent/run.test-harness.ts +++ b/src/cron/isolated-agent/run.test-harness.ts @@ -362,21 +362,40 @@ function resetRunOutcomeMocks(): void { pickLastNonEmptyTextFromPayloadsMock.mockReturnValue("test output"); resolveCronPayloadOutcomeMock.mockReset(); resolveCronPayloadOutcomeMock.mockImplementation( - ({ payloads }: { payloads: Array<{ isError?: boolean }> }) => { - const outputText = pickLastNonEmptyTextFromPayloadsMock(payloads); + ({ + payloads, + failureSignal, + }: { + payloads: Array<{ isError?: boolean }>; + failureSignal?: { fatalForCron?: boolean; message?: string }; + }) => { + const failureMessage = + failureSignal?.fatalForCron === true + ? (failureSignal.message ?? "cron isolated run returned a fatal failure signal") + : undefined; + const outputText = failureMessage ?? pickLastNonEmptyTextFromPayloadsMock(payloads); const synthesizedText = outputText?.trim() || "summary"; - const hasFatalErrorPayload = payloads.some((payload) => payload?.isError === true); + const hasFatalErrorPayload = + payloads.some((payload) => payload?.isError === true) || failureMessage !== undefined; + const deliveryPayload = failureMessage ? { text: failureMessage, isError: true } : undefined; return { - summary: "summary", + summary: failureMessage ?? "summary", outputText, synthesizedText, - deliveryPayload: undefined, - deliveryPayloads: synthesizedText ? [{ text: synthesizedText }] : [], + deliveryPayload, + deliveryPayloads: deliveryPayload + ? [deliveryPayload] + : synthesizedText + ? [{ text: synthesizedText }] + : [], deliveryPayloadHasStructuredContent: false, hasFatalErrorPayload, - embeddedRunError: hasFatalErrorPayload - ? "cron isolated run returned an error payload" - : undefined, + embeddedRunError: + failureMessage !== undefined + ? failureMessage + : hasFatalErrorPayload + ? "cron isolated run returned an error payload" + : undefined, }; }, ); diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 577d0544493..b1a3bdd80d9 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -839,6 +839,7 @@ async function finalizeCronRun(params: { } = resolveCronPayloadOutcome({ payloads, runLevelError: finalRunResult.meta?.error, + failureSignal: finalRunResult.meta?.failureSignal, finalAssistantVisibleText: finalRunResult.meta?.finalAssistantVisibleText, preferFinalAssistantVisibleText: ( await resolveCronChannelOutputPolicy(prepared.resolvedDelivery.channel) @@ -864,7 +865,8 @@ async function finalizeCronRun(params: { const skipHeartbeatDelivery = prepared.deliveryRequested && - isHeartbeatOnlyResponse(payloads, resolveHeartbeatAckMaxChars(prepared.agentCfg)); + !hasFatalErrorPayload && + isHeartbeatOnlyResponse(deliveryPayloads, resolveHeartbeatAckMaxChars(prepared.agentCfg)); const { dispatchCronDelivery, matchesMessagingToolDeliveryTarget,