diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d4a2b28a43..84821e93f4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Telegram: clean up tool-only draft previews after assistant message boundaries so transient `Surfacing...` tool-status bubbles do not linger when no matching final preview arrives. Thanks @BunsDev. +- Cron: surface failed isolated-run diagnostics in `cron show`, status, and run history when requested tools are unavailable, so blocked cron runs report the actual tool-policy failure instead of a misleading green result. Fixes #75763. Thanks @RyanSandoval. - TUI/escape abort: track the in-flight runId after `chat.send` resolves so pressing Esc during the gap before the first gateway event aborts the run instead of repeatedly printing `no active run`. Fixes #1296. Thanks @Lukavyi and @romneyda. - TUI/render: stop the long-token sanitizer from injecting literal spaces inside inline code spans, fenced code blocks, table borders, and bare hyphenated/dotted identifiers, so copied package names, entity IDs, and shell line-continuations stay byte-for-byte intact while narrow-terminal protection still chunks unidentifiable long prose tokens. Fixes #48432, #39505. Thanks @DocOellerson, @xeusoc, @CCcassiusdjs, @akramcodez, @brokemac79, @romneyda. - Gateway/status: label Linux managed gateway services as `systemd user`, making status output explicit about the user-service scope instead of implying a system-level unit. Thanks @vincentkoc. diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index c8ccdddbd38..af9e7887816 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -4323,6 +4323,7 @@ public struct CronRunLogEntry: Codable, Sendable { public let status: AnyCodable? public let error: String? public let summary: String? + public let diagnostics: [String: AnyCodable]? public let delivered: Bool? public let deliverystatus: AnyCodable? public let deliveryerror: String? @@ -4344,6 +4345,7 @@ public struct CronRunLogEntry: Codable, Sendable { status: AnyCodable?, error: String?, summary: String?, + diagnostics: [String: AnyCodable]?, delivered: Bool?, deliverystatus: AnyCodable?, deliveryerror: String?, @@ -4364,6 +4366,7 @@ public struct CronRunLogEntry: Codable, Sendable { self.status = status self.error = error self.summary = summary + self.diagnostics = diagnostics self.delivered = delivered self.deliverystatus = deliverystatus self.deliveryerror = deliveryerror @@ -4386,6 +4389,7 @@ public struct CronRunLogEntry: Codable, Sendable { case status case error case summary + case diagnostics case delivered case deliverystatus = "deliveryStatus" case deliveryerror = "deliveryError" diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index c8ccdddbd38..af9e7887816 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -4323,6 +4323,7 @@ public struct CronRunLogEntry: Codable, Sendable { public let status: AnyCodable? public let error: String? public let summary: String? + public let diagnostics: [String: AnyCodable]? public let delivered: Bool? public let deliverystatus: AnyCodable? public let deliveryerror: String? @@ -4344,6 +4345,7 @@ public struct CronRunLogEntry: Codable, Sendable { status: AnyCodable?, error: String?, summary: String?, + diagnostics: [String: AnyCodable]?, delivered: Bool?, deliverystatus: AnyCodable?, deliveryerror: String?, @@ -4364,6 +4366,7 @@ public struct CronRunLogEntry: Codable, Sendable { self.status = status self.error = error self.summary = summary + self.diagnostics = diagnostics self.delivered = delivered self.deliverystatus = deliverystatus self.deliveryerror = deliveryerror @@ -4386,6 +4389,7 @@ public struct CronRunLogEntry: Codable, Sendable { case status case error case summary + case diagnostics case delivered case deliverystatus = "deliveryStatus" case deliveryerror = "deliveryError" diff --git a/src/cli/cron-cli.test.ts b/src/cli/cron-cli.test.ts index 24c729de26b..f50fb26fe7b 100644 --- a/src/cli/cron-cli.test.ts +++ b/src/cli/cron-cli.test.ts @@ -497,8 +497,10 @@ describe("cron cli", () => { nextOffset: 200, }; } + const targetJob = createCronJob("target-job", "Target Job"); + targetJob.state.lastDiagnosticSummary = "exec stderr tail"; return { - jobs: [createCronJob("target-job", "Target Job")], + jobs: [targetJob], hasMore: false, nextOffset: null, deliveryPreviews: { @@ -527,6 +529,7 @@ describe("cron cli", () => { expect(defaultRuntime.log).toHaveBeenCalledWith( "delivery: announce -> telegram:-100 (resolved from last, main session)", ); + expect(defaultRuntime.log).toHaveBeenCalledWith("diagnostic: exec stderr tail"); }); it("sends agent id on cron add", async () => { diff --git a/src/cli/cron-cli/shared.ts b/src/cli/cron-cli/shared.ts index f1a05ae84b0..acd65b6ef14 100644 --- a/src/cli/cron-cli/shared.ts +++ b/src/cli/cron-cli/shared.ts @@ -380,4 +380,5 @@ export function printCronShow( runtime.log(`next: ${formatRelative(job.state.nextRunAtMs, Date.now())}`); runtime.log(`last: ${formatRelative(job.state.lastRunAtMs, Date.now())}`); runtime.log(`status: ${formatStatus(job)}`); + runtime.log(`diagnostic: ${job.state.lastDiagnosticSummary ?? "-"}`); } diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index c85cbb32a5f..ac87bcd888c 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -10,6 +10,11 @@ import { stringifyRouteThreadId } from "../../plugin-sdk/channel-route.js"; import { createLazyImportLoader } from "../../shared/lazy-promise.js"; import { normalizeOptionalString } from "../../shared/string-coerce.js"; import { resolveCronDeliveryPlan, type CronDeliveryPlan } from "../delivery-plan.js"; +import { + createCronRunDiagnosticsFromAgentResult, + createCronRunDiagnosticsFromError, + mergeCronRunDiagnostics, +} from "../run-diagnostics.js"; import type { CronAgentExecutionStarted, CronDeliveryTrace, @@ -566,7 +571,14 @@ async function prepareCronRunContext(params: { if (!resolvedModelSelection.ok) { return { ok: false, - result: withRunSession({ status: "error", error: resolvedModelSelection.error }), + result: withRunSession({ + status: "error", + error: resolvedModelSelection.error, + diagnostics: createCronRunDiagnosticsFromError( + "cron-preflight", + resolvedModelSelection.error, + ), + }), }; } let provider = resolvedModelSelection.provider; @@ -586,6 +598,9 @@ async function prepareCronRunContext(params: { result: withRunSession({ status: "skipped", error: preflight.reason, + diagnostics: createCronRunDiagnosticsFromError("model-preflight", preflight.reason, { + severity: "warn", + }), provider, model, }), @@ -862,7 +877,15 @@ async function finalizeCronRun(params: { await prepared.persistSessionEntry(); if (params.isAborted()) { - return prepared.withRunSession({ status: "error", error: params.abortReason(), ...telemetry }); + return prepared.withRunSession({ + status: "error", + error: params.abortReason(), + diagnostics: mergeCronRunDiagnostics( + createCronRunDiagnosticsFromAgentResult(finalRunResult, { finalStatus: "error" }), + createCronRunDiagnosticsFromError("cron-setup", params.abortReason()), + ), + ...telemetry, + }); } let { summary, @@ -882,6 +905,9 @@ async function finalizeCronRun(params: { await resolveCronChannelOutputPolicy(prepared.resolvedDelivery.channel) ).preferFinalAssistantVisibleText, }); + const agentDiagnostics = createCronRunDiagnosticsFromAgentResult(finalRunResult, { + finalStatus: hasFatalErrorPayload ? "error" : "ok", + }); const resolveRunOutcome = (result?: { delivered?: boolean; deliveryAttempted?: boolean; @@ -897,6 +923,15 @@ async function finalizeCronRun(params: { delivered: result?.delivered, deliveryAttempted: result?.deliveryAttempted, delivery: result?.delivery, + diagnostics: hasFatalErrorPayload + ? mergeCronRunDiagnostics( + agentDiagnostics, + createCronRunDiagnosticsFromError( + "agent-run", + embeddedRunError ?? "cron isolated run returned an error payload", + ), + ) + : agentDiagnostics, ...telemetry, }); const failPendingPresentationWarningUnlessDelivered = (delivered?: boolean) => { @@ -975,6 +1010,13 @@ async function finalizeCronRun(params: { deliveryAttempted: deliveryResult.result.deliveryAttempted ?? deliveryResult.deliveryAttempted, delivery: deliveryTrace, + diagnostics: mergeCronRunDiagnostics( + agentDiagnostics, + deliveryResult.result.diagnostics, + deliveryResult.result.status === "error" && deliveryResult.result.error + ? createCronRunDiagnosticsFromError("delivery", deliveryResult.result.error) + : undefined, + ), }; failPendingPresentationWarningUnlessDelivered( resultWithDeliveryMeta.delivered ?? deliveryResult.delivered, @@ -1066,7 +1108,11 @@ export async function runCronIsolatedAgentTurn(params: { suppressExecNotifyOnExit: prepared.context.suppressExecNotifyOnExit, }); if (isAborted()) { - return prepared.context.withRunSession({ status: "error", error: abortReason() }); + return prepared.context.withRunSession({ + status: "error", + error: abortReason(), + diagnostics: createCronRunDiagnosticsFromError("cron-setup", abortReason()), + }); } return await finalizeCronRun({ prepared: prepared.context, @@ -1075,6 +1121,10 @@ export async function runCronIsolatedAgentTurn(params: { isAborted, }); } catch (err) { - return prepared.context.withRunSession({ status: "error", error: String(err) }); + return prepared.context.withRunSession({ + status: "error", + error: String(err), + diagnostics: createCronRunDiagnosticsFromError("agent-run", err), + }); } } diff --git a/src/cron/run-diagnostics.test.ts b/src/cron/run-diagnostics.test.ts new file mode 100644 index 00000000000..5e3546a380d --- /dev/null +++ b/src/cron/run-diagnostics.test.ts @@ -0,0 +1,171 @@ +import { describe, expect, it } from "vitest"; +import { + createCronRunDiagnosticsFromAgentResult, + createCronRunDiagnosticsFromError, + mergeCronRunDiagnostics, + normalizeCronRunDiagnostics, + summarizeCronRunDiagnostics, +} from "./run-diagnostics.js"; + +describe("cron run diagnostics", () => { + it("normalizes and bounds diagnostic entries", () => { + const diagnostics = normalizeCronRunDiagnostics({ + summary: "x".repeat(2_100), + entries: Array.from({ length: 12 }, (_, i) => ({ + ts: i, + source: "exec", + severity: "error", + message: i === 11 ? `secret sk-1234567890abcdef ${"a".repeat(1_100)}` : `entry ${i}`, + })), + }); + + expect(diagnostics?.entries).toHaveLength(10); + expect(diagnostics?.entries[0]?.message).toBe("entry 2"); + expect(diagnostics?.entries.at(-1)?.message).toMatch(/…$/); + expect(diagnostics?.entries.at(-1)?.message).not.toContain("sk-1234567890abcdef"); + expect(diagnostics?.entries.at(-1)?.truncated).toBe(true); + expect(diagnostics?.summary).toHaveLength(2_000); + }); + + it("preserves later terminal diagnostics when capping entries", () => { + const diagnostics = normalizeCronRunDiagnostics({ + entries: [ + ...Array.from({ length: 10 }, (_, i) => ({ + ts: i, + source: "tool", + severity: "warn", + message: `tool warning ${i}`, + })), + { + ts: 11, + source: "delivery", + severity: "error", + message: "delivery failed", + }, + ], + }); + + expect(diagnostics?.entries).toHaveLength(10); + expect(diagnostics?.entries.map((entry) => entry.message)).not.toContain("tool warning 0"); + expect(diagnostics?.entries.at(-1)).toMatchObject({ + source: "delivery", + severity: "error", + message: "delivery failed", + }); + }); + + it("returns undefined for empty diagnostics", () => { + expect(normalizeCronRunDiagnostics({ entries: [] })).toBeUndefined(); + expect(normalizeCronRunDiagnostics({ entries: [{ source: "exec" }] })).toBeUndefined(); + expect(summarizeCronRunDiagnostics(undefined)).toBeUndefined(); + }); + + it("creates diagnostics from errors and prefers the latest error summary", () => { + const first = createCronRunDiagnosticsFromError("cron-preflight", "first failure", { + nowMs: () => 100, + }); + const second = createCronRunDiagnosticsFromError("delivery", new Error("delivery failed"), { + nowMs: () => 200, + }); + + const merged = mergeCronRunDiagnostics(first, second); + expect(merged?.summary).toBe("delivery failed"); + expect(merged?.entries.map((entry) => entry.message)).toEqual([ + "first failure", + "delivery failed", + ]); + expect(summarizeCronRunDiagnostics(merged)).toBe("delivery failed"); + }); + + it("keeps a later delivery error summary ahead of an earlier warning", () => { + const warning = normalizeCronRunDiagnostics({ + summary: "agent warning", + entries: [{ ts: 100, source: "agent-run", severity: "warn", message: "agent warning" }], + }); + const deliveryError = createCronRunDiagnosticsFromError("delivery", "delivery failed", { + nowMs: () => 200, + }); + + expect(mergeCronRunDiagnostics(warning, deliveryError)?.summary).toBe("delivery failed"); + }); + + it("extracts fatal agent result payloads and meta errors", () => { + const diagnostics = createCronRunDiagnosticsFromAgentResult( + { + payloads: [ + { text: "normal" }, + { text: "tool stderr", isError: true, toolName: "shell" }, + { + toolName: "exec", + details: { + status: "completed", + exitCode: 2, + aggregated: "stdout\nstderr failure", + }, + }, + ], + meta: { + error: { kind: "retry_limit", message: "retry limit exceeded" }, + failureSignal: { message: "SYSTEM_RUN_DENIED" }, + }, + }, + { nowMs: () => 123 }, + ); + + expect(diagnostics?.entries.map((entry) => entry.message)).toEqual([ + "tool stderr", + "stdout\nstderr failure", + "retry limit exceeded", + "SYSTEM_RUN_DENIED", + ]); + expect(diagnostics?.entries[1]).toMatchObject({ + source: "exec", + toolName: "exec", + exitCode: 2, + }); + }); + + it("does not capture harmless successful exec output", () => { + const result = { + payloads: [ + { + toolName: "exec", + details: { + status: "completed", + exitCode: 0, + aggregated: "progress written to stderr", + }, + }, + ], + }; + + expect(createCronRunDiagnosticsFromAgentResult(result)).toBeUndefined(); + expect( + createCronRunDiagnosticsFromAgentResult(result, { finalStatus: "error" }), + ).toBeUndefined(); + }); + + it("captures silent failed exec details with a fallback message", () => { + const diagnostics = createCronRunDiagnosticsFromAgentResult({ + payloads: [ + { + toolName: "exec", + details: { + status: "completed", + exitCode: 2, + }, + }, + ], + }); + + expect(diagnostics?.entries).toEqual([ + expect.objectContaining({ + source: "exec", + severity: "warn", + message: "exec failed with exit code 2", + toolName: "exec", + exitCode: 2, + }), + ]); + }); +}); diff --git a/src/cron/run-diagnostics.ts b/src/cron/run-diagnostics.ts new file mode 100644 index 00000000000..6c4eca75c57 --- /dev/null +++ b/src/cron/run-diagnostics.ts @@ -0,0 +1,313 @@ +import { redactSensitiveText } from "../logging/redact.js"; +import { normalizeOptionalString } from "../shared/string-coerce.js"; +import type { + CronRunDiagnostic, + CronRunDiagnostics, + CronRunDiagnosticSeverity, + CronRunDiagnosticSource, +} from "./types.js"; + +const MAX_ENTRIES = 10; +const MAX_ENTRY_CHARS = 1_000; +const MAX_SUMMARY_CHARS = 2_000; +const EXEC_DIAGNOSTIC_TAIL_CHARS = 2_000; + +function normalizeSeverity(value: unknown): CronRunDiagnosticSeverity { + return value === "info" || value === "warn" || value === "error" ? value : "error"; +} + +function normalizeSource(value: unknown): CronRunDiagnosticSource { + switch (value) { + case "cron-preflight": + case "cron-setup": + case "model-preflight": + case "agent-run": + case "tool": + case "exec": + case "delivery": + return value; + default: + return "agent-run"; + } +} + +function normalizeTimestamp(value: unknown, nowMs: () => number): number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 + ? Math.floor(value) + : nowMs(); +} + +function formatUnknownError(error: unknown): string { + if (error instanceof Error) { + return error.message || error.name; + } + return String(error); +} + +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === "object"; +} + +function normalizeToolName(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + return normalizeOptionalString(value); +} + +function normalizeExitCode(value: unknown): number | null | undefined { + if (typeof value === "number" && Number.isFinite(value)) { + return value; + } + return value === null ? null : undefined; +} + +function tailText(value: string, maxChars: number): string { + if (value.length <= maxChars) { + return value; + } + return value.slice(value.length - maxChars); +} + +function normalizeDiagnosticMessage(value: unknown): { message?: string; truncated?: boolean } { + if (typeof value !== "string") { + return {}; + } + const normalized = normalizeOptionalString(value); + if (!normalized) { + return {}; + } + const redacted = redactSensitiveText(normalized, { mode: "tools" }); + if (redacted.length <= MAX_ENTRY_CHARS) { + return { message: redacted }; + } + return { message: `${redacted.slice(0, MAX_ENTRY_CHARS - 1)}…`, truncated: true }; +} + +function trimSummary(value: string | undefined): string | undefined { + const normalized = normalizeOptionalString(value); + if (!normalized) { + return undefined; + } + if (normalized.length <= MAX_SUMMARY_CHARS) { + return normalized; + } + return `${normalized.slice(0, MAX_SUMMARY_CHARS - 1)}…`; +} + +export function summarizeCronRunDiagnostics( + diagnostics: CronRunDiagnostics | undefined, +): string | undefined { + if (!diagnostics) { + return undefined; + } + return trimSummary(diagnostics.summary ?? diagnostics.entries[0]?.message); +} + +export function normalizeCronRunDiagnostics( + value: unknown, + opts?: { nowMs?: () => number }, +): CronRunDiagnostics | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + const record = value as { summary?: unknown; entries?: unknown }; + const nowMs = opts?.nowMs ?? Date.now; + const entriesRaw = Array.isArray(record.entries) ? record.entries : []; + const entries: CronRunDiagnostic[] = []; + for (const item of entriesRaw) { + if (!item || typeof item !== "object") { + continue; + } + const entry = item as Partial; + const normalized = normalizeDiagnosticMessage(entry.message); + if (!normalized.message) { + continue; + } + entries.push({ + ts: normalizeTimestamp(entry.ts, nowMs), + source: normalizeSource(entry.source), + severity: normalizeSeverity(entry.severity), + message: normalized.message, + ...(typeof entry.toolName === "string" && entry.toolName.trim() + ? { toolName: entry.toolName.trim() } + : {}), + ...(typeof entry.exitCode === "number" && Number.isFinite(entry.exitCode) + ? { exitCode: entry.exitCode } + : entry.exitCode === null + ? { exitCode: null } + : {}), + ...(entry.truncated === true || normalized.truncated ? { truncated: true } : {}), + }); + if (entries.length > MAX_ENTRIES) { + entries.shift(); + } + } + const summary = trimSummary( + typeof record.summary === "string" + ? redactSensitiveText(record.summary, { mode: "tools" }) + : undefined, + ); + if (entries.length === 0 && !summary) { + return undefined; + } + return { ...(summary ? { summary } : {}), entries }; +} + +export function mergeCronRunDiagnostics( + ...values: Array +): CronRunDiagnostics | undefined { + const entries: CronRunDiagnostic[] = []; + let summaryCandidate: { summary: string; severity: number; order: number } | undefined; + for (const value of values) { + const normalized = normalizeCronRunDiagnostics(value); + if (!normalized) { + continue; + } + const entryCandidate = + normalized.entries.findLast((entry) => entry.severity === "error") ?? + normalized.entries.findLast((entry) => entry.severity === "warn") ?? + normalized.entries.findLast((entry) => entry.severity === "info"); + const summary = trimSummary(normalized.summary ?? entryCandidate?.message); + if (summary) { + const severity = + entryCandidate?.severity === "error" ? 2 : entryCandidate?.severity === "warn" ? 1 : 0; + const order = entries.length + normalized.entries.length; + if ( + !summaryCandidate || + severity > summaryCandidate.severity || + (severity === summaryCandidate.severity && order >= summaryCandidate.order) + ) { + summaryCandidate = { summary, severity, order }; + } + } + entries.push(...normalized.entries); + } + return normalizeCronRunDiagnostics({ + summary: summaryCandidate?.summary, + entries, + }); +} + +export function createCronRunDiagnosticsFromError( + source: CronRunDiagnosticSource, + error: unknown, + opts?: { + severity?: CronRunDiagnosticSeverity; + nowMs?: () => number; + toolName?: string; + exitCode?: number | null; + }, +): CronRunDiagnostics | undefined { + const message = formatUnknownError(error); + return normalizeCronRunDiagnostics( + { + summary: message, + entries: [ + { + ts: opts?.nowMs?.() ?? Date.now(), + source, + severity: opts?.severity ?? "error", + message, + toolName: opts?.toolName, + exitCode: opts?.exitCode, + }, + ], + }, + opts, + ); +} + +export function createCronRunDiagnosticsFromExecDetails( + details: unknown, + opts?: { + nowMs?: () => number; + toolName?: string; + }, +): CronRunDiagnostics | undefined { + if (!isRecord(details)) { + return undefined; + } + const status = typeof details.status === "string" ? details.status : undefined; + const exitCode = normalizeExitCode(details.exitCode); + const relevant = status === "failed" || (typeof exitCode === "number" && exitCode !== 0); + if (!relevant) { + return undefined; + } + const aggregated = normalizeOptionalString(details.aggregated); + const message = aggregated + ? tailText(aggregated, EXEC_DIAGNOSTIC_TAIL_CHARS) + : typeof exitCode === "number" + ? `exec failed with exit code ${exitCode}` + : "exec failed"; + return normalizeCronRunDiagnostics( + { + summary: message, + entries: [ + { + ts: opts?.nowMs?.() ?? Date.now(), + source: "exec", + severity: status === "failed" ? "error" : "warn", + message, + toolName: opts?.toolName, + exitCode, + }, + ], + }, + opts, + ); +} + +export function createCronRunDiagnosticsFromToolPayload( + payload: unknown, + opts?: { nowMs?: () => number; finalStatus?: "ok" | "error" | "skipped" }, +): CronRunDiagnostics | undefined { + if (!isRecord(payload)) { + return undefined; + } + const toolName = normalizeToolName(payload.toolName) ?? normalizeToolName(payload.name); + const detailsDiagnostics = createCronRunDiagnosticsFromExecDetails(payload.details, { + nowMs: opts?.nowMs, + toolName, + }); + const isError = payload.isError === true; + const text = typeof payload.text === "string" ? payload.text : undefined; + const textDiagnostics = + isError && text + ? createCronRunDiagnosticsFromError("tool", text, { + severity: "error", + nowMs: opts?.nowMs, + toolName, + }) + : undefined; + return mergeCronRunDiagnostics(detailsDiagnostics, textDiagnostics); +} + +export function createCronRunDiagnosticsFromAgentResult( + result: unknown, + opts?: { nowMs?: () => number; finalStatus?: "ok" | "error" | "skipped" }, +): CronRunDiagnostics | undefined { + const record = isRecord(result) ? result : {}; + const meta = + record.meta && typeof record.meta === "object" ? (record.meta as Record) : {}; + const diagnostics: Array = []; + const payloads = Array.isArray(record.payloads) ? record.payloads : []; + for (const payload of payloads) { + diagnostics.push(createCronRunDiagnosticsFromToolPayload(payload, opts)); + } + const metaError = + meta.error && typeof meta.error === "object" + ? (meta.error as { message?: unknown }) + : undefined; + if (typeof metaError?.message === "string") { + diagnostics.push(createCronRunDiagnosticsFromError("agent-run", metaError.message, opts)); + } + const failureSignal = + meta.failureSignal && typeof meta.failureSignal === "object" + ? (meta.failureSignal as { message?: unknown }) + : undefined; + if (typeof failureSignal?.message === "string") { + diagnostics.push(createCronRunDiagnosticsFromError("tool", failureSignal.message, opts)); + } + return mergeCronRunDiagnostics(...diagnostics); +} diff --git a/src/cron/run-log.test.ts b/src/cron/run-log.test.ts index cc0758f0242..59716f38fc9 100644 --- a/src/cron/run-log.test.ts +++ b/src/cron/run-log.test.ts @@ -311,6 +311,46 @@ describe("cron run log", () => { }); }); + it("reads and searches run diagnostics", async () => { + await withRunLogDir("openclaw-cron-log-diagnostics-", async (dir) => { + const logPath = path.join(dir, "runs", "job-1.jsonl"); + + await appendCronRunLog(logPath, { + ts: 1, + jobId: "job-1", + action: "finished", + status: "error", + diagnostics: { + summary: "exec stderr tail", + entries: [ + { + ts: 1, + source: "exec", + severity: "error", + message: "exec stderr tail", + exitCode: 2, + }, + ], + }, + }); + + const entries = await readCronRunLogEntries(logPath, { limit: 10, jobId: "job-1" }); + expect(entries[0]?.diagnostics).toMatchObject({ + summary: "exec stderr tail", + entries: [{ source: "exec", severity: "error", message: "exec stderr tail", exitCode: 2 }], + }); + expect( + ( + await readCronRunLogEntriesPage(logPath, { + limit: 10, + jobId: "job-1", + query: "stderr tail", + }) + ).entries, + ).toHaveLength(1); + }); + }); + it("reads telemetry fields", async () => { await withRunLogDir("openclaw-cron-log-telemetry-", async (dir) => { const logPath = path.join(dir, "runs", "job-1.jsonl"); diff --git a/src/cron/run-log.ts b/src/cron/run-log.ts index d98248f81b9..d9b70f55345 100644 --- a/src/cron/run-log.ts +++ b/src/cron/run-log.ts @@ -9,9 +9,11 @@ import { normalizeOptionalString, normalizeStringifiedOptionalString, } from "../shared/string-coerce.js"; +import { normalizeCronRunDiagnostics } from "./run-diagnostics.js"; import type { CronDeliveryStatus, CronDeliveryTrace, + CronRunDiagnostics, CronRunStatus, CronRunTelemetry, } from "./types.js"; @@ -23,6 +25,7 @@ export type CronRunLogEntry = { status?: CronRunStatus; error?: string; summary?: string; + diagnostics?: CronRunDiagnostics; delivered?: boolean; deliveryStatus?: CronDeliveryStatus; deliveryError?: string; @@ -312,6 +315,7 @@ function parseAllRunLogEntries(raw: string, opts?: { jobId?: string }): CronRunL error: obj.error, summary: obj.summary, runId: typeof obj.runId === "string" && obj.runId.trim() ? obj.runId : undefined, + diagnostics: normalizeCronRunDiagnostics(obj.diagnostics), runAtMs: obj.runAtMs, durationMs: obj.durationMs, nextRunAtMs: obj.nextRunAtMs, @@ -408,6 +412,8 @@ export async function readCronRunLogEntriesPage( [ entry.summary ?? "", entry.error ?? "", + entry.diagnostics?.summary ?? "", + ...(entry.diagnostics?.entries ?? []).map((diagnostic) => diagnostic.message), entry.jobId, entry.delivery?.intended?.channel ?? "", entry.delivery?.resolved?.channel ?? "", @@ -472,6 +478,8 @@ export async function readCronRunLogEntriesPageAll( return [ entry.summary ?? "", entry.error ?? "", + entry.diagnostics?.summary ?? "", + ...(entry.diagnostics?.entries ?? []).map((diagnostic) => diagnostic.message), entry.jobId, jobName, entry.delivery?.intended?.channel ?? "", diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 1c8f9e7f606..0fdd3f23151 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -6,6 +6,7 @@ import { createRunningTaskRun, failTaskRunByRunId, } from "../../tasks/detached-task-runtime.js"; +import { createCronRunDiagnosticsFromError } from "../run-diagnostics.js"; import { createCronExecutionId } from "../run-id.js"; import type { CronJob, CronJobCreate, CronJobPatch } from "../types.js"; import { @@ -470,12 +471,17 @@ async function skipInvalidPersistedManualRun(params: { }) { const endedAt = params.state.deps.nowMs(); const errorText = normalizeCronRunErrorText(params.error); + const diagnostics = createCronRunDiagnosticsFromError("cron-preflight", errorText, { + severity: "warn", + nowMs: params.state.deps.nowMs, + }); const shouldDelete = applyJobResult( params.state, params.job, { status: "skipped", error: errorText, + diagnostics, startedAt: endedAt, endedAt, }, @@ -487,6 +493,7 @@ async function skipInvalidPersistedManualRun(params: { action: "finished", status: "skipped", error: errorText, + diagnostics, runAtMs: endedAt, durationMs: params.job.state.lastDurationMs, nextRunAtMs: params.job.state.nextRunAtMs, @@ -712,6 +719,7 @@ async function finishPreparedManualRun( { status: coreResult.status, error: coreResult.error, + diagnostics: coreResult.diagnostics, delivered: coreResult.delivered, startedAt, endedAt, @@ -726,6 +734,7 @@ async function finishPreparedManualRun( status: coreResult.status, error: coreResult.error, summary: coreResult.summary, + diagnostics: coreResult.diagnostics, delivered: coreResult.delivered, deliveryStatus: job.state.lastDeliveryStatus, deliveryError: job.state.lastDeliveryError, diff --git a/src/cron/service/state.ts b/src/cron/service/state.ts index de2f0ec1581..6232e2499da 100644 --- a/src/cron/service/state.ts +++ b/src/cron/service/state.ts @@ -6,6 +6,7 @@ import type { CronJob, CronJobCreate, CronJobPatch, + CronRunDiagnostics, CronMessageChannel, CronAgentExecutionStarted, CronRunOutcome, @@ -24,6 +25,7 @@ export type CronEvent = { status?: CronRunStatus; error?: string; summary?: string; + diagnostics?: CronRunDiagnostics; delivered?: boolean; deliveryStatus?: CronDeliveryStatus; deliveryError?: string; diff --git a/src/cron/service/timer.regression.test.ts b/src/cron/service/timer.regression.test.ts index 35197a0f83a..6ad0757dd10 100644 --- a/src/cron/service/timer.regression.test.ts +++ b/src/cron/service/timer.regression.test.ts @@ -1486,4 +1486,51 @@ describe("cron service timer regressions", () => { expect(job.state.lastRunAtMs).toBe(startedAt); expect(job.state.nextRunAtMs).toBe(expectedNextMs); }); + + it("persists last cron run diagnostics on job state", () => { + const startedAt = Date.parse("2026-04-14T12:00:00.000Z"); + const endedAt = startedAt + 500; + const job = createIsolatedRegressionJob({ + id: "diagnostics-job", + name: "diagnostics-job", + scheduledAt: startedAt, + schedule: { kind: "every", everyMs: 60_000, anchorMs: startedAt }, + payload: { kind: "agentTurn", message: "diagnose" }, + state: { runningAtMs: startedAt }, + }); + const state = createCronServiceState({ + cronEnabled: true, + storePath: "/tmp/cron-diagnostics-job.json", + log: noopLogger, + nowMs: () => endedAt, + enqueueSystemEvent: vi.fn(), + requestHeartbeat: vi.fn(), + runIsolatedAgentJob: createDefaultIsolatedRunner(), + }); + + applyJobResult(state, job, { + status: "error", + error: "failed", + diagnostics: { + summary: "exec stderr tail", + entries: [ + { + ts: startedAt, + source: "exec", + severity: "error", + message: "exec stderr tail", + exitCode: 1, + }, + ], + }, + startedAt, + endedAt, + }); + + expect(job.state.lastDiagnostics).toMatchObject({ + summary: "exec stderr tail", + entries: [{ source: "exec", severity: "error", message: "exec stderr tail", exitCode: 1 }], + }); + expect(job.state.lastDiagnosticSummary).toBe("exec stderr tail"); + }); }); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 7eba1c74963..f6cbe239048 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -14,6 +14,11 @@ import { } from "../../tasks/detached-task-runtime.js"; import { clearCronJobActive, markCronJobActive } from "../active-jobs.js"; import { resolveCronDeliveryPlan } from "../delivery-plan.js"; +import { + createCronRunDiagnosticsFromError, + normalizeCronRunDiagnostics, + summarizeCronRunDiagnostics, +} from "../run-diagnostics.js"; import { createCronExecutionId } from "../run-id.js"; import { sweepCronRunSessions } from "../session-reaper.js"; import type { @@ -151,7 +156,13 @@ export async function executeJobCoreWithTimeout( return first; } await cleanupTimedOutCronAgentRun(state, job, jobTimeoutMs, activeExecution); - return { status: "error", error: timeoutErrorMessage() }; + return { + status: "error", + error: timeoutErrorMessage(), + diagnostics: createCronRunDiagnosticsFromError("cron-setup", timeoutErrorMessage(), { + nowMs: state.deps.nowMs, + }), + }; } finally { if (timeoutId) { clearTimeout(timeoutId); @@ -512,6 +523,7 @@ export function applyJobResult( result: { status: CronRunStatus; error?: string; + diagnostics?: CronRunOutcome["diagnostics"]; delivered?: boolean; startedAt: number; endedAt: number; @@ -537,6 +549,8 @@ export function applyJobResult( job.state.lastStatus = result.status; job.state.lastDurationMs = Math.max(0, result.endedAt - result.startedAt); job.state.lastError = result.error; + job.state.lastDiagnostics = normalizeCronRunDiagnostics(result.diagnostics); + job.state.lastDiagnosticSummary = summarizeCronRunDiagnostics(job.state.lastDiagnostics); job.state.lastErrorReason = result.status === "error" && typeof result.error === "string" ? (resolveFailoverReasonFromError(result.error) ?? undefined) @@ -717,6 +731,7 @@ function applyOutcomeToStoredJob(state: CronServiceState, result: TimedCronRunOu applyJobResult(state, result.job, { status: result.status, error: result.error, + diagnostics: result.diagnostics, delivered: result.delivered, startedAt: result.startedAt, endedAt: result.endedAt, @@ -738,6 +753,7 @@ function applyOutcomeToStoredJob(state: CronServiceState, result: TimedCronRunOu const shouldDelete = applyJobResult(state, job, { status: result.status, error: result.error, + diagnostics: result.diagnostics, delivered: result.delivered, startedAt: result.startedAt, endedAt: result.endedAt, @@ -906,6 +922,9 @@ export async function onTimer(state: CronServiceState) { taskRunId, status: "error", error: errorText, + diagnostics: createCronRunDiagnosticsFromError("cron-setup", errorText, { + nowMs: state.deps.nowMs, + }), startedAt, endedAt: state.deps.nowMs(), }; @@ -1228,6 +1247,7 @@ async function runStartupCatchupCandidate( status: result.status, error: result.error, summary: result.summary, + diagnostics: result.diagnostics, delivered: result.delivered, sessionId: result.sessionId, sessionKey: result.sessionKey, @@ -1244,6 +1264,9 @@ async function runStartupCatchupCandidate( taskRunId, status: "error", error: normalizeCronRunErrorText(err), + diagnostics: createCronRunDiagnosticsFromError("cron-setup", normalizeCronRunErrorText(err), { + nowMs: state.deps.nowMs, + }), startedAt, endedAt: state.deps.nowMs(), }; @@ -1473,10 +1496,24 @@ async function executeDetachedCronJob( } > { if (job.payload.kind !== "agentTurn") { - return { status: "skipped", error: "isolated job requires payload.kind=agentTurn" }; + const error = "isolated job requires payload.kind=agentTurn"; + return { + status: "skipped", + error, + diagnostics: createCronRunDiagnosticsFromError("cron-preflight", error, { + severity: "warn", + nowMs: state.deps.nowMs, + }), + }; } if (abortSignal?.aborted) { - return resolveAbortError(); + const aborted = resolveAbortError(); + return { + ...aborted, + diagnostics: createCronRunDiagnosticsFromError("cron-setup", aborted.error, { + nowMs: state.deps.nowMs, + }), + }; } const res = await state.deps.runIsolatedAgentJob({ @@ -1487,7 +1524,13 @@ async function executeDetachedCronJob( }); if (abortSignal?.aborted) { - return { status: "error", error: timeoutErrorMessage() }; + return { + status: "error", + error: timeoutErrorMessage(), + diagnostics: createCronRunDiagnosticsFromError("cron-setup", timeoutErrorMessage(), { + nowMs: state.deps.nowMs, + }), + }; } return { @@ -1499,6 +1542,7 @@ async function executeDetachedCronJob( delivery: res.delivery, sessionId: res.sessionId, sessionKey: res.sessionKey, + diagnostics: res.diagnostics, model: res.model, provider: res.provider, usage: res.usage, @@ -1540,6 +1584,7 @@ export async function executeJob( const shouldDelete = applyJobResult(state, job, { status: coreResult.status, error: coreResult.error, + diagnostics: coreResult.diagnostics, delivered: coreResult.delivered, startedAt, endedAt, @@ -1572,6 +1617,7 @@ function emitJobFinished( status: result.status, error: result.error, summary: result.summary, + diagnostics: result.diagnostics, delivered: result.delivered, deliveryStatus: job.state.lastDeliveryStatus, deliveryError: job.state.lastDeliveryError, diff --git a/src/cron/types.ts b/src/cron/types.ts index 44e40c66e3d..e9e829a8610 100644 --- a/src/cron/types.ts +++ b/src/cron/types.ts @@ -88,6 +88,32 @@ export type CronRunTelemetry = { usage?: CronUsageSummary; }; +export type CronRunDiagnosticSeverity = "info" | "warn" | "error"; + +export type CronRunDiagnosticSource = + | "cron-preflight" + | "cron-setup" + | "model-preflight" + | "agent-run" + | "tool" + | "exec" + | "delivery"; + +export type CronRunDiagnostic = { + ts: number; + source: CronRunDiagnosticSource; + severity: CronRunDiagnosticSeverity; + message: string; + toolName?: string; + exitCode?: number | null; + truncated?: boolean; +}; + +export type CronRunDiagnostics = { + summary?: string; + entries: CronRunDiagnostic[]; +}; + export type CronRunOutcome = { status: CronRunStatus; error?: string; @@ -96,6 +122,7 @@ export type CronRunOutcome = { summary?: string; sessionId?: string; sessionKey?: string; + diagnostics?: CronRunDiagnostics; }; export type CronAgentExecutionStarted = { @@ -157,6 +184,8 @@ export type CronJobState = { /** @deprecated Use lastRunStatus. */ lastStatus?: "ok" | "error" | "skipped"; lastError?: string; + lastDiagnostics?: CronRunDiagnostics; + lastDiagnosticSummary?: string; /** Classified reason for the last error (when available). */ lastErrorReason?: FailoverReason; lastDurationMs?: number; diff --git a/src/gateway/protocol/schema/cron.ts b/src/gateway/protocol/schema/cron.ts index f1b268763d2..c985e9fcbac 100644 --- a/src/gateway/protocol/schema/cron.ts +++ b/src/gateway/protocol/schema/cron.ts @@ -74,6 +74,39 @@ const CronFailoverReasonSchema = Type.Union([ Type.Literal("unclassified"), Type.Literal("unknown"), ]); +const CronRunDiagnosticSeveritySchema = Type.Union([ + Type.Literal("info"), + Type.Literal("warn"), + Type.Literal("error"), +]); +const CronRunDiagnosticSourceSchema = Type.Union([ + Type.Literal("cron-preflight"), + Type.Literal("cron-setup"), + Type.Literal("model-preflight"), + Type.Literal("agent-run"), + Type.Literal("tool"), + Type.Literal("exec"), + Type.Literal("delivery"), +]); +const CronRunDiagnosticSchema = Type.Object( + { + ts: Type.Integer({ minimum: 0 }), + source: CronRunDiagnosticSourceSchema, + severity: CronRunDiagnosticSeveritySchema, + message: Type.String(), + toolName: Type.Optional(Type.String()), + exitCode: Type.Optional(Type.Union([Type.Number(), Type.Null()])), + truncated: Type.Optional(Type.Boolean()), + }, + { additionalProperties: false }, +); +const CronRunDiagnosticsSchema = Type.Object( + { + summary: Type.Optional(Type.String()), + entries: Type.Array(CronRunDiagnosticSchema), + }, + { additionalProperties: false }, +); const CronCommonOptionalFields = { agentId: Type.Optional(Type.Union([NonEmptyString, Type.Null()])), sessionKey: Type.Optional(Type.Union([NonEmptyString, Type.Null()])), @@ -238,6 +271,28 @@ export const CronDeliveryPatchSchema = Type.Object( ); export const CronJobStateSchema = Type.Object( + { + nextRunAtMs: Type.Optional(Type.Integer({ minimum: 0 })), + runningAtMs: Type.Optional(Type.Integer({ minimum: 0 })), + lastRunAtMs: Type.Optional(Type.Integer({ minimum: 0 })), + lastRunStatus: Type.Optional(CronRunStatusSchema), + lastStatus: Type.Optional(DeprecatedCronRunStatusSchema), + lastError: Type.Optional(Type.String()), + lastDiagnostics: Type.Optional(CronRunDiagnosticsSchema), + lastDiagnosticSummary: Type.Optional(Type.String()), + lastErrorReason: Type.Optional(CronFailoverReasonSchema), + lastDurationMs: Type.Optional(Type.Integer({ minimum: 0 })), + consecutiveErrors: Type.Optional(Type.Integer({ minimum: 0 })), + consecutiveSkipped: Type.Optional(Type.Integer({ minimum: 0 })), + lastDelivered: Type.Optional(Type.Boolean()), + lastDeliveryStatus: Type.Optional(CronDeliveryStatusSchema), + lastDeliveryError: Type.Optional(Type.String()), + lastFailureAlertAtMs: Type.Optional(Type.Integer({ minimum: 0 })), + }, + { additionalProperties: false }, +); + +const CronJobStatePatchSchema = Type.Object( { nextRunAtMs: Type.Optional(Type.Integer({ minimum: 0 })), runningAtMs: Type.Optional(Type.Integer({ minimum: 0 })), @@ -318,7 +373,7 @@ export const CronJobPatchSchema = Type.Object( payload: Type.Optional(CronPayloadPatchSchema), delivery: Type.Optional(CronDeliveryPatchSchema), failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])), - state: Type.Optional(Type.Partial(CronJobStateSchema)), + state: Type.Optional(CronJobStatePatchSchema), }, { additionalProperties: false }, ); @@ -360,6 +415,7 @@ export const CronRunLogEntrySchema = Type.Object( status: Type.Optional(CronRunStatusSchema), error: Type.Optional(Type.String()), summary: Type.Optional(Type.String()), + diagnostics: Type.Optional(CronRunDiagnosticsSchema), delivered: Type.Optional(Type.Boolean()), deliveryStatus: Type.Optional(CronDeliveryStatusSchema), deliveryError: Type.Optional(Type.String()), diff --git a/src/gateway/server-cron.ts b/src/gateway/server-cron.ts index b06ce9ec0da..cfdfc9a5774 100644 --- a/src/gateway/server-cron.ts +++ b/src/gateway/server-cron.ts @@ -406,6 +406,7 @@ export function buildGatewayCronService(params: { status: evt.status, error: evt.error, summary: evt.summary, + diagnostics: evt.diagnostics, delivered: evt.delivered, deliveryStatus: evt.deliveryStatus, deliveryError: evt.deliveryError, diff --git a/src/gateway/server-methods/cron.validation.test.ts b/src/gateway/server-methods/cron.validation.test.ts index 3a1602c995d..169a18e3318 100644 --- a/src/gateway/server-methods/cron.validation.test.ts +++ b/src/gateway/server-methods/cron.validation.test.ts @@ -227,6 +227,39 @@ describe("cron method validation", () => { expect(respond).toHaveBeenCalledWith(true, { id: "cron-1" }, undefined); }); + it("rejects execution-derived diagnostics in cron.update state patches", async () => { + const { context, respond } = await invokeCronUpdate( + { + id: "cron-1", + patch: { + state: { + lastDiagnostics: { + summary: "forged", + entries: [ + { + ts: 1, + source: "agent-run", + severity: "error", + message: "forged", + }, + ], + }, + }, + }, + }, + createCronJob(), + ); + + expect(context.cron.update).not.toHaveBeenCalled(); + expect(respond).toHaveBeenCalledWith( + false, + undefined, + expect.objectContaining({ + code: "INVALID_REQUEST", + }), + ); + }); + it("rejects ambiguous announce delivery on add when multiple channels are configured", async () => { getRuntimeConfig.mockReturnValue({ session: {