feat(cron): add failure destination support to failed cron jobs (#31059)

* feat(cron): add failure destination support with webhook mode and bestEffort handling

Extends PR #24789 failure alerts with features from PR #29145:
- Add webhook delivery mode for failure alerts (mode: 'webhook')
- Add accountId support for multi-account channel configurations
- Add bestEffort handling to skip alerts when job has bestEffort=true
- Add separate failureDestination config (global + per-job in delivery)
- Add duplicate prevention (prevents sending to same as primary delivery)
- Add CLI flags: --failure-alert-mode, --failure-alert-account-id
- Add UI fields for new options in web cron editor

* fix(cron): merge failureAlert mode/accountId and preserve failureDestination on updates

- Fix mergeCronFailureAlert to merge mode and accountId fields
- Fix mergeCronDelivery to preserve failureDestination on updates
- Fix isSameDeliveryTarget to use 'announce' as default instead of 'none'
  to properly detect duplicates when delivery.mode is undefined

* fix(cron): validate webhook mode requires URL in resolveFailureDestination

When mode is 'webhook' but no 'to' URL is provided, return null
instead of creating an invalid plan that silently fails later.

* fix(cron): fail closed on webhook mode without URL and make failureDestination fields clearable

- sendCronFailureAlert: fail closed when mode is webhook but URL is missing
- mergeCronDelivery: use per-key presence checks so callers can clear
  nested failureDestination fields via cron.update

Note: protocol:check shows missing internalEvents in Swift models - this is
a pre-existing issue unrelated to these changes (upstream sync needed).

* fix(cron): use separate schema for failureDestination and fix type cast

- Create CronFailureDestinationSchema excluding after/cooldownMs fields
- Fix type cast in sendFailureNotificationAnnounce to use CronMessageChannel

* fix(cron): merge global failureDestination with partial job overrides

When job has partial failureDestination config, fall back to global
config for unset fields instead of treating it as a full override.

* fix(cron): avoid forcing announce mode and clear inherited to on mode change

- UI: only include mode in patch if explicitly set to non-default
- delivery.ts: clear inherited 'to' when job overrides mode, since URL
  semantics differ between announce and webhook modes

* fix(cron): preserve explicit to on mode override and always include mode in UI patches

- delivery.ts: preserve job-level explicit 'to' when overriding mode
- UI: always include mode in failureAlert patch so users can switch between announce/webhook

* fix(cron): allow clearing accountId and treat undefined global mode as announce

- UI: always include accountId in patch so users can clear it
- delivery.ts: treat undefined global mode as announce when comparing for clearing inherited 'to'

* Cron: harden failure destination routing and add regression coverage

* Cron: resolve failure destination review feedback

* Cron: drop unrelated timeout assertions from conflict resolution

* Cron: format cron CLI regression test

* Cron: align gateway cron test mock types

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Evgeny Zislis
2026-03-02 17:27:41 +02:00
committed by GitHub
parent a905b6dabc
commit 4b4ea5df8b
22 changed files with 993 additions and 89 deletions

View File

@@ -44,5 +44,7 @@ export const DEFAULT_CRON_FORM: CronFormState = {
failureAlertCooldownSeconds: "3600",
failureAlertChannel: "last",
failureAlertTo: "",
failureAlertDeliveryMode: "announce",
failureAlertAccountId: "",
timeoutSeconds: "",
};

View File

@@ -630,6 +630,52 @@ describe("cron controller", () => {
cooldownMs: 120_000,
channel: "telegram",
to: "123456",
mode: "announce",
accountId: undefined,
},
},
});
});
it("includes failure alert mode/accountId in cron.update patch", async () => {
const request = vi.fn(async (method: string, _payload?: unknown) => {
if (method === "cron.update") {
return { id: "job-alert-mode" };
}
if (method === "cron.list") {
return { jobs: [{ id: "job-alert-mode" }] };
}
if (method === "cron.status") {
return { enabled: true, jobs: 1, nextWakeAtMs: null };
}
return {};
});
const state = createState({
client: { request } as unknown as CronState["client"],
cronEditingJobId: "job-alert-mode",
cronForm: {
...DEFAULT_CRON_FORM,
name: "alert mode job",
payloadKind: "agentTurn",
payloadText: "run it",
failureAlertMode: "custom",
failureAlertAfter: "1",
failureAlertDeliveryMode: "webhook",
failureAlertAccountId: "bot-a",
},
});
await addCronJob(state);
const updateCall = request.mock.calls.find(([method]) => method === "cron.update");
expect(updateCall).toBeDefined();
expect(updateCall?.[1]).toMatchObject({
id: "job-alert-mode",
patch: {
failureAlert: {
after: 1,
mode: "webhook",
accountId: "bot-a",
},
},
});
@@ -780,6 +826,8 @@ describe("cron controller", () => {
expect(state.cronForm.failureAlertCooldownSeconds).toBe("30");
expect(state.cronForm.failureAlertChannel).toBe("telegram");
expect(state.cronForm.failureAlertTo).toBe("999");
expect(state.cronForm.failureAlertDeliveryMode).toBe("announce");
expect(state.cronForm.failureAlertAccountId).toBe("");
});
it("validates key cron form errors", () => {

View File

@@ -481,6 +481,12 @@ function jobToForm(job: CronJob, prev: CronFormState): CronFormState {
? (failureAlert.channel ?? CRON_CHANNEL_LAST)
: CRON_CHANNEL_LAST,
failureAlertTo: failureAlert && typeof failureAlert === "object" ? (failureAlert.to ?? "") : "",
failureAlertDeliveryMode:
failureAlert && typeof failureAlert === "object"
? (failureAlert.mode ?? "announce")
: "announce",
failureAlertAccountId:
failureAlert && typeof failureAlert === "object" ? (failureAlert.accountId ?? "") : "",
timeoutSeconds:
job.payload.kind === "agentTurn" && typeof job.payload.timeoutSeconds === "number"
? String(job.payload.timeoutSeconds)
@@ -593,12 +599,21 @@ function buildFailureAlert(form: CronFormState) {
cooldownSeconds !== undefined && Number.isFinite(cooldownSeconds) && cooldownSeconds >= 0
? Math.floor(cooldownSeconds * 1000)
: undefined;
return {
const deliveryMode = form.failureAlertDeliveryMode;
const accountId = form.failureAlertAccountId.trim();
const patch: Record<string, unknown> = {
after: after > 0 ? Math.floor(after) : undefined,
channel: form.failureAlertChannel.trim() || CRON_CHANNEL_LAST,
to: form.failureAlertTo.trim() || undefined,
...(cooldownMs !== undefined ? { cooldownMs } : {}),
};
// Always include mode and accountId so users can switch/clear them
if (deliveryMode) {
patch.mode = deliveryMode;
}
// Include accountId if explicitly set, or send undefined to allow clearing
patch.accountId = accountId || undefined;
return patch;
}
export async function addCronJob(state: CronState) {

View File

@@ -479,6 +479,14 @@ export type CronDelivery = {
to?: string;
accountId?: string;
bestEffort?: boolean;
failureDestination?: CronFailureDestination;
};
export type CronFailureDestination = {
channel?: string;
to?: string;
mode?: "announce" | "webhook";
accountId?: string;
};
export type CronFailureAlert = {
@@ -486,6 +494,8 @@ export type CronFailureAlert = {
channel?: string;
to?: string;
cooldownMs?: number;
mode?: "announce" | "webhook";
accountId?: string;
};
export type CronJobState = {

View File

@@ -48,5 +48,7 @@ export type CronFormState = {
failureAlertCooldownSeconds: string;
failureAlertChannel: string;
failureAlertTo: string;
failureAlertDeliveryMode: "announce" | "webhook";
failureAlertAccountId: string;
timeoutSeconds: string;
};

View File

@@ -1279,6 +1279,31 @@ export function renderCron(props: CronProps) {
Optional recipient override for failure alerts.
</div>
</label>
<label class="field">
${renderFieldLabel("Alert mode")}
<select
.value=${props.form.failureAlertDeliveryMode || "announce"}
@change=${(e: Event) =>
props.onFormChange({
failureAlertDeliveryMode: (e.target as HTMLSelectElement)
.value as CronFormState["failureAlertDeliveryMode"],
})}
>
<option value="announce">Announce (via channel)</option>
<option value="webhook">Webhook (HTTP POST)</option>
</select>
</label>
<label class="field">
${renderFieldLabel("Alert account ID")}
<input
.value=${props.form.failureAlertAccountId}
@input=${(e: Event) =>
props.onFormChange({
failureAlertAccountId: (e.target as HTMLInputElement).value,
})}
placeholder="Account ID for multi-account setups"
/>
</label>
`
: nothing
}