fix: prevent stale subagent failure announces

2026-05-06 05:50:43 +00:00 · 2026-04-21 19:58:12 +01:00
parent dcf131e54c
commit 11efbf5a2e
9 changed files with 110 additions and 13 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- Agents/subagents: stop terminal failed subagent runs from freezing or announcing captured reply text, so failover-exhausted runs report a clean failure instead of replaying stale assistant/tool output.
 - Auth/commands: require owner identity (an owner-candidate match or internal `operator.admin`) for owner-enforced commands instead of treating wildcard channel `allowFrom` or empty owner-candidate lists as sufficient, so non-owner senders can no longer reach owner-only commands through a permissive fallback when `enforceOwnerForCommands=true` and `commands.ownerAllowFrom` is unset. (#69774) Thanks @drobison00.
 - Control UI/CSP: tighten `img-src` to `'self' data:` only, and make Control UI avatar helpers drop remote `http(s)` and protocol-relative URLs so the UI falls back to the built-in logo/badge instead of issuing arbitrary remote image fetches. Same-origin avatar routes (relative paths) and `data:image/...` avatars still render. (#69773)
 - CLI/channels: keep `status`, `health`, `channels list`, and `channels status` on read-only channel metadata when Telegram, Slack, Discord, or third-party channel plugins are configured, avoiding full bundled plugin runtime imports on those cold paths. Fixes #69042. (#69479) Thanks @gumadeiras.
--- a/docs/automation/tasks.md
+++ b/docs/automation/tasks.md
@@ -227,7 +227,7 @@ Completion cleanup is also runtime-aware:
 - Isolated cron completion best-effort closes tracked browser tabs/processes for the cron session before the run fully tears down.
 - Isolated cron delivery waits out descendant subagent follow-up when needed and
  suppresses stale parent acknowledgement text instead of announcing it.
- Subagent completion delivery prefers the latest visible assistant text; if that is empty it falls back to sanitized latest tool/toolResult text, and timeout-only tool-call runs can collapse to a short partial-progress summary.
+- Subagent completion delivery prefers the latest visible assistant text; if that is empty it falls back to sanitized latest tool/toolResult text, and timeout-only tool-call runs can collapse to a short partial-progress summary. Terminal failed runs announce failure status without replaying captured reply text.
 - Cleanup failures do not mask the real task outcome.

 ### `tasks flow list|show|cancel`
--- a/docs/tools/subagents.md
+++ b/docs/tools/subagents.md
@@ -55,7 +55,7 @@ transcript path on disk when you need the raw full transcript.
  - thread-bound or conversation-bound completion routes win when available
  - if the completion origin only provides a channel, OpenClaw fills the missing target/account from the requester session's resolved route (`lastChannel` / `lastTo` / `lastAccountId`) so direct delivery still works
 - The completion handoff to the requester session is runtime-generated internal context (not user-authored text) and includes:
-  - `Result` (latest visible `assistant` reply text, otherwise sanitized latest tool/toolResult text)
+  - `Result` (latest visible `assistant` reply text, otherwise sanitized latest tool/toolResult text; terminal failed runs do not reuse captured reply text)
  - `Status` (`completed successfully` / `failed` / `timed out` / `unknown`)
  - compact runtime/token stats
  - a delivery instruction telling the requester agent to rewrite in normal assistant voice (not forward raw internal metadata)
@@ -249,7 +249,7 @@ Sub-agents report back via an announce step:
  - child session key/id
  - announce type + task label
  - status line derived from runtime outcome (`success`, `error`, `timeout`, or `unknown`)
-  - result content selected from the latest visible assistant text, otherwise sanitized latest tool/toolResult text
+  - result content selected from the latest visible assistant text, otherwise sanitized latest tool/toolResult text; terminal failed runs report failure status without replaying captured reply text
  - a follow-up instruction describing when to reply vs. stay silent
 - `Status` is not inferred from model output; it comes from runtime outcome signals.
 - On timeout, if the child only got through tool calls, announce can collapse that history into a short partial-progress summary instead of replaying raw tool output.
--- a/src/agents/subagent-announce-output.ts
+++ b/src/agents/subagent-announce-output.ts
@@ -63,6 +63,10 @@ export type SubagentRunOutcome = {
  elapsedMs?: number;
 };

+function isFailedOutcome(outcome?: SubagentRunOutcome): boolean {
+  return outcome?.status === "error";
+}
+
 function readFiniteNumber(value: number | undefined): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
 }
@@ -253,6 +257,9 @@ function selectSubagentOutputText(
  snapshot: SubagentOutputSnapshot,
  outcome?: SubagentRunOutcome,
 ): string | undefined {
+  if (isFailedOutcome(outcome)) {
+    return undefined;
+  }
  if (snapshot.latestSilentText) {
    return snapshot.latestSilentText;
  }
@@ -270,6 +277,9 @@ export async function readSubagentOutput(
  sessionKey: string,
  outcome?: SubagentRunOutcome,
 ): Promise<string | undefined> {
+  if (isFailedOutcome(outcome)) {
+    return undefined;
+  }
  const history = await subagentAnnounceOutputDeps.callGateway({
    method: "chat.history",
    params: { sessionKey, limit: 100 },
@@ -347,14 +357,18 @@ export function applySubagentWaitOutcome(params: {

 export async function captureSubagentCompletionReply(
  sessionKey: string,
-  options?: { waitForReply?: boolean },
+  options?: { waitForReply?: boolean; outcome?: SubagentRunOutcome },
 ): Promise<string | undefined> {
+  if (isFailedOutcome(options?.outcome)) {
+    return undefined;
+  }
  return await captureSubagentCompletionReplyUsing({
    sessionKey,
    waitForReply: options?.waitForReply,
    maxWaitMs: isFastTestMode() ? 50 : 1_500,
    retryIntervalMs: isFastTestMode() ? FAST_TEST_RETRY_INTERVAL_MS : 100,
-    readSubagentOutput: async (nextSessionKey) => await readSubagentOutput(nextSessionKey),
+    readSubagentOutput: async (nextSessionKey) =>
+      await readSubagentOutput(nextSessionKey, options?.outcome),
  });
 }

--- a/src/agents/subagent-announce.timeout.test.ts
+++ b/src/agents/subagent-announce.timeout.test.ts
@@ -453,6 +453,32 @@ describe("subagent announce timeout config", () => {
    expect(internalEvents[0]?.result).not.toContain("data");
  });

+  it("does not announce cached reply text when the child run terminally failed", async () => {
+    chatHistoryMessages = [
+      { role: "assistant", content: [{ type: "text", text: "stale history output" }] },
+      { role: "toolResult", content: [{ type: "text", text: "stale tool output" }] },
+    ];
+
+    await runAnnounceFlowForTest("run-terminal-error-no-stale-output", {
+      outcome: { status: "error", error: "All models failed (2): timeout" },
+      roundOneReply: "stale frozen output",
+      fallbackReply: "older fallback output",
+    });
+
+    const directAgentCall = findFinalDirectAgentCall();
+    const internalEvents =
+      (directAgentCall?.params?.internalEvents as Array<{
+        result?: string;
+        status?: string;
+        statusLabel?: string;
+      }>) ?? [];
+    expect(internalEvents[0]?.status).toBe("error");
+    expect(internalEvents[0]?.statusLabel).toContain("All models failed");
+    expect(internalEvents[0]?.result).toBe("(no output)");
+    expect(directAgentCall?.params?.message).not.toContain("stale");
+    expect(directAgentCall?.params?.message).not.toContain("older fallback");
+  });
+
  it("preserves NO_REPLY when timeout history ends with silence after earlier progress", async () => {
    chatHistoryMessages = [
      {
--- a/src/agents/subagent-announce.ts
+++ b/src/agents/subagent-announce.ts
@@ -285,6 +285,10 @@ export async function runSubagentAnnounceFlow(params: {
    if (!outcome) {
      outcome = { status: "unknown" };
    }
+    const failedTerminalOutcome = outcome.status === "error";
+    if (failedTerminalOutcome) {
+      reply = undefined;
+    }

    let requesterDepth = getSubagentDepthFromSessionStore(targetRequesterSessionKey);
    const requesterIsInternalSession = () =>
@@ -366,7 +370,7 @@ export async function runSubagentAnnounceFlow(params: {
      }
    }

-    if (!childCompletionFindings) {
+    if (!childCompletionFindings && !failedTerminalOutcome) {
      const fallbackReply = normalizeOptionalString(params.fallbackReply);
      const fallbackIsSilent =
        Boolean(fallbackReply) &&
--- a/src/agents/subagent-registry-lifecycle.test.ts
+++ b/src/agents/subagent-registry-lifecycle.test.ts
@@ -328,9 +328,50 @@ describe("subagent registry lifecycle hardening", () => {

    expect(captureSubagentCompletionReply).toHaveBeenCalledWith(entry.childSessionKey, {
      waitForReply: false,
+      outcome: {
+        status: "ok",
+        startedAt: 2_000,
+        endedAt: 4_000,
+        elapsedMs: 2_000,
+      },
    });
  });

+  it("does not freeze stale reply text for terminal error outcomes", async () => {
+    const persist = vi.fn();
+    const captureSubagentCompletionReply = vi.fn(async () => "stale assistant text");
+    const entry = createRunEntry({
+      expectsCompletionMessage: true,
+    });
+
+    const controller = createLifecycleController({
+      entry,
+      persist,
+      captureSubagentCompletionReply,
+    });
+
+    await expect(
+      controller.completeSubagentRun({
+        runId: entry.runId,
+        endedAt: 4_000,
+        outcome: { status: "error", error: "All models failed (2): timeout" },
+        reason: SUBAGENT_ENDED_REASON_COMPLETE,
+        triggerCleanup: false,
+      }),
+    ).resolves.toBeUndefined();
+
+    expect(captureSubagentCompletionReply).not.toHaveBeenCalled();
+    expect(entry.frozenResultText).toBeNull();
+    expect(taskExecutorMocks.failTaskRunByRunId).toHaveBeenCalledWith(
+      expect.objectContaining({
+        status: "failed",
+        error: "All models failed (2): timeout",
+        progressSummary: undefined,
+      }),
+    );
+    expect(persist).toHaveBeenCalled();
+  });
+
  it("does not re-run announce flow after completion was already delivered", async () => {
    const entry = createRunEntry({
      completionAnnouncedAt: 3_500,
--- a/src/agents/subagent-registry-lifecycle.ts
+++ b/src/agents/subagent-registry-lifecycle.ts
@@ -187,13 +187,22 @@ export function createSubagentRegistryLifecycleController(params: {
    }
  };

-  const freezeRunResultAtCompletion = async (entry: SubagentRunRecord): Promise<boolean> => {
+  const freezeRunResultAtCompletion = async (
+    entry: SubagentRunRecord,
+    outcome: SubagentRunOutcome,
+  ): Promise<boolean> => {
    if (entry.frozenResultText !== undefined) {
      return false;
    }
+    if (outcome.status === "error") {
+      entry.frozenResultText = null;
+      entry.frozenResultCapturedAt = Date.now();
+      return true;
+    }
    try {
      const captured = await params.captureSubagentCompletionReply(entry.childSessionKey, {
        waitForReply: entry.expectsCompletionMessage === true,
+        outcome,
      });
      entry.frozenResultText = captured?.trim() ? capFrozenResultText(captured) : null;
    } catch {
@@ -228,7 +237,9 @@ export function createSubagentRegistryLifecycleController(params: {
  };

  const refreshFrozenResultFromSession = async (sessionKey: string): Promise<boolean> => {
-    const candidates = listPendingCompletionRunsForSession(sessionKey);
+    const candidates = listPendingCompletionRunsForSession(sessionKey).filter(
+      (entry) => entry.outcome?.status !== "error",
+    );
    if (candidates.length === 0) {
      return false;
    }
@@ -615,7 +626,7 @@ export function createSubagentRegistryLifecycleController(params: {
      mutated = true;
    }

-    if (await freezeRunResultAtCompletion(entry)) {
+    if (await freezeRunResultAtCompletion(entry, outcome)) {
      mutated = true;
    }

--- a/src/agents/subagent-registry.ts
+++ b/src/agents/subagent-registry.ts
@@ -109,8 +109,8 @@ async function loadCleanupBrowserSessionsForLifecycleEnd(): Promise<

 const defaultSubagentRegistryDeps: SubagentRegistryDeps = {
  callGateway,
-  captureSubagentCompletionReply: async (sessionKey) =>
-    (await loadSubagentAnnounceModule()).captureSubagentCompletionReply(sessionKey),
+  captureSubagentCompletionReply: async (sessionKey, options) =>
+    (await loadSubagentAnnounceModule()).captureSubagentCompletionReply(sessionKey, options),
  cleanupBrowserSessionsForLifecycleEnd: async (params) =>
    (await loadCleanupBrowserSessionsForLifecycleEnd())(params),
  getSubagentRunsSnapshotForRead,
@@ -391,8 +391,8 @@ const subagentLifecycleController = createSubagentRegistryLifecycleController({
  emitSubagentEndedHookForRun,
  notifyContextEngineSubagentEnded,
  resumeSubagentRun,
-  captureSubagentCompletionReply: (sessionKey) =>
-    subagentRegistryDeps.captureSubagentCompletionReply(sessionKey),
+  captureSubagentCompletionReply: (sessionKey, options) =>
+    subagentRegistryDeps.captureSubagentCompletionReply(sessionKey, options),
  cleanupBrowserSessionsForLifecycleEnd: (args) =>
    subagentRegistryDeps.cleanupBrowserSessionsForLifecycleEnd(args),
  runSubagentAnnounceFlow: (params) => subagentRegistryDeps.runSubagentAnnounceFlow(params),