From 36e76ef4240967307dd8dc06540f2e26fa7a8bff Mon Sep 17 00:00:00 2001 From: WhatsSkiLL Date: Fri, 22 May 2026 05:43:28 +0200 Subject: [PATCH] fix(codex): block progress-only completions [AI-assisted] (#85110) Summary: - The PR adds shared required-completion classification for ACP/subagent finalization, marks missing, progress-only, and delivery-exhausted completions as blocked, and adds regression tests plus a changelog entry. - Reproducibility: yes. source-reproducible. Current main finalizes the implicated ACP and subagent success pa ... he linked issue supplies production-shaped evidence; this read-only pass did not run a live provider repro. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(codex): preserve final completions after progress - PR branch already contained follow-up commit before automerge: fix(codex): accept progress-prefixed final completions - PR branch already contained follow-up commit before automerge: fix(codex): accept separator-delimited completions - PR branch already contained follow-up commit before automerge: fix(codex): keep follow-up planning blocked - PR branch already contained follow-up commit before automerge: fix(codex): block progress-only completions [AI-assisted] Validation: - ClawSweeper review passed for head 21a11591653802e9ef2770e5c6744a130c1b2975. - Required merge gates passed before the squash merge. Prepared head SHA: 21a11591653802e9ef2770e5c6744a130c1b2975 Review: https://github.com/openclaw/openclaw/pull/85110#issuecomment-4513104331 Co-authored-by: IWhatsskill <284122573+IWhatsskill@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com> --- CHANGELOG.md | 1 + src/acp/control-plane/manager.core.ts | 5 + src/acp/control-plane/manager.test.ts | 379 ++++++++++++++++++ .../subagent-registry-lifecycle.test.ts | 192 +++++++++ src/agents/subagent-registry-lifecycle.ts | 52 ++- src/tasks/task-completion-contract.ts | 92 +++++ 6 files changed, 720 insertions(+), 1 deletion(-) create mode 100644 src/tasks/task-completion-contract.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8105d58f9a3..6b6266c5655 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai - CLI/agents: allow `openclaw agent --session-key` to target explicit session keys, including agent-scoped legacy keys. (#85121) Thanks @Kaspre. - Auto-reply/ACP: wait for same-channel block reply delivery before starting tool work, while still honoring ACP dispatch aborts so stopped turns do not wait on slow channel sends. (#83722) Thanks @IWhatsskill. +- Codex/ACP: mark required child-run completions that only report progress, omit a final deliverable, or fail requester delivery as blocked while preserving real final reports. (#85110) Thanks @IWhatsskill. - Agents/subagents: surface blocked child-run completions as errors instead of successful subagent finishes. (#80886) Thanks @TurboTheTurtle. - Agents/Pi: treat accepted embedded `sessions_spawn` child-session handoffs as terminal progress so parent turns no longer report false non-deliverable failures. (#85054) Thanks @samzong. - WhatsApp: update Baileys to `7.0.0-rc13` and drop the obsolete logger type patch. diff --git a/src/acp/control-plane/manager.core.ts b/src/acp/control-plane/manager.core.ts index b3c835e1807..7a9d452f396 100644 --- a/src/acp/control-plane/manager.core.ts +++ b/src/acp/control-plane/manager.core.ts @@ -12,6 +12,7 @@ import { failTaskRunByRunId, startTaskRunByRunId, } from "../../tasks/detached-task-runtime.js"; +import { resolveRequiredCompletionTerminalResult } from "../../tasks/task-completion-contract.js"; import type { DeliveryContext } from "../../utils/delivery-context.js"; import { AcpRuntimeError, @@ -125,6 +126,10 @@ function resolveBackgroundTaskTerminalResult(progressSummary: string): { terminalOutcome?: "blocked"; terminalSummary?: string; } { + const requiredCompletionResult = resolveRequiredCompletionTerminalResult(progressSummary); + if (requiredCompletionResult.terminalOutcome) { + return requiredCompletionResult; + } const normalized = normalizeText(progressSummary)?.replace(/\s+/g, " ").trim(); if (!normalized) { return {}; diff --git a/src/acp/control-plane/manager.test.ts b/src/acp/control-plane/manager.test.ts index ba56d19565a..07fff94fd4d 100644 --- a/src/acp/control-plane/manager.test.ts +++ b/src/acp/control-plane/manager.test.ts @@ -2731,6 +2731,385 @@ describe("AcpSessionManager", () => { }); }); + it("keeps parented ACP turns successful when final output follows progress text", async () => { + await withAcpManagerTaskStateDir(async () => { + const runtimeState = createRuntime(); + runtimeState.runtime.startTurn = vi.fn((input) => ({ + requestId: input.requestId, + events: (async function* () { + yield { + type: "text_delta" as const, + stream: "output" as const, + text: "I'll inspect the repo now. ", + }; + yield { + type: "text_delta" as const, + stream: "output" as const, + text: "The crash is a missing null check in src/foo.ts.", + }; + })(), + result: Promise.resolve({ + status: "completed" as const, + stopReason: "end_turn", + }), + cancel: vi.fn(async () => {}), + closeStream: vi.fn(async () => {}), + })); + hoisted.requireAcpRuntimeBackendMock.mockReturnValue({ + id: "acpx", + runtime: runtimeState.runtime, + }); + hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => { + const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey; + if (sessionKey === "agent:codex:acp:child-1") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "child-1", + updatedAt: Date.now(), + spawnedBy: "agent:quant:telegram:quant:direct:822430204", + label: "Progress then final", + }, + acp: readySessionMeta(), + }; + } + if (sessionKey === "agent:quant:telegram:quant:direct:822430204") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "parent-1", + updatedAt: Date.now(), + }, + }; + } + return null; + }); + + const manager = new AcpSessionManager(); + await manager.runTurn({ + cfg: baseCfg, + sessionKey: "agent:codex:acp:child-1", + text: "Inspect and report back", + mode: "prompt", + requestId: "direct-parented-progress-then-final-run", + }); + + const record = requireTaskByRunId("direct-parented-progress-then-final-run"); + expectRecordFields(record, { + runtime: "acp", + ownerKey: "agent:quant:telegram:quant:direct:822430204", + scopeKind: "session", + childSessionKey: "agent:codex:acp:child-1", + status: "succeeded", + progressSummary: + "I'll inspect the repo now. The crash is a missing null check in src/foo.ts.", + }); + expect(record.terminalOutcome).toBeUndefined(); + expect(record.terminalSummary).toBeUndefined(); + }); + }); + + it("keeps parented ACP turns successful when final output follows a separator", async () => { + await withAcpManagerTaskStateDir(async () => { + const runtimeState = createRuntime(); + runtimeState.runtime.startTurn = vi.fn((input) => ({ + requestId: input.requestId, + events: (async function* () { + yield { + type: "text_delta" as const, + stream: "output" as const, + text: "I'll inspect the repo now: the crash is a missing null check in src/foo.ts.", + }; + })(), + result: Promise.resolve({ + status: "completed" as const, + stopReason: "end_turn", + }), + cancel: vi.fn(async () => {}), + closeStream: vi.fn(async () => {}), + })); + hoisted.requireAcpRuntimeBackendMock.mockReturnValue({ + id: "acpx", + runtime: runtimeState.runtime, + }); + hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => { + const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey; + if (sessionKey === "agent:codex:acp:child-1") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "child-1", + updatedAt: Date.now(), + spawnedBy: "agent:quant:telegram:quant:direct:822430204", + label: "Separator final", + }, + acp: readySessionMeta(), + }; + } + if (sessionKey === "agent:quant:telegram:quant:direct:822430204") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "parent-1", + updatedAt: Date.now(), + }, + }; + } + return null; + }); + + const manager = new AcpSessionManager(); + await manager.runTurn({ + cfg: baseCfg, + sessionKey: "agent:codex:acp:child-1", + text: "Inspect and report back", + mode: "prompt", + requestId: "direct-parented-separator-final-run", + }); + + const record = requireTaskByRunId("direct-parented-separator-final-run"); + expectRecordFields(record, { + runtime: "acp", + ownerKey: "agent:quant:telegram:quant:direct:822430204", + scopeKind: "session", + childSessionKey: "agent:codex:acp:child-1", + status: "succeeded", + progressSummary: + "I'll inspect the repo now: the crash is a missing null check in src/foo.ts.", + }); + expect(record.terminalOutcome).toBeUndefined(); + expect(record.terminalSummary).toBeUndefined(); + }); + }); + + it("keeps parented ACP turns blocked when progress text only adds follow-up planning", async () => { + await withAcpManagerTaskStateDir(async () => { + const runtimeState = createRuntime(); + runtimeState.runtime.startTurn = vi.fn((input) => ({ + requestId: input.requestId, + events: (async function* () { + yield { + type: "text_delta" as const, + stream: "output" as const, + text: "I'll inspect the repo now. Then I'll run tests and report back.", + }; + })(), + result: Promise.resolve({ + status: "completed" as const, + stopReason: "end_turn", + }), + cancel: vi.fn(async () => {}), + closeStream: vi.fn(async () => {}), + })); + hoisted.requireAcpRuntimeBackendMock.mockReturnValue({ + id: "acpx", + runtime: runtimeState.runtime, + }); + hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => { + const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey; + if (sessionKey === "agent:codex:acp:child-1") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "child-1", + updatedAt: Date.now(), + spawnedBy: "agent:quant:telegram:quant:direct:822430204", + label: "Follow-up planning", + }, + acp: readySessionMeta(), + }; + } + if (sessionKey === "agent:quant:telegram:quant:direct:822430204") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "parent-1", + updatedAt: Date.now(), + }, + }; + } + return null; + }); + + const manager = new AcpSessionManager(); + await manager.runTurn({ + cfg: baseCfg, + sessionKey: "agent:codex:acp:child-1", + text: "Inspect and report back", + mode: "prompt", + requestId: "direct-parented-followup-planning-run", + }); + + expectRecordFields(requireTaskByRunId("direct-parented-followup-planning-run"), { + runtime: "acp", + ownerKey: "agent:quant:telegram:quant:direct:822430204", + scopeKind: "session", + childSessionKey: "agent:codex:acp:child-1", + status: "succeeded", + progressSummary: "I'll inspect the repo now. Then I'll run tests and report back.", + terminalOutcome: "blocked", + terminalSummary: + "Required completion ended with progress-only text, not a final deliverable.", + }); + }); + }); + + it("marks completed parented ACP turns blocked when they only contain progress text", async () => { + await withAcpManagerTaskStateDir(async () => { + const runtimeState = createRuntime(); + runtimeState.runtime.startTurn = vi.fn((input) => ({ + requestId: input.requestId, + events: (async function* () { + yield { + type: "text_delta" as const, + stream: "output" as const, + text: "I'll inspect the repo now.", + }; + })(), + result: Promise.resolve({ + status: "completed" as const, + stopReason: "end_turn", + }), + cancel: vi.fn(async () => {}), + closeStream: vi.fn(async () => {}), + })); + hoisted.requireAcpRuntimeBackendMock.mockReturnValue({ + id: "acpx", + runtime: runtimeState.runtime, + }); + hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => { + const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey; + if (sessionKey === "agent:codex:acp:child-1") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "child-1", + updatedAt: Date.now(), + spawnedBy: "agent:quant:telegram:quant:direct:822430204", + label: "Progress only", + }, + acp: readySessionMeta(), + }; + } + if (sessionKey === "agent:quant:telegram:quant:direct:822430204") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "parent-1", + updatedAt: Date.now(), + }, + }; + } + return null; + }); + + const events: string[] = []; + const manager = new AcpSessionManager(); + await manager.runTurn({ + cfg: baseCfg, + sessionKey: "agent:codex:acp:child-1", + text: "Inspect and report back", + mode: "prompt", + requestId: "direct-parented-progress-completed-run", + onEvent: (event) => { + events.push(event.type); + }, + }); + + expect(events).toEqual(["text_delta", "done"]); + expectRecordFields(requireTaskByRunId("direct-parented-progress-completed-run"), { + runtime: "acp", + ownerKey: "agent:quant:telegram:quant:direct:822430204", + scopeKind: "session", + childSessionKey: "agent:codex:acp:child-1", + status: "succeeded", + progressSummary: "I'll inspect the repo now.", + terminalOutcome: "blocked", + terminalSummary: + "Required completion ended with progress-only text, not a final deliverable.", + }); + }); + }); + + it("marks completed parented ACP turns blocked when final output is missing", async () => { + await withAcpManagerTaskStateDir(async () => { + const runtimeState = createRuntime(); + runtimeState.runtime.startTurn = vi.fn((input) => ({ + requestId: input.requestId, + events: (async function* () {})(), + result: Promise.resolve({ + status: "completed" as const, + stopReason: "end_turn", + }), + cancel: vi.fn(async () => {}), + closeStream: vi.fn(async () => {}), + })); + hoisted.requireAcpRuntimeBackendMock.mockReturnValue({ + id: "acpx", + runtime: runtimeState.runtime, + }); + hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => { + const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey; + if (sessionKey === "agent:codex:acp:child-1") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "child-1", + updatedAt: Date.now(), + spawnedBy: "agent:quant:telegram:quant:direct:822430204", + label: "Missing final", + }, + acp: readySessionMeta(), + }; + } + if (sessionKey === "agent:quant:telegram:quant:direct:822430204") { + return { + sessionKey, + storeSessionKey: sessionKey, + entry: { + sessionId: "parent-1", + updatedAt: Date.now(), + }, + }; + } + return null; + }); + + const events: string[] = []; + const manager = new AcpSessionManager(); + await manager.runTurn({ + cfg: baseCfg, + sessionKey: "agent:codex:acp:child-1", + text: "Produce a final result", + mode: "prompt", + requestId: "direct-parented-empty-completed-run", + onEvent: (event) => { + events.push(event.type); + }, + }); + + expect(events).toEqual(["done"]); + expectRecordFields(requireTaskByRunId("direct-parented-empty-completed-run"), { + runtime: "acp", + ownerKey: "agent:quant:telegram:quant:direct:822430204", + scopeKind: "session", + childSessionKey: "agent:codex:acp:child-1", + status: "succeeded", + terminalOutcome: "blocked", + terminalSummary: "Required completion did not produce a final deliverable.", + }); + }); + }); + it("closes completed startTurn streams after draining queued output", async () => { await withAcpManagerTaskStateDir(async () => { const runtimeState = createRuntime(); diff --git a/src/agents/subagent-registry-lifecycle.test.ts b/src/agents/subagent-registry-lifecycle.test.ts index da1c43b1f45..82c9e49e9b2 100644 --- a/src/agents/subagent-registry-lifecycle.test.ts +++ b/src/agents/subagent-registry-lifecycle.test.ts @@ -305,6 +305,175 @@ describe("subagent registry lifecycle hardening", () => { }); }); + it("marks required progress-only completions blocked without failing the task", async () => { + const entry = createRunEntry({ + expectsCompletionMessage: true, + }); + + const controller = createLifecycleController({ + entry, + captureSubagentCompletionReply: vi.fn(async () => "I'll inspect the repo now."), + }); + + await controller.completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + progressSummary: "I'll inspect the repo now.", + terminalOutcome: "blocked", + terminalSummary: + "Required completion ended with progress-only text, not a final deliverable.", + }); + expect(taskExecutorMocks.failTaskRunByRunId).not.toHaveBeenCalled(); + }); + + it("marks missing required completions blocked while preserving real final reports", async () => { + const missingEntry = createRunEntry({ + expectsCompletionMessage: true, + }); + await createLifecycleController({ + entry: missingEntry, + captureSubagentCompletionReply: vi.fn(async () => undefined), + }).completeSubagentRun({ + runId: missingEntry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), { + runId: missingEntry.runId, + terminalOutcome: "blocked", + terminalSummary: "Required completion did not produce a final deliverable.", + }); + + taskExecutorMocks.completeTaskRunByRunId.mockClear(); + const finalEntry = createRunEntry({ + runId: "run-final", + expectsCompletionMessage: true, + }); + await createLifecycleController({ + entry: finalEntry, + captureSubagentCompletionReply: vi.fn( + async () => "Fixed the crash and verified the regression tests pass.", + ), + }).completeSubagentRun({ + runId: finalEntry.runId, + endedAt: 5_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId); + expectFields(finalArg, { + runId: finalEntry.runId, + runtime: "subagent", + sessionKey: finalEntry.childSessionKey, + progressSummary: "Fixed the crash and verified the regression tests pass.", + terminalSummary: null, + }); + expect(finalArg.terminalOutcome).toBeUndefined(); + }); + + it("keeps required completions successful when final output follows progress text", async () => { + const entry = createRunEntry({ + expectsCompletionMessage: true, + }); + + await createLifecycleController({ + entry, + captureSubagentCompletionReply: vi.fn( + async () => "I'll inspect the repo now. The crash is a missing null check in src/foo.ts.", + ), + }).completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId); + expectFields(finalArg, { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + progressSummary: + "I'll inspect the repo now. The crash is a missing null check in src/foo.ts.", + terminalSummary: null, + }); + expect(finalArg.terminalOutcome).toBeUndefined(); + }); + + it("keeps required completions successful when final output follows a separator", async () => { + const entry = createRunEntry({ + expectsCompletionMessage: true, + }); + + await createLifecycleController({ + entry, + captureSubagentCompletionReply: vi.fn( + async () => "I'll inspect the repo now - the crash is a missing null check in src/foo.ts.", + ), + }).completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId); + expectFields(finalArg, { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + progressSummary: + "I'll inspect the repo now - the crash is a missing null check in src/foo.ts.", + terminalSummary: null, + }); + expect(finalArg.terminalOutcome).toBeUndefined(); + }); + + it("keeps required completions blocked when progress text only adds follow-up planning", async () => { + const entry = createRunEntry({ + expectsCompletionMessage: true, + }); + + await createLifecycleController({ + entry, + captureSubagentCompletionReply: vi.fn( + async () => "I'll inspect the repo now. Then I'll run tests and report back.", + ), + }).completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: false, + }); + + expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + progressSummary: "I'll inspect the repo now. Then I'll run tests and report back.", + terminalOutcome: "blocked", + terminalSummary: + "Required completion ended with progress-only text, not a final deliverable.", + }); + }); + it("does not reject cleanup give-up when task delivery status update throws", async () => { const persist = vi.fn(); const warn = vi.fn(); @@ -810,6 +979,15 @@ describe("subagent registry lifecycle hardening", () => { deliveryStatus: "failed", error: "gateway request timeout for agent", }); + expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + progressSummary: "final answer", + terminalOutcome: "blocked", + terminalSummary: + "Required completion delivery failed before reaching the requester: gateway request timeout for agent.", + }); expect(persist).toHaveBeenCalled(); }); @@ -986,6 +1164,20 @@ describe("subagent registry lifecycle hardening", () => { expect(entry.deliverySuspendedAt).toBeTypeOf("number"); expect(entry.deliverySuspendedReason).toBe("retry-limit"); expect(entry.cleanupCompletedAt).toBeUndefined(); + expectFields( + findCallArg( + taskExecutorMocks.completeTaskRunByRunId, + (arg) => arg.terminalOutcome === "blocked", + ), + { + runId: entry.runId, + runtime: "subagent", + sessionKey: entry.childSessionKey, + terminalOutcome: "blocked", + terminalSummary: + "Required completion delivery failed before reaching the requester: UNAVAILABLE: requester wake failed; direct-primary: UNAVAILABLE: requester wake failed.", + }, + ); expect(persist).toHaveBeenCalled(); }); diff --git a/src/agents/subagent-registry-lifecycle.ts b/src/agents/subagent-registry-lifecycle.ts index c102a95701a..a95f0574434 100644 --- a/src/agents/subagent-registry-lifecycle.ts +++ b/src/agents/subagent-registry-lifecycle.ts @@ -11,6 +11,10 @@ import { failTaskRunByRunId, setDetachedTaskDeliveryStatusByRunId, } from "../tasks/detached-task-runtime.js"; +import { + resolveRequiredCompletionDeliveryFailureTerminalResult, + resolveRequiredCompletionTerminalResult, +} from "../tasks/task-completion-contract.js"; import { normalizeDeliveryContext } from "../utils/delivery-context.shared.js"; import { buildAnnounceIdFromChildRun, @@ -265,6 +269,10 @@ export function createSubagentRegistryLifecycleController(params: { const lastEventAt = endedAt; try { if (args.outcome.status === "ok") { + const terminalResult = + args.entry.expectsCompletionMessage === true + ? resolveRequiredCompletionTerminalResult(args.entry.frozenResultText) + : {}; completeTaskRunByRunId({ runId: args.entry.runId, runtime: "subagent", @@ -272,7 +280,8 @@ export function createSubagentRegistryLifecycleController(params: { endedAt, lastEventAt, progressSummary: args.entry.frozenResultText ?? undefined, - terminalSummary: null, + terminalSummary: terminalResult.terminalSummary ?? null, + terminalOutcome: terminalResult.terminalOutcome, }); return; } @@ -297,6 +306,35 @@ export function createSubagentRegistryLifecycleController(params: { } }; + const safeMarkRequiredCompletionDeliveryBlocked = (args: { + entry: SubagentRunRecord; + reason?: string; + }) => { + if (args.entry.expectsCompletionMessage !== true || args.entry.outcome?.status !== "ok") { + return; + } + const endedAt = args.entry.endedAt ?? Date.now(); + const terminalResult = resolveRequiredCompletionDeliveryFailureTerminalResult(args.reason); + try { + completeTaskRunByRunId({ + runId: args.entry.runId, + runtime: "subagent", + sessionKey: args.entry.childSessionKey, + endedAt, + lastEventAt: Date.now(), + progressSummary: args.entry.frozenResultText ?? undefined, + terminalSummary: terminalResult.terminalSummary, + terminalOutcome: terminalResult.terminalOutcome, + }); + } catch (err) { + params.warn("failed to mark subagent completion delivery blocked", { + error: buildSafeLifecycleErrorMeta(err), + runId: maskRunId(args.entry.runId), + childSessionKey: maskSessionKey(args.entry.childSessionKey), + }); + } + }; + const freezeRunResultAtCompletion = async ( entry: SubagentRunRecord, outcome: SubagentRunOutcome, @@ -483,6 +521,10 @@ export function createSubagentRegistryLifecycleController(params: { deliveryStatus: "failed", deliveryError: args.entry.lastAnnounceDeliveryError ?? args.reason, }); + safeMarkRequiredCompletionDeliveryBlocked({ + entry: args.entry, + reason: args.entry.lastAnnounceDeliveryError ?? args.reason, + }); logAnnounceGiveUp(args.entry, args.reason); params.persist(); }; @@ -514,6 +556,10 @@ export function createSubagentRegistryLifecycleController(params: { deliveryStatus: "failed", deliveryError: giveUpParams.entry.lastAnnounceDeliveryError, }); + safeMarkRequiredCompletionDeliveryBlocked({ + entry: giveUpParams.entry, + reason: giveUpParams.entry.lastAnnounceDeliveryError ?? giveUpParams.reason, + }); giveUpParams.entry.wakeOnDescendantSettle = undefined; giveUpParams.entry.fallbackFrozenResultText = undefined; giveUpParams.entry.fallbackFrozenResultCapturedAt = undefined; @@ -757,6 +803,10 @@ export function createSubagentRegistryLifecycleController(params: { deliveryStatus: "failed", deliveryError: entry.lastAnnounceDeliveryError, }); + safeMarkRequiredCompletionDeliveryBlocked({ + entry, + reason: entry.lastAnnounceDeliveryError ?? deferredDecision.reason, + }); entry.wakeOnDescendantSettle = undefined; entry.fallbackFrozenResultText = undefined; entry.fallbackFrozenResultCapturedAt = undefined; diff --git a/src/tasks/task-completion-contract.ts b/src/tasks/task-completion-contract.ts new file mode 100644 index 00000000000..2a46595e57f --- /dev/null +++ b/src/tasks/task-completion-contract.ts @@ -0,0 +1,92 @@ +import type { TaskTerminalOutcome } from "./task-registry.types.js"; + +export type RequiredCompletionTerminalResult = { + terminalOutcome?: Extract; + terminalSummary?: string; +}; + +const PROGRESS_ONLY_PATTERN = + /^(?:i(?:'|\u2019)ll|i will|i(?:'|\u2019)m|i am|i(?:'|\u2019)m going to|i am going to|let me|i need to)\s+(?:now\s+)?(?:analyz(?:e|ing)|apply|check(?:ing)?|continue|debug(?:ging)?|follow(?:ing)?\s+up|inspect(?:ing)?|investigat(?:e|ing)|look(?:ing)?(?:\s+into)?|map(?:ping)?|open(?:ing)?|read(?:ing)?|report(?:ing)?(?:\s+back)?|review(?:ing)?|run(?:ning)?|start(?:ing)?|test(?:ing)?|trace|trac(?:e|ing)|try(?:ing)?|update|verify(?:ing)?|work(?:ing)?)/i; + +const BARE_PROGRESS_ONLY_PATTERN = + /^(?:analyz(?:e|ing)|check(?:ing)?|debug(?:ging)?|inspect(?:ing)?|investigat(?:e|ing)|look(?:ing)?\s+into|map(?:ping)?|read(?:ing)?|report(?:ing)?\s+back|review(?:ing)?|run(?:ning)?|test(?:ing)?|trac(?:e|ing)|verify(?:ing)?|work(?:ing)?\s+on)\b/i; + +const FOLLOW_UP_PLANNING_PREFIX_PATTERN = + /^(?:after(?:wards|\s+that)?|from\s+there|next|once\s+(?:done|that(?:'|\u2019)?s\s+done|that\s+is\s+done)|then)[,.\s]+/i; + +function normalizeCompletionText(value: string | null | undefined): string { + return value?.replace(/\s+/g, " ").trim() ?? ""; +} + +function normalizeCompletionFailureReason(value: string | null | undefined): string { + const normalized = normalizeCompletionText(value); + if (!normalized) { + return ""; + } + return normalized.length <= 160 ? normalized : `${normalized.slice(0, 159)}...`; +} + +function matchesProgressOnlyPrefix(value: string): boolean { + if (PROGRESS_ONLY_PATTERN.test(value) || BARE_PROGRESS_ONLY_PATTERN.test(value)) { + return true; + } + const followup = value.replace(FOLLOW_UP_PLANNING_PREFIX_PATTERN, "").trim(); + return ( + followup !== value && + (PROGRESS_ONLY_PATTERN.test(followup) || BARE_PROGRESS_ONLY_PATTERN.test(followup)) + ); +} + +function hasNonProgressFollowupSentence(value: string): boolean { + const boundary = /(?:[.!?:]|\s[-\u2013\u2014])\s+\S/.exec(value); + if (!boundary) { + return false; + } + const separatorEnd = boundary.index + boundary[0].length - 1; + const firstSentence = value.slice(0, separatorEnd).trim(); + const rest = value.slice(separatorEnd).trim(); + return matchesProgressOnlyPrefix(firstSentence) && !isProgressOnlyCompletionText(rest); +} + +export function isProgressOnlyCompletionText(value: string | null | undefined): boolean { + const normalized = normalizeCompletionText(value); + if (!normalized) { + return false; + } + if (hasNonProgressFollowupSentence(normalized)) { + return false; + } + return matchesProgressOnlyPrefix(normalized); +} + +export function resolveRequiredCompletionTerminalResult( + resultText: string | null | undefined, +): RequiredCompletionTerminalResult { + const normalized = normalizeCompletionText(resultText); + if (!normalized) { + return { + terminalOutcome: "blocked", + terminalSummary: "Required completion did not produce a final deliverable.", + }; + } + if (isProgressOnlyCompletionText(normalized)) { + return { + terminalOutcome: "blocked", + terminalSummary: + "Required completion ended with progress-only text, not a final deliverable.", + }; + } + return {}; +} + +export function resolveRequiredCompletionDeliveryFailureTerminalResult( + reason: string | null | undefined, +): RequiredCompletionTerminalResult { + const normalizedReason = normalizeCompletionFailureReason(reason); + return { + terminalOutcome: "blocked", + terminalSummary: normalizedReason + ? `Required completion delivery failed before reaching the requester: ${normalizedReason}.` + : "Required completion delivery failed before reaching the requester.", + }; +}