mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-28 03:33:52 +00:00
fix(codex): block progress-only completions [AI-assisted] (#85110)
Summary: - The PR adds shared required-completion classification for ACP/subagent finalization, marks missing, progress-only, and delivery-exhausted completions as blocked, and adds regression tests plus a changelog entry. - Reproducibility: yes. source-reproducible. Current main finalizes the implicated ACP and subagent success pa ... he linked issue supplies production-shaped evidence; this read-only pass did not run a live provider repro. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(codex): preserve final completions after progress - PR branch already contained follow-up commit before automerge: fix(codex): accept progress-prefixed final completions - PR branch already contained follow-up commit before automerge: fix(codex): accept separator-delimited completions - PR branch already contained follow-up commit before automerge: fix(codex): keep follow-up planning blocked - PR branch already contained follow-up commit before automerge: fix(codex): block progress-only completions [AI-assisted] Validation: - ClawSweeper review passed for head21a1159165. - Required merge gates passed before the squash merge. Prepared head SHA:21a1159165Review: https://github.com/openclaw/openclaw/pull/85110#issuecomment-4513104331 Co-authored-by: IWhatsskill <284122573+IWhatsskill@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- CLI/agents: allow `openclaw agent --session-key` to target explicit session keys, including agent-scoped legacy keys. (#85121) Thanks @Kaspre.
|
||||
- Auto-reply/ACP: wait for same-channel block reply delivery before starting tool work, while still honoring ACP dispatch aborts so stopped turns do not wait on slow channel sends. (#83722) Thanks @IWhatsskill.
|
||||
- Codex/ACP: mark required child-run completions that only report progress, omit a final deliverable, or fail requester delivery as blocked while preserving real final reports. (#85110) Thanks @IWhatsskill.
|
||||
- Agents/subagents: surface blocked child-run completions as errors instead of successful subagent finishes. (#80886) Thanks @TurboTheTurtle.
|
||||
- Agents/Pi: treat accepted embedded `sessions_spawn` child-session handoffs as terminal progress so parent turns no longer report false non-deliverable failures. (#85054) Thanks @samzong.
|
||||
- WhatsApp: update Baileys to `7.0.0-rc13` and drop the obsolete logger type patch.
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
failTaskRunByRunId,
|
||||
startTaskRunByRunId,
|
||||
} from "../../tasks/detached-task-runtime.js";
|
||||
import { resolveRequiredCompletionTerminalResult } from "../../tasks/task-completion-contract.js";
|
||||
import type { DeliveryContext } from "../../utils/delivery-context.js";
|
||||
import {
|
||||
AcpRuntimeError,
|
||||
@@ -125,6 +126,10 @@ function resolveBackgroundTaskTerminalResult(progressSummary: string): {
|
||||
terminalOutcome?: "blocked";
|
||||
terminalSummary?: string;
|
||||
} {
|
||||
const requiredCompletionResult = resolveRequiredCompletionTerminalResult(progressSummary);
|
||||
if (requiredCompletionResult.terminalOutcome) {
|
||||
return requiredCompletionResult;
|
||||
}
|
||||
const normalized = normalizeText(progressSummary)?.replace(/\s+/g, " ").trim();
|
||||
if (!normalized) {
|
||||
return {};
|
||||
|
||||
@@ -2731,6 +2731,385 @@ describe("AcpSessionManager", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps parented ACP turns successful when final output follows progress text", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
runtimeState.runtime.startTurn = vi.fn((input) => ({
|
||||
requestId: input.requestId,
|
||||
events: (async function* () {
|
||||
yield {
|
||||
type: "text_delta" as const,
|
||||
stream: "output" as const,
|
||||
text: "I'll inspect the repo now. ",
|
||||
};
|
||||
yield {
|
||||
type: "text_delta" as const,
|
||||
stream: "output" as const,
|
||||
text: "The crash is a missing null check in src/foo.ts.",
|
||||
};
|
||||
})(),
|
||||
result: Promise.resolve({
|
||||
status: "completed" as const,
|
||||
stopReason: "end_turn",
|
||||
}),
|
||||
cancel: vi.fn(async () => {}),
|
||||
closeStream: vi.fn(async () => {}),
|
||||
}));
|
||||
hoisted.requireAcpRuntimeBackendMock.mockReturnValue({
|
||||
id: "acpx",
|
||||
runtime: runtimeState.runtime,
|
||||
});
|
||||
hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => {
|
||||
const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey;
|
||||
if (sessionKey === "agent:codex:acp:child-1") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "child-1",
|
||||
updatedAt: Date.now(),
|
||||
spawnedBy: "agent:quant:telegram:quant:direct:822430204",
|
||||
label: "Progress then final",
|
||||
},
|
||||
acp: readySessionMeta(),
|
||||
};
|
||||
}
|
||||
if (sessionKey === "agent:quant:telegram:quant:direct:822430204") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "parent-1",
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const manager = new AcpSessionManager();
|
||||
await manager.runTurn({
|
||||
cfg: baseCfg,
|
||||
sessionKey: "agent:codex:acp:child-1",
|
||||
text: "Inspect and report back",
|
||||
mode: "prompt",
|
||||
requestId: "direct-parented-progress-then-final-run",
|
||||
});
|
||||
|
||||
const record = requireTaskByRunId("direct-parented-progress-then-final-run");
|
||||
expectRecordFields(record, {
|
||||
runtime: "acp",
|
||||
ownerKey: "agent:quant:telegram:quant:direct:822430204",
|
||||
scopeKind: "session",
|
||||
childSessionKey: "agent:codex:acp:child-1",
|
||||
status: "succeeded",
|
||||
progressSummary:
|
||||
"I'll inspect the repo now. The crash is a missing null check in src/foo.ts.",
|
||||
});
|
||||
expect(record.terminalOutcome).toBeUndefined();
|
||||
expect(record.terminalSummary).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps parented ACP turns successful when final output follows a separator", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
runtimeState.runtime.startTurn = vi.fn((input) => ({
|
||||
requestId: input.requestId,
|
||||
events: (async function* () {
|
||||
yield {
|
||||
type: "text_delta" as const,
|
||||
stream: "output" as const,
|
||||
text: "I'll inspect the repo now: the crash is a missing null check in src/foo.ts.",
|
||||
};
|
||||
})(),
|
||||
result: Promise.resolve({
|
||||
status: "completed" as const,
|
||||
stopReason: "end_turn",
|
||||
}),
|
||||
cancel: vi.fn(async () => {}),
|
||||
closeStream: vi.fn(async () => {}),
|
||||
}));
|
||||
hoisted.requireAcpRuntimeBackendMock.mockReturnValue({
|
||||
id: "acpx",
|
||||
runtime: runtimeState.runtime,
|
||||
});
|
||||
hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => {
|
||||
const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey;
|
||||
if (sessionKey === "agent:codex:acp:child-1") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "child-1",
|
||||
updatedAt: Date.now(),
|
||||
spawnedBy: "agent:quant:telegram:quant:direct:822430204",
|
||||
label: "Separator final",
|
||||
},
|
||||
acp: readySessionMeta(),
|
||||
};
|
||||
}
|
||||
if (sessionKey === "agent:quant:telegram:quant:direct:822430204") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "parent-1",
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const manager = new AcpSessionManager();
|
||||
await manager.runTurn({
|
||||
cfg: baseCfg,
|
||||
sessionKey: "agent:codex:acp:child-1",
|
||||
text: "Inspect and report back",
|
||||
mode: "prompt",
|
||||
requestId: "direct-parented-separator-final-run",
|
||||
});
|
||||
|
||||
const record = requireTaskByRunId("direct-parented-separator-final-run");
|
||||
expectRecordFields(record, {
|
||||
runtime: "acp",
|
||||
ownerKey: "agent:quant:telegram:quant:direct:822430204",
|
||||
scopeKind: "session",
|
||||
childSessionKey: "agent:codex:acp:child-1",
|
||||
status: "succeeded",
|
||||
progressSummary:
|
||||
"I'll inspect the repo now: the crash is a missing null check in src/foo.ts.",
|
||||
});
|
||||
expect(record.terminalOutcome).toBeUndefined();
|
||||
expect(record.terminalSummary).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps parented ACP turns blocked when progress text only adds follow-up planning", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
runtimeState.runtime.startTurn = vi.fn((input) => ({
|
||||
requestId: input.requestId,
|
||||
events: (async function* () {
|
||||
yield {
|
||||
type: "text_delta" as const,
|
||||
stream: "output" as const,
|
||||
text: "I'll inspect the repo now. Then I'll run tests and report back.",
|
||||
};
|
||||
})(),
|
||||
result: Promise.resolve({
|
||||
status: "completed" as const,
|
||||
stopReason: "end_turn",
|
||||
}),
|
||||
cancel: vi.fn(async () => {}),
|
||||
closeStream: vi.fn(async () => {}),
|
||||
}));
|
||||
hoisted.requireAcpRuntimeBackendMock.mockReturnValue({
|
||||
id: "acpx",
|
||||
runtime: runtimeState.runtime,
|
||||
});
|
||||
hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => {
|
||||
const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey;
|
||||
if (sessionKey === "agent:codex:acp:child-1") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "child-1",
|
||||
updatedAt: Date.now(),
|
||||
spawnedBy: "agent:quant:telegram:quant:direct:822430204",
|
||||
label: "Follow-up planning",
|
||||
},
|
||||
acp: readySessionMeta(),
|
||||
};
|
||||
}
|
||||
if (sessionKey === "agent:quant:telegram:quant:direct:822430204") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "parent-1",
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const manager = new AcpSessionManager();
|
||||
await manager.runTurn({
|
||||
cfg: baseCfg,
|
||||
sessionKey: "agent:codex:acp:child-1",
|
||||
text: "Inspect and report back",
|
||||
mode: "prompt",
|
||||
requestId: "direct-parented-followup-planning-run",
|
||||
});
|
||||
|
||||
expectRecordFields(requireTaskByRunId("direct-parented-followup-planning-run"), {
|
||||
runtime: "acp",
|
||||
ownerKey: "agent:quant:telegram:quant:direct:822430204",
|
||||
scopeKind: "session",
|
||||
childSessionKey: "agent:codex:acp:child-1",
|
||||
status: "succeeded",
|
||||
progressSummary: "I'll inspect the repo now. Then I'll run tests and report back.",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion ended with progress-only text, not a final deliverable.",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("marks completed parented ACP turns blocked when they only contain progress text", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
runtimeState.runtime.startTurn = vi.fn((input) => ({
|
||||
requestId: input.requestId,
|
||||
events: (async function* () {
|
||||
yield {
|
||||
type: "text_delta" as const,
|
||||
stream: "output" as const,
|
||||
text: "I'll inspect the repo now.",
|
||||
};
|
||||
})(),
|
||||
result: Promise.resolve({
|
||||
status: "completed" as const,
|
||||
stopReason: "end_turn",
|
||||
}),
|
||||
cancel: vi.fn(async () => {}),
|
||||
closeStream: vi.fn(async () => {}),
|
||||
}));
|
||||
hoisted.requireAcpRuntimeBackendMock.mockReturnValue({
|
||||
id: "acpx",
|
||||
runtime: runtimeState.runtime,
|
||||
});
|
||||
hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => {
|
||||
const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey;
|
||||
if (sessionKey === "agent:codex:acp:child-1") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "child-1",
|
||||
updatedAt: Date.now(),
|
||||
spawnedBy: "agent:quant:telegram:quant:direct:822430204",
|
||||
label: "Progress only",
|
||||
},
|
||||
acp: readySessionMeta(),
|
||||
};
|
||||
}
|
||||
if (sessionKey === "agent:quant:telegram:quant:direct:822430204") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "parent-1",
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const events: string[] = [];
|
||||
const manager = new AcpSessionManager();
|
||||
await manager.runTurn({
|
||||
cfg: baseCfg,
|
||||
sessionKey: "agent:codex:acp:child-1",
|
||||
text: "Inspect and report back",
|
||||
mode: "prompt",
|
||||
requestId: "direct-parented-progress-completed-run",
|
||||
onEvent: (event) => {
|
||||
events.push(event.type);
|
||||
},
|
||||
});
|
||||
|
||||
expect(events).toEqual(["text_delta", "done"]);
|
||||
expectRecordFields(requireTaskByRunId("direct-parented-progress-completed-run"), {
|
||||
runtime: "acp",
|
||||
ownerKey: "agent:quant:telegram:quant:direct:822430204",
|
||||
scopeKind: "session",
|
||||
childSessionKey: "agent:codex:acp:child-1",
|
||||
status: "succeeded",
|
||||
progressSummary: "I'll inspect the repo now.",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion ended with progress-only text, not a final deliverable.",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("marks completed parented ACP turns blocked when final output is missing", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
runtimeState.runtime.startTurn = vi.fn((input) => ({
|
||||
requestId: input.requestId,
|
||||
events: (async function* () {})(),
|
||||
result: Promise.resolve({
|
||||
status: "completed" as const,
|
||||
stopReason: "end_turn",
|
||||
}),
|
||||
cancel: vi.fn(async () => {}),
|
||||
closeStream: vi.fn(async () => {}),
|
||||
}));
|
||||
hoisted.requireAcpRuntimeBackendMock.mockReturnValue({
|
||||
id: "acpx",
|
||||
runtime: runtimeState.runtime,
|
||||
});
|
||||
hoisted.readAcpSessionEntryMock.mockImplementation((paramsUnknown: unknown) => {
|
||||
const sessionKey = (paramsUnknown as { sessionKey?: string }).sessionKey;
|
||||
if (sessionKey === "agent:codex:acp:child-1") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "child-1",
|
||||
updatedAt: Date.now(),
|
||||
spawnedBy: "agent:quant:telegram:quant:direct:822430204",
|
||||
label: "Missing final",
|
||||
},
|
||||
acp: readySessionMeta(),
|
||||
};
|
||||
}
|
||||
if (sessionKey === "agent:quant:telegram:quant:direct:822430204") {
|
||||
return {
|
||||
sessionKey,
|
||||
storeSessionKey: sessionKey,
|
||||
entry: {
|
||||
sessionId: "parent-1",
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const events: string[] = [];
|
||||
const manager = new AcpSessionManager();
|
||||
await manager.runTurn({
|
||||
cfg: baseCfg,
|
||||
sessionKey: "agent:codex:acp:child-1",
|
||||
text: "Produce a final result",
|
||||
mode: "prompt",
|
||||
requestId: "direct-parented-empty-completed-run",
|
||||
onEvent: (event) => {
|
||||
events.push(event.type);
|
||||
},
|
||||
});
|
||||
|
||||
expect(events).toEqual(["done"]);
|
||||
expectRecordFields(requireTaskByRunId("direct-parented-empty-completed-run"), {
|
||||
runtime: "acp",
|
||||
ownerKey: "agent:quant:telegram:quant:direct:822430204",
|
||||
scopeKind: "session",
|
||||
childSessionKey: "agent:codex:acp:child-1",
|
||||
status: "succeeded",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary: "Required completion did not produce a final deliverable.",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("closes completed startTurn streams after draining queued output", async () => {
|
||||
await withAcpManagerTaskStateDir(async () => {
|
||||
const runtimeState = createRuntime();
|
||||
|
||||
@@ -305,6 +305,175 @@ describe("subagent registry lifecycle hardening", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("marks required progress-only completions blocked without failing the task", async () => {
|
||||
const entry = createRunEntry({
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
|
||||
const controller = createLifecycleController({
|
||||
entry,
|
||||
captureSubagentCompletionReply: vi.fn(async () => "I'll inspect the repo now."),
|
||||
});
|
||||
|
||||
await controller.completeSubagentRun({
|
||||
runId: entry.runId,
|
||||
endedAt: 4_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), {
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
progressSummary: "I'll inspect the repo now.",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion ended with progress-only text, not a final deliverable.",
|
||||
});
|
||||
expect(taskExecutorMocks.failTaskRunByRunId).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("marks missing required completions blocked while preserving real final reports", async () => {
|
||||
const missingEntry = createRunEntry({
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
await createLifecycleController({
|
||||
entry: missingEntry,
|
||||
captureSubagentCompletionReply: vi.fn(async () => undefined),
|
||||
}).completeSubagentRun({
|
||||
runId: missingEntry.runId,
|
||||
endedAt: 4_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), {
|
||||
runId: missingEntry.runId,
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary: "Required completion did not produce a final deliverable.",
|
||||
});
|
||||
|
||||
taskExecutorMocks.completeTaskRunByRunId.mockClear();
|
||||
const finalEntry = createRunEntry({
|
||||
runId: "run-final",
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
await createLifecycleController({
|
||||
entry: finalEntry,
|
||||
captureSubagentCompletionReply: vi.fn(
|
||||
async () => "Fixed the crash and verified the regression tests pass.",
|
||||
),
|
||||
}).completeSubagentRun({
|
||||
runId: finalEntry.runId,
|
||||
endedAt: 5_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId);
|
||||
expectFields(finalArg, {
|
||||
runId: finalEntry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: finalEntry.childSessionKey,
|
||||
progressSummary: "Fixed the crash and verified the regression tests pass.",
|
||||
terminalSummary: null,
|
||||
});
|
||||
expect(finalArg.terminalOutcome).toBeUndefined();
|
||||
});
|
||||
|
||||
it("keeps required completions successful when final output follows progress text", async () => {
|
||||
const entry = createRunEntry({
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
|
||||
await createLifecycleController({
|
||||
entry,
|
||||
captureSubagentCompletionReply: vi.fn(
|
||||
async () => "I'll inspect the repo now. The crash is a missing null check in src/foo.ts.",
|
||||
),
|
||||
}).completeSubagentRun({
|
||||
runId: entry.runId,
|
||||
endedAt: 4_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId);
|
||||
expectFields(finalArg, {
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
progressSummary:
|
||||
"I'll inspect the repo now. The crash is a missing null check in src/foo.ts.",
|
||||
terminalSummary: null,
|
||||
});
|
||||
expect(finalArg.terminalOutcome).toBeUndefined();
|
||||
});
|
||||
|
||||
it("keeps required completions successful when final output follows a separator", async () => {
|
||||
const entry = createRunEntry({
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
|
||||
await createLifecycleController({
|
||||
entry,
|
||||
captureSubagentCompletionReply: vi.fn(
|
||||
async () => "I'll inspect the repo now - the crash is a missing null check in src/foo.ts.",
|
||||
),
|
||||
}).completeSubagentRun({
|
||||
runId: entry.runId,
|
||||
endedAt: 4_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
const finalArg = firstCallArg(taskExecutorMocks.completeTaskRunByRunId);
|
||||
expectFields(finalArg, {
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
progressSummary:
|
||||
"I'll inspect the repo now - the crash is a missing null check in src/foo.ts.",
|
||||
terminalSummary: null,
|
||||
});
|
||||
expect(finalArg.terminalOutcome).toBeUndefined();
|
||||
});
|
||||
|
||||
it("keeps required completions blocked when progress text only adds follow-up planning", async () => {
|
||||
const entry = createRunEntry({
|
||||
expectsCompletionMessage: true,
|
||||
});
|
||||
|
||||
await createLifecycleController({
|
||||
entry,
|
||||
captureSubagentCompletionReply: vi.fn(
|
||||
async () => "I'll inspect the repo now. Then I'll run tests and report back.",
|
||||
),
|
||||
}).completeSubagentRun({
|
||||
runId: entry.runId,
|
||||
endedAt: 4_000,
|
||||
outcome: { status: "ok" },
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
triggerCleanup: false,
|
||||
});
|
||||
|
||||
expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), {
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
progressSummary: "I'll inspect the repo now. Then I'll run tests and report back.",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion ended with progress-only text, not a final deliverable.",
|
||||
});
|
||||
});
|
||||
|
||||
it("does not reject cleanup give-up when task delivery status update throws", async () => {
|
||||
const persist = vi.fn();
|
||||
const warn = vi.fn();
|
||||
@@ -810,6 +979,15 @@ describe("subagent registry lifecycle hardening", () => {
|
||||
deliveryStatus: "failed",
|
||||
error: "gateway request timeout for agent",
|
||||
});
|
||||
expectFields(firstCallArg(taskExecutorMocks.completeTaskRunByRunId), {
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
progressSummary: "final answer",
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion delivery failed before reaching the requester: gateway request timeout for agent.",
|
||||
});
|
||||
expect(persist).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -986,6 +1164,20 @@ describe("subagent registry lifecycle hardening", () => {
|
||||
expect(entry.deliverySuspendedAt).toBeTypeOf("number");
|
||||
expect(entry.deliverySuspendedReason).toBe("retry-limit");
|
||||
expect(entry.cleanupCompletedAt).toBeUndefined();
|
||||
expectFields(
|
||||
findCallArg(
|
||||
taskExecutorMocks.completeTaskRunByRunId,
|
||||
(arg) => arg.terminalOutcome === "blocked",
|
||||
),
|
||||
{
|
||||
runId: entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: entry.childSessionKey,
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion delivery failed before reaching the requester: UNAVAILABLE: requester wake failed; direct-primary: UNAVAILABLE: requester wake failed.",
|
||||
},
|
||||
);
|
||||
expect(persist).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
|
||||
@@ -11,6 +11,10 @@ import {
|
||||
failTaskRunByRunId,
|
||||
setDetachedTaskDeliveryStatusByRunId,
|
||||
} from "../tasks/detached-task-runtime.js";
|
||||
import {
|
||||
resolveRequiredCompletionDeliveryFailureTerminalResult,
|
||||
resolveRequiredCompletionTerminalResult,
|
||||
} from "../tasks/task-completion-contract.js";
|
||||
import { normalizeDeliveryContext } from "../utils/delivery-context.shared.js";
|
||||
import {
|
||||
buildAnnounceIdFromChildRun,
|
||||
@@ -265,6 +269,10 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
const lastEventAt = endedAt;
|
||||
try {
|
||||
if (args.outcome.status === "ok") {
|
||||
const terminalResult =
|
||||
args.entry.expectsCompletionMessage === true
|
||||
? resolveRequiredCompletionTerminalResult(args.entry.frozenResultText)
|
||||
: {};
|
||||
completeTaskRunByRunId({
|
||||
runId: args.entry.runId,
|
||||
runtime: "subagent",
|
||||
@@ -272,7 +280,8 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
endedAt,
|
||||
lastEventAt,
|
||||
progressSummary: args.entry.frozenResultText ?? undefined,
|
||||
terminalSummary: null,
|
||||
terminalSummary: terminalResult.terminalSummary ?? null,
|
||||
terminalOutcome: terminalResult.terminalOutcome,
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -297,6 +306,35 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
}
|
||||
};
|
||||
|
||||
const safeMarkRequiredCompletionDeliveryBlocked = (args: {
|
||||
entry: SubagentRunRecord;
|
||||
reason?: string;
|
||||
}) => {
|
||||
if (args.entry.expectsCompletionMessage !== true || args.entry.outcome?.status !== "ok") {
|
||||
return;
|
||||
}
|
||||
const endedAt = args.entry.endedAt ?? Date.now();
|
||||
const terminalResult = resolveRequiredCompletionDeliveryFailureTerminalResult(args.reason);
|
||||
try {
|
||||
completeTaskRunByRunId({
|
||||
runId: args.entry.runId,
|
||||
runtime: "subagent",
|
||||
sessionKey: args.entry.childSessionKey,
|
||||
endedAt,
|
||||
lastEventAt: Date.now(),
|
||||
progressSummary: args.entry.frozenResultText ?? undefined,
|
||||
terminalSummary: terminalResult.terminalSummary,
|
||||
terminalOutcome: terminalResult.terminalOutcome,
|
||||
});
|
||||
} catch (err) {
|
||||
params.warn("failed to mark subagent completion delivery blocked", {
|
||||
error: buildSafeLifecycleErrorMeta(err),
|
||||
runId: maskRunId(args.entry.runId),
|
||||
childSessionKey: maskSessionKey(args.entry.childSessionKey),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
const freezeRunResultAtCompletion = async (
|
||||
entry: SubagentRunRecord,
|
||||
outcome: SubagentRunOutcome,
|
||||
@@ -483,6 +521,10 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
deliveryStatus: "failed",
|
||||
deliveryError: args.entry.lastAnnounceDeliveryError ?? args.reason,
|
||||
});
|
||||
safeMarkRequiredCompletionDeliveryBlocked({
|
||||
entry: args.entry,
|
||||
reason: args.entry.lastAnnounceDeliveryError ?? args.reason,
|
||||
});
|
||||
logAnnounceGiveUp(args.entry, args.reason);
|
||||
params.persist();
|
||||
};
|
||||
@@ -514,6 +556,10 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
deliveryStatus: "failed",
|
||||
deliveryError: giveUpParams.entry.lastAnnounceDeliveryError,
|
||||
});
|
||||
safeMarkRequiredCompletionDeliveryBlocked({
|
||||
entry: giveUpParams.entry,
|
||||
reason: giveUpParams.entry.lastAnnounceDeliveryError ?? giveUpParams.reason,
|
||||
});
|
||||
giveUpParams.entry.wakeOnDescendantSettle = undefined;
|
||||
giveUpParams.entry.fallbackFrozenResultText = undefined;
|
||||
giveUpParams.entry.fallbackFrozenResultCapturedAt = undefined;
|
||||
@@ -757,6 +803,10 @@ export function createSubagentRegistryLifecycleController(params: {
|
||||
deliveryStatus: "failed",
|
||||
deliveryError: entry.lastAnnounceDeliveryError,
|
||||
});
|
||||
safeMarkRequiredCompletionDeliveryBlocked({
|
||||
entry,
|
||||
reason: entry.lastAnnounceDeliveryError ?? deferredDecision.reason,
|
||||
});
|
||||
entry.wakeOnDescendantSettle = undefined;
|
||||
entry.fallbackFrozenResultText = undefined;
|
||||
entry.fallbackFrozenResultCapturedAt = undefined;
|
||||
|
||||
92
src/tasks/task-completion-contract.ts
Normal file
92
src/tasks/task-completion-contract.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import type { TaskTerminalOutcome } from "./task-registry.types.js";
|
||||
|
||||
export type RequiredCompletionTerminalResult = {
|
||||
terminalOutcome?: Extract<TaskTerminalOutcome, "blocked">;
|
||||
terminalSummary?: string;
|
||||
};
|
||||
|
||||
const PROGRESS_ONLY_PATTERN =
|
||||
/^(?:i(?:'|\u2019)ll|i will|i(?:'|\u2019)m|i am|i(?:'|\u2019)m going to|i am going to|let me|i need to)\s+(?:now\s+)?(?:analyz(?:e|ing)|apply|check(?:ing)?|continue|debug(?:ging)?|follow(?:ing)?\s+up|inspect(?:ing)?|investigat(?:e|ing)|look(?:ing)?(?:\s+into)?|map(?:ping)?|open(?:ing)?|read(?:ing)?|report(?:ing)?(?:\s+back)?|review(?:ing)?|run(?:ning)?|start(?:ing)?|test(?:ing)?|trace|trac(?:e|ing)|try(?:ing)?|update|verify(?:ing)?|work(?:ing)?)/i;
|
||||
|
||||
const BARE_PROGRESS_ONLY_PATTERN =
|
||||
/^(?:analyz(?:e|ing)|check(?:ing)?|debug(?:ging)?|inspect(?:ing)?|investigat(?:e|ing)|look(?:ing)?\s+into|map(?:ping)?|read(?:ing)?|report(?:ing)?\s+back|review(?:ing)?|run(?:ning)?|test(?:ing)?|trac(?:e|ing)|verify(?:ing)?|work(?:ing)?\s+on)\b/i;
|
||||
|
||||
const FOLLOW_UP_PLANNING_PREFIX_PATTERN =
|
||||
/^(?:after(?:wards|\s+that)?|from\s+there|next|once\s+(?:done|that(?:'|\u2019)?s\s+done|that\s+is\s+done)|then)[,.\s]+/i;
|
||||
|
||||
function normalizeCompletionText(value: string | null | undefined): string {
|
||||
return value?.replace(/\s+/g, " ").trim() ?? "";
|
||||
}
|
||||
|
||||
function normalizeCompletionFailureReason(value: string | null | undefined): string {
|
||||
const normalized = normalizeCompletionText(value);
|
||||
if (!normalized) {
|
||||
return "";
|
||||
}
|
||||
return normalized.length <= 160 ? normalized : `${normalized.slice(0, 159)}...`;
|
||||
}
|
||||
|
||||
function matchesProgressOnlyPrefix(value: string): boolean {
|
||||
if (PROGRESS_ONLY_PATTERN.test(value) || BARE_PROGRESS_ONLY_PATTERN.test(value)) {
|
||||
return true;
|
||||
}
|
||||
const followup = value.replace(FOLLOW_UP_PLANNING_PREFIX_PATTERN, "").trim();
|
||||
return (
|
||||
followup !== value &&
|
||||
(PROGRESS_ONLY_PATTERN.test(followup) || BARE_PROGRESS_ONLY_PATTERN.test(followup))
|
||||
);
|
||||
}
|
||||
|
||||
function hasNonProgressFollowupSentence(value: string): boolean {
|
||||
const boundary = /(?:[.!?:]|\s[-\u2013\u2014])\s+\S/.exec(value);
|
||||
if (!boundary) {
|
||||
return false;
|
||||
}
|
||||
const separatorEnd = boundary.index + boundary[0].length - 1;
|
||||
const firstSentence = value.slice(0, separatorEnd).trim();
|
||||
const rest = value.slice(separatorEnd).trim();
|
||||
return matchesProgressOnlyPrefix(firstSentence) && !isProgressOnlyCompletionText(rest);
|
||||
}
|
||||
|
||||
export function isProgressOnlyCompletionText(value: string | null | undefined): boolean {
|
||||
const normalized = normalizeCompletionText(value);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
if (hasNonProgressFollowupSentence(normalized)) {
|
||||
return false;
|
||||
}
|
||||
return matchesProgressOnlyPrefix(normalized);
|
||||
}
|
||||
|
||||
export function resolveRequiredCompletionTerminalResult(
|
||||
resultText: string | null | undefined,
|
||||
): RequiredCompletionTerminalResult {
|
||||
const normalized = normalizeCompletionText(resultText);
|
||||
if (!normalized) {
|
||||
return {
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary: "Required completion did not produce a final deliverable.",
|
||||
};
|
||||
}
|
||||
if (isProgressOnlyCompletionText(normalized)) {
|
||||
return {
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary:
|
||||
"Required completion ended with progress-only text, not a final deliverable.",
|
||||
};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
export function resolveRequiredCompletionDeliveryFailureTerminalResult(
|
||||
reason: string | null | undefined,
|
||||
): RequiredCompletionTerminalResult {
|
||||
const normalizedReason = normalizeCompletionFailureReason(reason);
|
||||
return {
|
||||
terminalOutcome: "blocked",
|
||||
terminalSummary: normalizedReason
|
||||
? `Required completion delivery failed before reaching the requester: ${normalizedReason}.`
|
||||
: "Required completion delivery failed before reaching the requester.",
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user