fix(codex): surface lastToolError on degraded orphan-tool delivery

Completed turns with deliverable assistant text still synthesize failed
tool.result rows but no longer set promptError. Record lastToolError on
that degraded path, and treat whitespace-only assistant items as
non-deliverable so orphan tools still fail closed.

Co-authored-by: Cursor <cursoragent@cursor.com>
(cherry picked from commit cb6fbe36c73982e1043186b983f0c03334989b34)
This commit is contained in:
liuweiqin
2026-06-07 17:05:19 +08:00
committed by Vincent Koc
parent ed3a0241f3
commit a1af47e5da
3 changed files with 99 additions and 1 deletions

View File

@@ -1692,6 +1692,12 @@ describe("CodexAppServerEventProjector", () => {
expect(result.promptError).toBeNull();
expect(result.promptErrorSource).toBeNull();
expect(result.lastToolError).toMatchObject({
toolName: "bash",
error: expect.stringContaining("without a matching tool.result"),
mutatingAction: true,
});
expect(result.lastToolError?.actionFingerprint).toContain("node scripts/report.js --publish");
expect(result.assistantTexts).toEqual([
"The requested publish command was denied before execution.",
]);
@@ -1738,6 +1744,44 @@ describe("CodexAppServerEventProjector", () => {
});
});
// Scenario: a command item is started but never completed, and the turn then
// completes with an agent message whose text is whitespace only. The result is
// expected to carry a prompt-sourced promptError, no lastToolError, and no
// assistant texts.
it("records promptError when a completed turn has only whitespace assistant text and an orphan tool call", async () => {
  const projectorParams = await createParams();
  const projector = await createProjector(projectorParams);

  // Start a command execution that never receives a matching completion.
  const orphanCommandStarted = forCurrentTurn("item/started", {
    item: {
      type: "commandExecution",
      id: "cmd-whitespace",
      command: "pnpm test extensions/codex",
      cwd: "/workspace",
      processId: null,
      source: "agent",
      status: "inProgress",
      commandActions: [],
      aggregatedOutput: null,
      exitCode: null,
      durationMs: null,
    },
  });
  await projector.handleNotification(orphanCommandStarted);

  // Complete the turn with an agent message that contains only whitespace.
  const whitespaceOnlyCompletion = turnCompleted([
    {
      type: "agentMessage",
      id: "msg-whitespace",
      text: " \n\t ",
    },
  ]);
  await projector.handleNotification(whitespaceOnlyCompletion);

  const outcome = projector.buildResult(buildEmptyToolTelemetry());

  expect(outcome.promptError).toContain("without a matching tool.result");
  expect(outcome.promptErrorSource).toBe("prompt");
  expect(outcome.lastToolError).toBeUndefined();
  expect(outcome.assistantTexts).toEqual([]);
});
it("uses streamed command output when final command snapshots omit aggregated output", async () => {
const onAgentEvent = vi.fn();
const trajectoryRecorder = {

View File

@@ -179,6 +179,7 @@ export class CodexAppServerEventProjector {
private readonly toolTrajectoryCallIds = new Set<string>();
private readonly toolTrajectoryResultIds = new Set<string>();
private readonly toolTrajectoryNamesById = new Map<string, string>();
private readonly toolTrajectoryItemsById = new Map<string, CodexThreadItem>();
private readonly transcriptToolProgressCallIds = new Set<string>();
private lastNativeToolError: EmbeddedRunAttemptResult["lastToolError"];
private readonly nativeGeneratedMediaUrls = new Set<string>();
@@ -298,8 +299,9 @@ export class CodexAppServerEventProjector {
this.reasoningItemOrder,
).join("\n\n");
const planText = collectTextValues(this.planTextByItem).join("\n\n");
const hasAssistantItemText = this.hasAssistantItemTextForSynthesis();
const legacyFailClosed =
!this.completedTurn || this.completedTurn.status !== "completed" || assistantTexts.length > 0;
!this.completedTurn || this.completedTurn.status !== "completed" || hasAssistantItemText;
const hasDeliverableAssistantOnCompletedTurn =
this.completedTurn?.status === "completed" &&
assistantTexts.some((text) => text.trim().length > 0);
@@ -1150,6 +1152,7 @@ export class CodexAppServerEventProjector {
if (params.phase === "start") {
this.toolTrajectoryCallIds.add(params.item.id);
this.toolTrajectoryNamesById.set(params.item.id, params.name);
this.toolTrajectoryItemsById.set(params.item.id, params.item);
this.options.trajectoryRecorder?.recordEvent("tool.call", {
threadId: this.threadId,
turnId: this.turnId,
@@ -1503,6 +1506,34 @@ export class CodexAppServerEventProjector {
}
if (!params.recordPromptError) {
const firstMissingId =
missingTranscriptIds.find((id) => {
const name = this.toolTranscriptNamesById.get(id) ?? this.toolTrajectoryNamesById.get(id);
return Boolean(name);
}) ??
missingTrajectoryIds.find((id) => {
const name = this.toolTrajectoryNamesById.get(id) ?? this.toolTranscriptNamesById.get(id);
return Boolean(name);
});
if (firstMissingId) {
const name =
this.toolTranscriptNamesById.get(firstMissingId) ??
this.toolTrajectoryNamesById.get(firstMissingId);
if (name) {
const item = this.toolTrajectoryItemsById.get(firstMissingId);
const meta = item
? itemMeta(item, this.toolProgressDetailMode())
: this.toolMetas.get(firstMissingId)?.meta;
const actionFingerprint = item ? nativeToolActionFingerprint(item) : undefined;
this.lastNativeToolError = {
toolName: name,
...(meta ? { meta } : {}),
error: formatMissingToolResultError({ id: firstMissingId, name }),
...(item && isMutatingNativeToolItem(item) ? { mutatingAction: true } : {}),
...(actionFingerprint ? { actionFingerprint } : {}),
};
}
}
return;
}
const missingCount = new Set([...missingTranscriptIds, ...missingTrajectoryIds]).size;
@@ -1614,6 +1645,23 @@ export class CodexAppServerEventProjector {
return finalText ? [finalText] : [];
}
/**
 * Reports whether any tracked assistant item outside the "commentary" phase
 * has stored text of non-zero length.
 *
 * Items are scanned from the most recently ordered entry backwards. The text
 * is measured by raw length and is not trimmed, so a whitespace-only item
 * still counts here.
 *
 * @returns `true` as soon as one qualifying item is found, otherwise `false`.
 */
private hasAssistantItemTextForSynthesis(): boolean {
  for (let index = this.assistantItemOrder.length; index-- > 0; ) {
    const candidateId = this.assistantItemOrder[index];
    // Skip empty slots and commentary-phase items.
    if (!candidateId || this.assistantPhaseByItem.get(candidateId) === "commentary") {
      continue;
    }
    const storedLength = this.assistantTextByItem.get(candidateId)?.length ?? 0;
    if (storedLength > 0) {
      return true;
    }
  }
  return false;
}
/**
 * Returns the `text` of the item produced by `resolveFinalAssistantTextItem()`,
 * or `undefined` when that call yields no item.
 */
private resolveFinalAssistantText(): string | undefined {
  const finalItem = this.resolveFinalAssistantTextItem();
  return finalItem?.text;
}

View File

@@ -1358,6 +1358,12 @@ describe("runCodexAppServerAttempt", () => {
const result = await run;
expect(result.promptError).toBeNull();
expect(result.lastToolError).toMatchObject({
toolName: "bash",
error: expect.stringContaining("without a matching tool.result"),
mutatingAction: true,
});
expect(result.lastToolError?.actionFingerprint).toContain("pnpm test extensions/codex");
expect(result.assistantTexts).toEqual(["Recovered with final answer after orphan tool call."]);
expect(result.messagesSnapshot.map((message) => message.role)).toEqual([
"user",