fix: harden codex verbose tool progress (#70966) (thanks @jalehman)

This commit is contained in:
Peter Steinberger
2026-04-24 08:09:52 +01:00
parent f353a61bab
commit 50e36983bb
6 changed files with 229 additions and 15 deletions

View File

@@ -529,10 +529,41 @@ describe("CodexAppServerEventProjector", () => {
expect(onToolResult).toHaveBeenCalledTimes(1);
expect(onToolResult).toHaveBeenCalledWith({
text: "🛠️ Bash: `pnpm test extensions/codex`",
text: "🛠️ Bash: `` run tests (in /workspace), `pnpm test extensions/codex` ``",
});
});
// Regression test: a secret embedded in a command line must not reach the
// onToolResult consumer verbatim when verbose tool summaries are enabled.
it("redacts secrets in verbose command summaries", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "on",
onToolResult,
});
// Start a command execution whose command string carries an inline API key.
await projector.handleNotification(
forCurrentTurn("item/started", {
item: {
type: "commandExecution",
id: "cmd-1",
command: "OPENAI_API_KEY=sk-1234567890abcdefZZZZ pnpm test",
cwd: "/workspace",
processId: null,
source: "agent",
status: "inProgress",
commandActions: [],
aggregatedOutput: null,
exitCode: null,
durationMs: null,
},
}),
);
// Inspect the text of the first callback invocation.
const text = onToolResult.mock.calls[0]?.[0]?.text;
// The key must appear only in its masked form, never in full.
expect(text).toContain("sk-123…ZZZZ");
expect(text).not.toContain("sk-1234567890abcdefZZZZ");
});
it("uses argument details instead of lifecycle status in verbose tool summaries", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
@@ -596,6 +627,76 @@ describe("CodexAppServerEventProjector", () => {
});
});
// Regression test: tool output that itself contains a ``` line must be wrapped
// in a longer fence so the payload cannot terminate the code block early.
it("uses a safe markdown fence for verbose tool output", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "full",
onToolResult,
});
// Complete a turn with a dynamic tool call whose output embeds a
// three-backtick line followed by a MEDIA: line.
await projector.handleNotification(
turnCompleted([
{
type: "dynamicToolCall",
id: "tool-1",
namespace: null,
tool: "read",
arguments: { path: "README.md" },
status: "completed",
contentItems: [{ type: "inputText", text: "line\n```\nMEDIA:/tmp/secret.png" }],
success: true,
durationMs: 12,
},
]),
);
// The second callback invocation carries the output; it is expected to use a
// four-backtick fence around the unmodified payload.
// NOTE(review): the first invocation is presumably the tool summary — confirm.
expect(onToolResult).toHaveBeenNthCalledWith(2, {
text: "📖 Read: `from README.md`\n````txt\nline\n```\nMEDIA:/tmp/secret.png\n````",
});
});
// Regression test: streamed command output deltas are capped per item, and the
// final aggregated output is not emitted again once deltas were streamed.
it("bounds streamed verbose tool output", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "full",
onToolResult,
});
// Stream 25 output deltas for the same item, more than the test expects the
// projector to forward.
for (let i = 0; i < 25; i += 1) {
await projector.handleNotification(
forCurrentTurn("item/commandExecution/outputDelta", {
itemId: "cmd-1",
delta: `line ${i}\n`,
}),
);
}
// Complete the turn with an aggregatedOutput marker string that must never be
// forwarded to the consumer.
await projector.handleNotification(
turnCompleted([
{
type: "commandExecution",
id: "cmd-1",
command: "pnpm test",
cwd: "/workspace",
processId: null,
source: "agent",
status: "completed",
commandActions: [],
aggregatedOutput: "final output should not duplicate streamed output",
exitCode: 0,
durationMs: 12,
},
]),
);
// NOTE(review): 21 is presumably 20 forwarded delta messages plus one
// non-delta message (e.g. the tool summary on completion) — confirm.
expect(onToolResult).toHaveBeenCalledTimes(21);
// The 20th invocation (index 19) must carry the truncation marker.
expect(onToolResult.mock.calls[19]?.[0]?.text).toContain("...(truncated)...");
// No invocation may contain the aggregated final output.
expect(JSON.stringify(onToolResult.mock.calls)).not.toContain(
"final output should not duplicate",
);
});
it("continues projecting turn completion when an event consumer throws", async () => {
const onAgentEvent = vi.fn(() => {
throw new Error("consumer failed");

View File

@@ -3,12 +3,14 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import {
formatErrorMessage,
formatToolProgressOutput,
inferToolMetaFromArgs,
normalizeUsage,
runAgentHarnessAfterCompactionHook,
runAgentHarnessBeforeCompactionHook,
type EmbeddedRunAttemptParams,
type EmbeddedRunAttemptResult,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
formatToolAggregate,
type MessagingToolSend,
} from "openclaw/plugin-sdk/agent-harness-runtime";
@@ -56,6 +58,8 @@ const CURRENT_TOKEN_USAGE_KEYS = [
"last_token_usage",
] as const;
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
export class CodexAppServerEventProjector {
private readonly assistantTextByItem = new Map<string, string>();
private readonly assistantItemOrder: string[] = [];
@@ -66,6 +70,11 @@ export class CodexAppServerEventProjector {
private readonly activeCompactionItemIds = new Set<string>();
private readonly toolResultSummaryItemIds = new Set<string>();
private readonly toolResultOutputItemIds = new Set<string>();
private readonly toolResultOutputStreamedItemIds = new Set<string>();
private readonly toolResultOutputDeltaState = new Map<
string,
{ chars: number; messages: number; truncated: boolean }
>();
private readonly toolMetas = new Map<string, { toolName: string; meta?: string }>();
private assistantStarted = false;
private reasoningStarted = false;
@@ -489,10 +498,44 @@ export class CodexAppServerEventProjector {
if (!itemId || !delta || !this.shouldEmitToolOutput()) {
return;
}
const state = this.toolResultOutputDeltaState.get(itemId) ?? {
chars: 0,
messages: 0,
truncated: false,
};
if (state.truncated) {
return;
}
const remainingChars = Math.max(0, TOOL_PROGRESS_OUTPUT_MAX_CHARS - state.chars);
const remainingMessages = Math.max(0, MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM - state.messages);
if (remainingChars === 0 || remainingMessages === 0) {
state.truncated = true;
this.toolResultOutputDeltaState.set(itemId, state);
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, undefined, "(output truncated)"),
});
return;
}
const chunk = delta.length > remainingChars ? delta.slice(0, remainingChars) : delta;
state.chars += chunk.length;
state.messages += 1;
const reachedLimit =
delta.length > remainingChars ||
state.chars >= TOOL_PROGRESS_OUTPUT_MAX_CHARS ||
state.messages >= MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM;
if (reachedLimit) {
state.truncated = true;
}
this.toolResultOutputDeltaState.set(itemId, state);
this.toolResultOutputStreamedItemIds.add(itemId);
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, undefined, delta),
output: true,
text: formatToolOutput(
toolName,
undefined,
reachedLimit ? `${chunk}\n...(truncated)...` : chunk,
),
});
}
@@ -588,6 +631,9 @@ export class CodexAppServerEventProjector {
if (this.toolResultOutputItemIds.has(itemId)) {
return;
}
if (this.toolResultOutputStreamedItemIds.has(itemId)) {
return;
}
const toolName = itemName(item);
const output = itemOutputText(item);
if (!toolName || !output) {
@@ -596,12 +642,16 @@ export class CodexAppServerEventProjector {
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, itemMeta(item), output),
output: true,
finalOutput: true,
});
}
private emitToolResultMessage(params: { itemId: string; text: string; output?: boolean }): void {
if (params.output) {
private emitToolResultMessage(params: {
itemId: string;
text: string;
finalOutput?: boolean;
}): void {
if (params.finalOutput) {
this.toolResultOutputItemIds.add(params.itemId);
}
try {
@@ -934,7 +984,10 @@ function itemName(item: CodexThreadItem): string | undefined {
function itemMeta(item: CodexThreadItem): string | undefined {
if (item.type === "commandExecution" && typeof item.command === "string") {
return item.command;
return inferToolMetaFromArgs("exec", {
command: item.command,
cwd: typeof item.cwd === "string" ? item.cwd : undefined,
});
}
if (item.type === "webSearch" && typeof item.query === "string") {
return item.query;
@@ -995,11 +1048,30 @@ function formatToolSummary(toolName: string, meta?: string): string {
}
function formatToolOutput(toolName: string, meta: string | undefined, output: string): string {
const trimmed = output.trim();
if (!trimmed) {
const formattedOutput = formatToolProgressOutput(output);
if (!formattedOutput) {
return formatToolSummary(toolName, meta);
}
return `${formatToolSummary(toolName, meta)}\n\`\`\`txt\n${trimmed}\n\`\`\``;
const fence = markdownFenceForText(formattedOutput);
return `${formatToolSummary(toolName, meta)}\n${fence}txt\n${formattedOutput}\n${fence}`;
}
/**
 * Builds a backtick fence that is safe to wrap around `text`.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `text`, and never shorter than three backticks.
 *
 * @param text - Content that will be placed inside the fenced block.
 * @returns A string consisting solely of backticks.
 */
function markdownFenceForText(text: string): string {
  const fenceLength = longestBacktickRun(text) + 1;
  return "`".repeat(fenceLength < 3 ? 3 : fenceLength);
}
/**
 * Measures the longest run of consecutive backtick characters in `value`.
 *
 * @param value - Text to scan.
 * @returns Length of the longest backtick run, or 0 when there is none.
 */
function longestBacktickRun(value: string): number {
  // Each regex match is one maximal run of backticks.
  const runs: readonly string[] = value.match(/`+/g) ?? [];
  let widest = 0;
  for (const run of runs) {
    if (run.length > widest) {
      widest = run.length;
    }
  }
  return widest;
}
function readItemString(item: CodexThreadItem, key: string): string | undefined {