fix: harden codex verbose tool progress (#70966) (thanks @jalehman)

This commit is contained in:
Peter Steinberger
2026-04-24 08:09:52 +01:00
parent f353a61bab
commit 50e36983bb
6 changed files with 229 additions and 15 deletions

View File

@@ -30,7 +30,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Voice-call/Telnyx: preserve inbound/outbound callback metadata and read transcription text from Telnyx's current `transcription_data` payload.
- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior.
- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior. (#70966) Thanks @jalehman.
- Codex harness: route native `request_user_input` prompts back to the originating chat, preserve queued follow-up answers, and honor newer app-server command approval amendment decisions.
- Codex status: report Codex CLI OAuth as `oauth (codex-cli)` for native `codex/*` sessions instead of showing unknown auth. Fixes #70688. Thanks @jb510.
- Codex harness/context-engine: redact context-engine assembly failures before logging, so fallback warnings do not serialize raw error objects. (#70809) Thanks @jalehman.

View File

@@ -529,10 +529,41 @@ describe("CodexAppServerEventProjector", () => {
expect(onToolResult).toHaveBeenCalledTimes(1);
expect(onToolResult).toHaveBeenCalledWith({
text: "🛠️ Bash: `pnpm test extensions/codex`",
text: "🛠️ Bash: `` run tests (in /workspace), `pnpm test extensions/codex` ``",
});
});
// Verifies that a secret-looking token inside a command string is masked
// before the verbose tool summary is handed to `onToolResult`.
it("redacts secrets in verbose command summaries", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "on",
onToolResult,
});
// Simulate the start of a command whose inline env assignment carries an API key.
await projector.handleNotification(
forCurrentTurn("item/started", {
item: {
type: "commandExecution",
id: "cmd-1",
command: "OPENAI_API_KEY=sk-1234567890abcdefZZZZ pnpm test",
cwd: "/workspace",
processId: null,
source: "agent",
status: "inProgress",
commandActions: [],
aggregatedOutput: null,
exitCode: null,
durationMs: null,
},
}),
);
// The first emitted message must show only the masked form of the key,
// never the raw value.
const text = onToolResult.mock.calls[0]?.[0]?.text;
expect(text).toContain("sk-123…ZZZZ");
expect(text).not.toContain("sk-1234567890abcdefZZZZ");
});
it("uses argument details instead of lifecycle status in verbose tool summaries", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
@@ -596,6 +627,76 @@ describe("CodexAppServerEventProjector", () => {
});
});
// Verifies that tool output which itself contains a ``` line is wrapped in a
// longer (four-backtick) fence in the emitted progress message.
it("uses a safe markdown fence for verbose tool output", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "full",
onToolResult,
});
// Completed dynamic tool call whose text content embeds a triple-backtick
// line followed by a MEDIA: line.
await projector.handleNotification(
turnCompleted([
{
type: "dynamicToolCall",
id: "tool-1",
namespace: null,
tool: "read",
arguments: { path: "README.md" },
status: "completed",
contentItems: [{ type: "inputText", text: "line\n```\nMEDIA:/tmp/secret.png" }],
success: true,
durationMs: 12,
},
]),
);
// The second call carries the output; the embedded ``` and MEDIA: lines
// must stay inside the ````txt block.
// NOTE(review): the first call is presumably the tool summary — confirm.
expect(onToolResult).toHaveBeenNthCalledWith(2, {
text: "📖 Read: `from README.md`\n````txt\nline\n```\nMEDIA:/tmp/secret.png\n````",
});
});
// Verifies that streamed command output is capped per item and that the
// aggregated output is not re-sent once deltas have already been streamed.
it("bounds streamed verbose tool output", async () => {
const onToolResult = vi.fn();
const projector = await createProjector({
...(await createParams()),
verboseLevel: "full",
onToolResult,
});
// Stream 25 deltas for the same item; this is more than the 20 messages
// the expectations below allow through.
for (let i = 0; i < 25; i += 1) {
await projector.handleNotification(
forCurrentTurn("item/commandExecution/outputDelta", {
itemId: "cmd-1",
delta: `line ${i}\n`,
}),
);
}
// Complete the turn with an aggregated output that must NOT be emitted again.
await projector.handleNotification(
turnCompleted([
{
type: "commandExecution",
id: "cmd-1",
command: "pnpm test",
cwd: "/workspace",
processId: null,
source: "agent",
status: "completed",
commandActions: [],
aggregatedOutput: "final output should not duplicate streamed output",
exitCode: 0,
durationMs: 12,
},
]),
);
// 21 calls in total; the 20th (index 19) is the last streamed chunk and
// carries the truncation marker.
// NOTE(review): the remaining call is presumably the tool summary emitted at
// completion — confirm against the projector.
expect(onToolResult).toHaveBeenCalledTimes(21);
expect(onToolResult.mock.calls[19]?.[0]?.text).toContain("...(truncated)...");
// No emitted message may contain the aggregated output text.
expect(JSON.stringify(onToolResult.mock.calls)).not.toContain(
"final output should not duplicate",
);
});
it("continues projecting turn completion when an event consumer throws", async () => {
const onAgentEvent = vi.fn(() => {
throw new Error("consumer failed");

View File

@@ -3,12 +3,14 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import {
formatErrorMessage,
formatToolProgressOutput,
inferToolMetaFromArgs,
normalizeUsage,
runAgentHarnessAfterCompactionHook,
runAgentHarnessBeforeCompactionHook,
type EmbeddedRunAttemptParams,
type EmbeddedRunAttemptResult,
TOOL_PROGRESS_OUTPUT_MAX_CHARS,
formatToolAggregate,
type MessagingToolSend,
} from "openclaw/plugin-sdk/agent-harness-runtime";
@@ -56,6 +58,8 @@ const CURRENT_TOKEN_USAGE_KEYS = [
"last_token_usage",
] as const;
// Upper bound on streamed output-delta progress messages emitted per tool item.
// Once it is reached, the last emitted chunk is marked as truncated and later
// deltas for that item are dropped.
const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20;
export class CodexAppServerEventProjector {
private readonly assistantTextByItem = new Map<string, string>();
private readonly assistantItemOrder: string[] = [];
@@ -66,6 +70,11 @@ export class CodexAppServerEventProjector {
private readonly activeCompactionItemIds = new Set<string>();
private readonly toolResultSummaryItemIds = new Set<string>();
private readonly toolResultOutputItemIds = new Set<string>();
private readonly toolResultOutputStreamedItemIds = new Set<string>();
private readonly toolResultOutputDeltaState = new Map<
string,
{ chars: number; messages: number; truncated: boolean }
>();
private readonly toolMetas = new Map<string, { toolName: string; meta?: string }>();
private assistantStarted = false;
private reasoningStarted = false;
@@ -489,10 +498,44 @@ export class CodexAppServerEventProjector {
if (!itemId || !delta || !this.shouldEmitToolOutput()) {
return;
}
const state = this.toolResultOutputDeltaState.get(itemId) ?? {
chars: 0,
messages: 0,
truncated: false,
};
if (state.truncated) {
return;
}
const remainingChars = Math.max(0, TOOL_PROGRESS_OUTPUT_MAX_CHARS - state.chars);
const remainingMessages = Math.max(0, MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM - state.messages);
if (remainingChars === 0 || remainingMessages === 0) {
state.truncated = true;
this.toolResultOutputDeltaState.set(itemId, state);
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, undefined, "(output truncated)"),
});
return;
}
const chunk = delta.length > remainingChars ? delta.slice(0, remainingChars) : delta;
state.chars += chunk.length;
state.messages += 1;
const reachedLimit =
delta.length > remainingChars ||
state.chars >= TOOL_PROGRESS_OUTPUT_MAX_CHARS ||
state.messages >= MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM;
if (reachedLimit) {
state.truncated = true;
}
this.toolResultOutputDeltaState.set(itemId, state);
this.toolResultOutputStreamedItemIds.add(itemId);
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, undefined, delta),
output: true,
text: formatToolOutput(
toolName,
undefined,
reachedLimit ? `${chunk}\n...(truncated)...` : chunk,
),
});
}
@@ -588,6 +631,9 @@ export class CodexAppServerEventProjector {
if (this.toolResultOutputItemIds.has(itemId)) {
return;
}
if (this.toolResultOutputStreamedItemIds.has(itemId)) {
return;
}
const toolName = itemName(item);
const output = itemOutputText(item);
if (!toolName || !output) {
@@ -596,12 +642,16 @@ export class CodexAppServerEventProjector {
this.emitToolResultMessage({
itemId,
text: formatToolOutput(toolName, itemMeta(item), output),
output: true,
finalOutput: true,
});
}
private emitToolResultMessage(params: { itemId: string; text: string; output?: boolean }): void {
if (params.output) {
private emitToolResultMessage(params: {
itemId: string;
text: string;
finalOutput?: boolean;
}): void {
if (params.finalOutput) {
this.toolResultOutputItemIds.add(params.itemId);
}
try {
@@ -934,7 +984,10 @@ function itemName(item: CodexThreadItem): string | undefined {
function itemMeta(item: CodexThreadItem): string | undefined {
if (item.type === "commandExecution" && typeof item.command === "string") {
return item.command;
return inferToolMetaFromArgs("exec", {
command: item.command,
cwd: typeof item.cwd === "string" ? item.cwd : undefined,
});
}
if (item.type === "webSearch" && typeof item.query === "string") {
return item.query;
@@ -995,11 +1048,30 @@ function formatToolSummary(toolName: string, meta?: string): string {
}
function formatToolOutput(toolName: string, meta: string | undefined, output: string): string {
const trimmed = output.trim();
if (!trimmed) {
const formattedOutput = formatToolProgressOutput(output);
if (!formattedOutput) {
return formatToolSummary(toolName, meta);
}
return `${formatToolSummary(toolName, meta)}\n\`\`\`txt\n${trimmed}\n\`\`\``;
const fence = markdownFenceForText(formattedOutput);
return `${formatToolSummary(toolName, meta)}\n${fence}txt\n${formattedOutput}\n${fence}`;
}
/**
 * Builds a backtick fence for wrapping `text` in a fenced code block.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `text`, and never shorter than three backticks.
 */
function markdownFenceForText(text: string): string {
  const minimumFenceLength = 3;
  const fenceLength = Math.max(minimumFenceLength, longestBacktickRun(text) + 1);
  return "`".repeat(fenceLength);
}
/**
 * Returns the length of the longest run of consecutive backticks in `value`,
 * or 0 when `value` contains no backtick.
 */
function longestBacktickRun(value: string): number {
  // Each match is one maximal run of backticks.
  const runs = value.match(/`+/g);
  if (runs === null) {
    return 0;
  }
  return runs.reduce((best, run) => Math.max(best, run.length), 0);
}
function readItemString(item: CodexThreadItem, key: string): string | undefined {

View File

@@ -45,6 +45,11 @@ describe("tool meta formatting", () => {
expect(out).toContain("`~/dir/a.txt`");
});
// A meta string that already contains single backticks must be wrapped in a
// double-backtick inline code span.
it("uses a longer inline code delimiter when meta contains backticks", () => {
  const rendered = formatToolAggregate("fs", ["name `with` ticks"], { markdown: true });

  expect(rendered).toBe("🧩 Fs: ``name `with` ticks``");
});
it("keeps exec flags outside markdown and moves them to the front", () => {
vi.stubEnv("HOME", home);
const out = formatToolAggregate("exec", [`cd ${home}/dir && gemini 2>&1 · elevated`], {

View File

@@ -137,8 +137,21 @@ function maybeWrapMarkdown(value: string, markdown?: boolean): string {
if (!markdown) {
return value;
}
if (value.includes("`")) {
return value;
}
return `\`${value}\``;
const delimiter = "`".repeat(longestBacktickRun(value) + 1);
const padding = value.startsWith("`") || value.endsWith("`") || value.includes("\n") ? " " : "";
return `${delimiter}${padding}${value}${padding}${delimiter}`;
}
/**
 * Scans `value` and reports the length of its longest unbroken sequence of
 * backtick characters (0 if there is none).
 */
function longestBacktickRun(value: string): number {
  let best = 0;
  let index = 0;
  while (index < value.length) {
    if (value[index] !== "`") {
      index += 1;
      continue;
    }
    // Consume the whole run starting here and measure it.
    const runStart = index;
    while (index < value.length && value[index] === "`") {
      index += 1;
    }
    best = Math.max(best, index - runStart);
  }
  return best;
}

View File

@@ -3,6 +3,10 @@
// register quickly inside gateway startup and Docker e2e runs.
import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
import { redactToolDetail } from "../logging/redact.js";
import { truncateUtf16Safe } from "../utils.js";
/**
 * Default character budget for verbose tool progress output; used as the
 * fallback `maxChars` in `formatToolProgressOutput`.
 */
export const TOOL_PROGRESS_OUTPUT_MAX_CHARS = 8_000;
export type {
AgentHarness,
@@ -96,3 +100,22 @@ export function inferToolMetaFromArgs(toolName: string, args: unknown): string |
const display = resolveToolDisplay({ name: toolName, args });
return formatToolDetail(display);
}
/**
 * Prepares raw tool output for display in a user-facing verbose progress message.
 *
 * Steps, in order: normalize `\r\n` and lone `\r` to `\n`, trim surrounding
 * whitespace, pass the text through `redactToolDetail`, then cap the length.
 *
 * @param output - Raw tool output text.
 * @param options - Optional settings; `maxChars` overrides the default cap of
 *   `TOOL_PROGRESS_OUTPUT_MAX_CHARS`.
 * @returns The prepared text, or `undefined` when nothing remains after trimming.
 *   When the redacted text exceeds the cap, it is cut with `truncateUtf16Safe`
 *   and suffixed with a `...(truncated)...` line.
 */
export function formatToolProgressOutput(
  output: string,
  options?: { maxChars?: number },
): string | undefined {
  const unixNewlines = output.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  const body = unixNewlines.trim();
  if (!body) {
    return undefined;
  }

  // Redaction runs on the full text before any truncation is applied.
  const safeBody = redactToolDetail(body);
  const limit = options?.maxChars ?? TOOL_PROGRESS_OUTPUT_MAX_CHARS;
  if (safeBody.length <= limit) {
    return safeBody;
  }

  const head = truncateUtf16Safe(safeBody, limit);
  return `${head}\n...(truncated)...`;
}