fix: harden codex verbose tool progress (#70966) (thanks @jalehman)

2026-05-06 09:40:43 +00:00 · 2026-04-24 08:09:52 +01:00
parent f353a61bab
commit 50e36983bb
6 changed files with 229 additions and 15 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,7 +30,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes

 - Voice-call/Telnyx: preserve inbound/outbound callback metadata and read transcription text from Telnyx's current `transcription_data` payload.
- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior.
+- Codex harness: send verbose tool progress to chat channels for native app-server runs, matching the Pi harness `/verbose on` and `/verbose full` behavior. (#70966) Thanks @jalehman.
 - Codex harness: route native `request_user_input` prompts back to the originating chat, preserve queued follow-up answers, and honor newer app-server command approval amendment decisions.
 - Codex status: report Codex CLI OAuth as `oauth (codex-cli)` for native `codex/*` sessions instead of showing unknown auth. Fixes #70688. Thanks @jb510.
 - Codex harness/context-engine: redact context-engine assembly failures before logging, so fallback warnings do not serialize raw error objects. (#70809) Thanks @jalehman.
--- a/extensions/codex/src/app-server/event-projector.test.ts
+++ b/extensions/codex/src/app-server/event-projector.test.ts
@@ -529,10 +529,41 @@ describe("CodexAppServerEventProjector", () => {

    expect(onToolResult).toHaveBeenCalledTimes(1);
    expect(onToolResult).toHaveBeenCalledWith({
-      text: "🛠️ Bash: `pnpm test extensions/codex`",
+      text: "🛠️ Bash: `` run tests (in /workspace), `pnpm test extensions/codex` ``",
    });
  });

+  it("redacts secrets in verbose command summaries", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "on",
+      onToolResult,
+    });
+
+    await projector.handleNotification(
+      forCurrentTurn("item/started", {
+        item: {
+          type: "commandExecution",
+          id: "cmd-1",
+          command: "OPENAI_API_KEY=sk-1234567890abcdefZZZZ pnpm test",
+          cwd: "/workspace",
+          processId: null,
+          source: "agent",
+          status: "inProgress",
+          commandActions: [],
+          aggregatedOutput: null,
+          exitCode: null,
+          durationMs: null,
+        },
+      }),
+    );
+
+    const text = onToolResult.mock.calls[0]?.[0]?.text;
+    expect(text).toContain("sk-123…ZZZZ");
+    expect(text).not.toContain("sk-1234567890abcdefZZZZ");
+  });
+
  it("uses argument details instead of lifecycle status in verbose tool summaries", async () => {
    const onToolResult = vi.fn();
    const projector = await createProjector({
@@ -596,6 +627,76 @@ describe("CodexAppServerEventProjector", () => {
    });
  });

+  it("uses a safe markdown fence for verbose tool output", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "full",
+      onToolResult,
+    });
+
+    await projector.handleNotification(
+      turnCompleted([
+        {
+          type: "dynamicToolCall",
+          id: "tool-1",
+          namespace: null,
+          tool: "read",
+          arguments: { path: "README.md" },
+          status: "completed",
+          contentItems: [{ type: "inputText", text: "line\n```\nMEDIA:/tmp/secret.png" }],
+          success: true,
+          durationMs: 12,
+        },
+      ]),
+    );
+
+    expect(onToolResult).toHaveBeenNthCalledWith(2, {
+      text: "📖 Read: `from README.md`\n````txt\nline\n```\nMEDIA:/tmp/secret.png\n````",
+    });
+  });
+
+  it("bounds streamed verbose tool output", async () => {
+    const onToolResult = vi.fn();
+    const projector = await createProjector({
+      ...(await createParams()),
+      verboseLevel: "full",
+      onToolResult,
+    });
+
+    for (let i = 0; i < 25; i += 1) {
+      await projector.handleNotification(
+        forCurrentTurn("item/commandExecution/outputDelta", {
+          itemId: "cmd-1",
+          delta: `line ${i}\n`,
+        }),
+      );
+    }
+    await projector.handleNotification(
+      turnCompleted([
+        {
+          type: "commandExecution",
+          id: "cmd-1",
+          command: "pnpm test",
+          cwd: "/workspace",
+          processId: null,
+          source: "agent",
+          status: "completed",
+          commandActions: [],
+          aggregatedOutput: "final output should not duplicate streamed output",
+          exitCode: 0,
+          durationMs: 12,
+        },
+      ]),
+    );
+
+    expect(onToolResult).toHaveBeenCalledTimes(21);
+    expect(onToolResult.mock.calls[19]?.[0]?.text).toContain("...(truncated)...");
+    expect(JSON.stringify(onToolResult.mock.calls)).not.toContain(
+      "final output should not duplicate",
+    );
+  });
+
  it("continues projecting turn completion when an event consumer throws", async () => {
    const onAgentEvent = vi.fn(() => {
      throw new Error("consumer failed");
--- a/extensions/codex/src/app-server/event-projector.ts
+++ b/extensions/codex/src/app-server/event-projector.ts
@@ -3,12 +3,14 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
 import { SessionManager } from "@mariozechner/pi-coding-agent";
 import {
  formatErrorMessage,
+  formatToolProgressOutput,
  inferToolMetaFromArgs,
  normalizeUsage,
  runAgentHarnessAfterCompactionHook,
  runAgentHarnessBeforeCompactionHook,
  type EmbeddedRunAttemptParams,
  type EmbeddedRunAttemptResult,
+  TOOL_PROGRESS_OUTPUT_MAX_CHARS,
  formatToolAggregate,
  type MessagingToolSend,
 } from "openclaw/plugin-sdk/agent-harness-runtime";
@@ -56,6 +58,8 @@ const CURRENT_TOKEN_USAGE_KEYS = [
  "last_token_usage",
 ] as const;

+const MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM = 20; // cap on streamed output-delta messages emitted per item id
+
 export class CodexAppServerEventProjector {
  private readonly assistantTextByItem = new Map<string, string>();
  private readonly assistantItemOrder: string[] = [];
@@ -66,6 +70,11 @@ export class CodexAppServerEventProjector {
  private readonly activeCompactionItemIds = new Set<string>();
  private readonly toolResultSummaryItemIds = new Set<string>();
  private readonly toolResultOutputItemIds = new Set<string>();
+  private readonly toolResultOutputStreamedItemIds = new Set<string>();
+  private readonly toolResultOutputDeltaState = new Map<
+    string,
+    { chars: number; messages: number; truncated: boolean }
+  >();
  private readonly toolMetas = new Map<string, { toolName: string; meta?: string }>();
  private assistantStarted = false;
  private reasoningStarted = false;
@@ -489,10 +498,44 @@ export class CodexAppServerEventProjector {
    if (!itemId || !delta || !this.shouldEmitToolOutput()) {
      return;
    }
+    const state = this.toolResultOutputDeltaState.get(itemId) ?? {
+      chars: 0,
+      messages: 0,
+      truncated: false,
+    };
+    if (state.truncated) {
+      return;
+    }
+    const remainingChars = Math.max(0, TOOL_PROGRESS_OUTPUT_MAX_CHARS - state.chars);
+    const remainingMessages = Math.max(0, MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM - state.messages);
+    if (remainingChars === 0 || remainingMessages === 0) {
+      state.truncated = true;
+      this.toolResultOutputDeltaState.set(itemId, state);
+      this.emitToolResultMessage({
+        itemId,
+        text: formatToolOutput(toolName, undefined, "(output truncated)"),
+      });
+      return;
+    }
+    const chunk = delta.length > remainingChars ? delta.slice(0, remainingChars) : delta;
+    state.chars += chunk.length;
+    state.messages += 1;
+    const reachedLimit =
+      delta.length > remainingChars ||
+      state.chars >= TOOL_PROGRESS_OUTPUT_MAX_CHARS ||
+      state.messages >= MAX_TOOL_OUTPUT_DELTA_MESSAGES_PER_ITEM;
+    if (reachedLimit) {
+      state.truncated = true;
+    }
+    this.toolResultOutputDeltaState.set(itemId, state);
+    this.toolResultOutputStreamedItemIds.add(itemId);
    this.emitToolResultMessage({
      itemId,
-      text: formatToolOutput(toolName, undefined, delta),
-      output: true,
+      text: formatToolOutput(
+        toolName,
+        undefined,
+        reachedLimit ? `${chunk}\n...(truncated)...` : chunk,
+      ),
    });
  }

@@ -588,6 +631,9 @@ export class CodexAppServerEventProjector {
    if (this.toolResultOutputItemIds.has(itemId)) {
      return;
    }
+    if (this.toolResultOutputStreamedItemIds.has(itemId)) {
+      return;
+    }
    const toolName = itemName(item);
    const output = itemOutputText(item);
    if (!toolName || !output) {
@@ -596,12 +642,16 @@ export class CodexAppServerEventProjector {
    this.emitToolResultMessage({
      itemId,
      text: formatToolOutput(toolName, itemMeta(item), output),
-      output: true,
+      finalOutput: true,
    });
  }

-  private emitToolResultMessage(params: { itemId: string; text: string; output?: boolean }): void {
-    if (params.output) {
+  private emitToolResultMessage(params: {
+    itemId: string;
+    text: string;
+    finalOutput?: boolean;
+  }): void {
+    if (params.finalOutput) {
      this.toolResultOutputItemIds.add(params.itemId);
    }
    try {
@@ -934,7 +984,10 @@ function itemName(item: CodexThreadItem): string | undefined {

 function itemMeta(item: CodexThreadItem): string | undefined {
  if (item.type === "commandExecution" && typeof item.command === "string") {
-    return item.command;
+    return inferToolMetaFromArgs("exec", {
+      command: item.command,
+      cwd: typeof item.cwd === "string" ? item.cwd : undefined,
+    });
  }
  if (item.type === "webSearch" && typeof item.query === "string") {
    return item.query;
@@ -995,11 +1048,30 @@ function formatToolSummary(toolName: string, meta?: string): string {
 }

 function formatToolOutput(toolName: string, meta: string | undefined, output: string): string {
-  const trimmed = output.trim();
-  if (!trimmed) {
+  const formattedOutput = formatToolProgressOutput(output); // normalized, redacted, length-capped; undefined when blank
+  if (!formattedOutput) {
     return formatToolSummary(toolName, meta);
   }
-  return `${formatToolSummary(toolName, meta)}\n\`\`\`txt\n${trimmed}\n\`\`\``;
+  const fence = markdownFenceForText(formattedOutput); // fence sized so backtick runs inside the output cannot close it
+  return `${formatToolSummary(toolName, meta)}\n${fence}txt\n${formattedOutput}\n${fence}`;
+}
+
+function markdownFenceForText(text: string): string {
+  return "`".repeat(Math.max(3, longestBacktickRun(text) + 1)); // never under 3 backticks; one longer than the longest run in text
+}
+
+/** Returns the length of the longest run of consecutive backticks in `value` (0 if none). */
+function longestBacktickRun(value: string): number {
+  let best = 0;
+  let streak = 0;
+  for (const ch of value) {
+    // Any other character ends the current streak.
+    streak = ch === "`" ? streak + 1 : 0;
+    if (streak > best) {
+      best = streak;
+    }
+  }
+  return best;
 }

 function readItemString(item: CodexThreadItem, key: string): string | undefined {
--- a/src/auto-reply/tool-meta.test.ts
+++ b/src/auto-reply/tool-meta.test.ts
@@ -45,6 +45,11 @@ describe("tool meta formatting", () => {
    expect(out).toContain("`~/dir/a.txt`");
  });

+  it("uses a longer inline code delimiter when meta contains backticks", () => {
+    const out = formatToolAggregate("fs", ["name `with` ticks"], { markdown: true });
+    expect(out).toBe("🧩 Fs: ``name `with` ticks``");
+  });
+
  it("keeps exec flags outside markdown and moves them to the front", () => {
    vi.stubEnv("HOME", home);
    const out = formatToolAggregate("exec", [`cd ${home}/dir && gemini 2>&1 · elevated`], {
--- a/src/auto-reply/tool-meta.ts
+++ b/src/auto-reply/tool-meta.ts
@@ -137,8 +137,21 @@ function maybeWrapMarkdown(value: string, markdown?: boolean): string {
  if (!markdown) {
    return value;
  }
-  if (value.includes("`")) {
-    return value;
-  }
-  return `\`${value}\``;
+  const delimiter = "`".repeat(longestBacktickRun(value) + 1);
+  const padding = value.startsWith("`") || value.endsWith("`") || value.includes("\n") ? " " : "";
+  return `${delimiter}${padding}${value}${padding}${delimiter}`;
+}
+
+/**
+ * Measures the longest unbroken sequence of backtick characters in `value`.
+ *
+ * @returns the run length, or 0 when `value` contains no backtick.
+ */
+function longestBacktickRun(value: string): number {
+  let longest = 0;
+  for (const run of value.match(/`+/g) ?? []) {
+    // Each match is one maximal run, so its length is the run length.
+    longest = Math.max(longest, run.length);
+  }
+  return longest;
 }
--- a/src/plugin-sdk/agent-harness-runtime.ts
+++ b/src/plugin-sdk/agent-harness-runtime.ts
@@ -3,6 +3,10 @@
 // register quickly inside gateway startup and Docker e2e runs.

 import { formatToolDetail, resolveToolDisplay } from "../agents/tool-display.js";
+import { redactToolDetail } from "../logging/redact.js";
+import { truncateUtf16Safe } from "../utils.js";
+
+export const TOOL_PROGRESS_OUTPUT_MAX_CHARS = 8_000; // default `maxChars` cap applied by formatToolProgressOutput

 export type {
  AgentHarness,
@@ -96,3 +100,22 @@ export function inferToolMetaFromArgs(toolName: string, args: unknown): string |
  const display = resolveToolDisplay({ name: toolName, args });
  return formatToolDetail(display);
 }
+
+/**
+ * Prepare verbose tool output for user-facing progress messages: normalize CR/CRLF line
+ * endings to LF, trim, pass the text through `redactToolDetail`, and cap its length.
+ * @param options - `maxChars` overrides the default cap, `TOOL_PROGRESS_OUTPUT_MAX_CHARS`.
+ * @returns the prepared text (plus a truncation marker when cut), or `undefined` if blank.
+ */
+export function formatToolProgressOutput(
+  output: string,
+  options?: { maxChars?: number },
+): string | undefined {
+  const normalized = output.replace(/\r\n?/g, "\n").trim();
+  if (!normalized) {
+    return undefined;
+  }
+  const safe = redactToolDetail(normalized);
+  const limit = options?.maxChars ?? TOOL_PROGRESS_OUTPUT_MAX_CHARS;
+  return safe.length > limit ? `${truncateUtf16Safe(safe, limit)}\n...(truncated)...` : safe;
+}