fix(agents): demote embedded run stage logs to trace

2026-07-13 21:36:04 +00:00 · 2026-04-29 14:51:20 +01:00
parent 6bbacd14a3
commit 66cdbccc8a
5 changed files with 42 additions and 33 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,7 +23,7 @@ Docs: https://docs.openclaw.ai
 - CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd.
 - CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis.
 - CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst.
- Agents/diagnostics: log slow embedded-run startup and preparation stage timings before model I/O, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
+- Agents/diagnostics: trace embedded-run startup and preparation stage timings before model I/O, and warn only on severe slow stages, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency without noisy normal logs. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
 - Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden.
 - Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject.
 - Channels/Discord: treat bare numeric outbound targets that match the effective Discord DM allowlist as user DMs while preserving account-specific legacy `dm.allowFrom` precedence over inherited root `allowFrom`. (#74303) Thanks @Squirbie.
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -93,7 +93,7 @@ import { handleAssistantFailover } from "./run/assistant-failover.js";
 import {
  createEmbeddedRunStageTracker,
  formatEmbeddedRunStageSummary,
-  shouldEmitEmbeddedRunStageSummary,
+  shouldWarnEmbeddedRunStageSummary,
 } from "./run/attempt-stage-timing.js";
 import { forgetPromptBuildDrainCacheForRun } from "./run/attempt.prompt-helpers.js";
 import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
@@ -333,14 +333,18 @@ export async function runEmbeddedPiAgent(
      let startupStagesEmitted = false;
      const emitStartupStageSummary = (phase: string) => {
        const summary = startupStages.snapshot();
+        const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
+        if (!shouldWarn && !log.isEnabled("trace")) {
+          return;
+        }
        const message = formatEmbeddedRunStageSummary(
-          `embedded run startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
+          `[trace:embedded-run] startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
          summary,
        );
-        if (shouldEmitEmbeddedRunStageSummary(summary)) {
+        if (shouldWarn) {
          log.warn(message);
-        } else if (log.isEnabled("debug")) {
-          log.debug(message);
+        } else {
+          log.trace(message);
        }
      };
      params.onExecutionStarted?.();
--- a/src/agents/pi-embedded-runner/run/attempt-stage-timing.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt-stage-timing.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
 import {
  createEmbeddedRunStageTracker,
  formatEmbeddedRunStageSummary,
-  shouldEmitEmbeddedRunStageSummary,
+  shouldWarnEmbeddedRunStageSummary,
 } from "./attempt-stage-timing.js";

 describe("embedded run stage timing", () => {
@@ -25,18 +25,25 @@ describe("embedded run stage timing", () => {
    });
  });

-  it("emits only slow stage summaries", () => {
+  it("warns only for very slow stage summaries by default", () => {
    expect(
-      shouldEmitEmbeddedRunStageSummary(
-        {
-          totalMs: 1_999,
-          stages: [{ name: "auth", durationMs: 999, elapsedMs: 999 }],
-        },
-        { totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
-      ),
+      shouldWarnEmbeddedRunStageSummary({
+        totalMs: 9_999,
+        stages: [{ name: "auth", durationMs: 4_999, elapsedMs: 4_999 }],
+      }),
    ).toBe(false);
+    expect(shouldWarnEmbeddedRunStageSummary({ totalMs: 10_000, stages: [] })).toBe(true);
    expect(
-      shouldEmitEmbeddedRunStageSummary(
+      shouldWarnEmbeddedRunStageSummary({
+        totalMs: 10,
+        stages: [{ name: "auth", durationMs: 5_000, elapsedMs: 5_000 }],
+      }),
+    ).toBe(true);
+  });
+
+  it("supports custom warning thresholds", () => {
+    expect(
+      shouldWarnEmbeddedRunStageSummary(
        {
          totalMs: 2_000,
          stages: [{ name: "auth", durationMs: 10, elapsedMs: 10 }],
@@ -44,15 +51,6 @@ describe("embedded run stage timing", () => {
        { totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
      ),
    ).toBe(true);
-    expect(
-      shouldEmitEmbeddedRunStageSummary(
-        {
-          totalMs: 10,
-          stages: [{ name: "auth", durationMs: 1_000, elapsedMs: 1_000 }],
-        },
-        { totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
-      ),
-    ).toBe(true);
  });

  it("formats summaries compactly for logs", () => {
--- a/src/agents/pi-embedded-runner/run/attempt-stage-timing.ts
+++ b/src/agents/pi-embedded-runner/run/attempt-stage-timing.ts
@@ -14,6 +14,9 @@ export type EmbeddedRunStageTracker = {
  snapshot: () => EmbeddedRunStageSummary;
 };

+export const EMBEDDED_RUN_STAGE_WARN_TOTAL_MS = 10_000;
+export const EMBEDDED_RUN_STAGE_WARN_STAGE_MS = 5_000;
+
 export function createEmbeddedRunStageTracker(options?: {
  now?: () => number;
 }): EmbeddedRunStageTracker {
@@ -43,15 +46,15 @@ export function createEmbeddedRunStageTracker(options?: {
  };
 }

-export function shouldEmitEmbeddedRunStageSummary(
+export function shouldWarnEmbeddedRunStageSummary(
  summary: EmbeddedRunStageSummary,
  options?: {
    totalThresholdMs?: number;
    stageThresholdMs?: number;
  },
 ): boolean {
-  const totalThresholdMs = options?.totalThresholdMs ?? 2_000;
-  const stageThresholdMs = options?.stageThresholdMs ?? 1_000;
+  const totalThresholdMs = options?.totalThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_TOTAL_MS;
+  const stageThresholdMs = options?.stageThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_STAGE_MS;
  return (
    summary.totalMs >= totalThresholdMs ||
    summary.stages.some((stage) => stage.durationMs >= stageThresholdMs)
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -245,7 +245,7 @@ import { configureEmbeddedAttemptHttpRuntime } from "./attempt-http-runtime.js";
 import {
  createEmbeddedRunStageTracker,
  formatEmbeddedRunStageSummary,
-  shouldEmitEmbeddedRunStageSummary,
+  shouldWarnEmbeddedRunStageSummary,
 } from "./attempt-stage-timing.js";
 import {
  assembleAttemptContextEngine,
@@ -597,14 +597,18 @@ export async function runEmbeddedAttempt(
  const prepStages = createEmbeddedRunStageTracker();
  const emitPrepStageSummary = (phase: string) => {
    const summary = prepStages.snapshot();
+    const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
+    if (!shouldWarn && !log.isEnabled("trace")) {
+      return;
+    }
    const message = formatEmbeddedRunStageSummary(
-      `embedded run prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
+      `[trace:embedded-run] prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
      summary,
    );
-    if (shouldEmitEmbeddedRunStageSummary(summary)) {
+    if (shouldWarn) {
      log.warn(message);
-    } else if (log.isEnabled("debug")) {
-      log.debug(message);
+    } else {
+      log.trace(message);
    }
  };