fix(agents): demote embedded run stage logs to trace

This commit is contained in:
Peter Steinberger
2026-04-29 14:51:20 +01:00
parent 6bbacd14a3
commit 66cdbccc8a
5 changed files with 42 additions and 33 deletions

View File

@@ -23,7 +23,7 @@ Docs: https://docs.openclaw.ai
- CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd. - CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd.
- CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis. - CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis.
- CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst. - CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst.
- Agents/diagnostics: log slow embedded-run startup and preparation stage timings before model I/O, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang. - Agents/diagnostics: trace embedded-run startup and preparation stage timings before model I/O, and warn only on severe slow stages, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency without noisy normal logs. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
- Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden. - Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden.
- Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject. - Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject.
- Channels/Discord: treat bare numeric outbound targets that match the effective Discord DM allowlist as user DMs while preserving account-specific legacy `dm.allowFrom` precedence over inherited root `allowFrom`. (#74303) Thanks @Squirbie. - Channels/Discord: treat bare numeric outbound targets that match the effective Discord DM allowlist as user DMs while preserving account-specific legacy `dm.allowFrom` precedence over inherited root `allowFrom`. (#74303) Thanks @Squirbie.

View File

@@ -93,7 +93,7 @@ import { handleAssistantFailover } from "./run/assistant-failover.js";
import { import {
createEmbeddedRunStageTracker, createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary, formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary, shouldWarnEmbeddedRunStageSummary,
} from "./run/attempt-stage-timing.js"; } from "./run/attempt-stage-timing.js";
import { forgetPromptBuildDrainCacheForRun } from "./run/attempt.prompt-helpers.js"; import { forgetPromptBuildDrainCacheForRun } from "./run/attempt.prompt-helpers.js";
import { createEmbeddedRunAuthController } from "./run/auth-controller.js"; import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
@@ -333,14 +333,18 @@ export async function runEmbeddedPiAgent(
let startupStagesEmitted = false; let startupStagesEmitted = false;
const emitStartupStageSummary = (phase: string) => { const emitStartupStageSummary = (phase: string) => {
const summary = startupStages.snapshot(); const summary = startupStages.snapshot();
const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
if (!shouldWarn && !log.isEnabled("trace")) {
return;
}
const message = formatEmbeddedRunStageSummary( const message = formatEmbeddedRunStageSummary(
`embedded run startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`, `[trace:embedded-run] startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary, summary,
); );
if (shouldEmitEmbeddedRunStageSummary(summary)) { if (shouldWarn) {
log.warn(message); log.warn(message);
} else if (log.isEnabled("debug")) { } else {
log.debug(message); log.trace(message);
} }
}; };
params.onExecutionStarted?.(); params.onExecutionStarted?.();

View File

@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
import { import {
createEmbeddedRunStageTracker, createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary, formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary, shouldWarnEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js"; } from "./attempt-stage-timing.js";
describe("embedded run stage timing", () => { describe("embedded run stage timing", () => {
@@ -25,18 +25,25 @@ describe("embedded run stage timing", () => {
}); });
}); });
it("emits only slow stage summaries", () => { it("warns only for very slow stage summaries by default", () => {
expect( expect(
shouldEmitEmbeddedRunStageSummary( shouldWarnEmbeddedRunStageSummary({
{ totalMs: 9_999,
totalMs: 1_999, stages: [{ name: "auth", durationMs: 4_999, elapsedMs: 4_999 }],
stages: [{ name: "auth", durationMs: 999, elapsedMs: 999 }], }),
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(false); ).toBe(false);
expect(shouldWarnEmbeddedRunStageSummary({ totalMs: 10_000, stages: [] })).toBe(true);
expect( expect(
shouldEmitEmbeddedRunStageSummary( shouldWarnEmbeddedRunStageSummary({
totalMs: 10,
stages: [{ name: "auth", durationMs: 5_000, elapsedMs: 5_000 }],
}),
).toBe(true);
});
it("supports custom warning thresholds", () => {
expect(
shouldWarnEmbeddedRunStageSummary(
{ {
totalMs: 2_000, totalMs: 2_000,
stages: [{ name: "auth", durationMs: 10, elapsedMs: 10 }], stages: [{ name: "auth", durationMs: 10, elapsedMs: 10 }],
@@ -44,15 +51,6 @@ describe("embedded run stage timing", () => {
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 }, { totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
), ),
).toBe(true); ).toBe(true);
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 10,
stages: [{ name: "auth", durationMs: 1_000, elapsedMs: 1_000 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(true);
}); });
it("formats summaries compactly for logs", () => { it("formats summaries compactly for logs", () => {

View File

@@ -14,6 +14,9 @@ export type EmbeddedRunStageTracker = {
snapshot: () => EmbeddedRunStageSummary; snapshot: () => EmbeddedRunStageSummary;
}; };
export const EMBEDDED_RUN_STAGE_WARN_TOTAL_MS = 10_000;
export const EMBEDDED_RUN_STAGE_WARN_STAGE_MS = 5_000;
export function createEmbeddedRunStageTracker(options?: { export function createEmbeddedRunStageTracker(options?: {
now?: () => number; now?: () => number;
}): EmbeddedRunStageTracker { }): EmbeddedRunStageTracker {
@@ -43,15 +46,15 @@ export function createEmbeddedRunStageTracker(options?: {
}; };
} }
export function shouldEmitEmbeddedRunStageSummary( export function shouldWarnEmbeddedRunStageSummary(
summary: EmbeddedRunStageSummary, summary: EmbeddedRunStageSummary,
options?: { options?: {
totalThresholdMs?: number; totalThresholdMs?: number;
stageThresholdMs?: number; stageThresholdMs?: number;
}, },
): boolean { ): boolean {
const totalThresholdMs = options?.totalThresholdMs ?? 2_000; const totalThresholdMs = options?.totalThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_TOTAL_MS;
const stageThresholdMs = options?.stageThresholdMs ?? 1_000; const stageThresholdMs = options?.stageThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_STAGE_MS;
return ( return (
summary.totalMs >= totalThresholdMs || summary.totalMs >= totalThresholdMs ||
summary.stages.some((stage) => stage.durationMs >= stageThresholdMs) summary.stages.some((stage) => stage.durationMs >= stageThresholdMs)

View File

@@ -245,7 +245,7 @@ import { configureEmbeddedAttemptHttpRuntime } from "./attempt-http-runtime.js";
import { import {
createEmbeddedRunStageTracker, createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary, formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary, shouldWarnEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js"; } from "./attempt-stage-timing.js";
import { import {
assembleAttemptContextEngine, assembleAttemptContextEngine,
@@ -597,14 +597,18 @@ export async function runEmbeddedAttempt(
const prepStages = createEmbeddedRunStageTracker(); const prepStages = createEmbeddedRunStageTracker();
const emitPrepStageSummary = (phase: string) => { const emitPrepStageSummary = (phase: string) => {
const summary = prepStages.snapshot(); const summary = prepStages.snapshot();
const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
if (!shouldWarn && !log.isEnabled("trace")) {
return;
}
const message = formatEmbeddedRunStageSummary( const message = formatEmbeddedRunStageSummary(
`embedded run prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`, `[trace:embedded-run] prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary, summary,
); );
if (shouldEmitEmbeddedRunStageSummary(summary)) { if (shouldWarn) {
log.warn(message); log.warn(message);
} else if (log.isEnabled("debug")) { } else {
log.debug(message); log.trace(message);
} }
}; };