fix(agents): demote embedded run stage logs to trace

This commit is contained in:
Peter Steinberger
2026-04-29 14:51:20 +01:00
parent 6bbacd14a3
commit 66cdbccc8a
5 changed files with 42 additions and 33 deletions

View File

@@ -23,7 +23,7 @@ Docs: https://docs.openclaw.ai
- CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd.
- CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis.
- CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst.
- Agents/diagnostics: log slow embedded-run startup and preparation stage timings before model I/O, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
- Agents/diagnostics: trace embedded-run startup and preparation stage timings before model I/O, and warn only on severe slow stages, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency without noisy normal logs. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
- Gateway/clients: wait for the event loop to become responsive before opening Gateway WebSocket RPC/probe/client connections while charging that readiness wait to caller timeouts, so Windows deferred module-evaluation stalls no longer turn healthy loopback gateways into false handshake timeouts across status, TUI, ACP, MCP, node-host, and plugin client paths. Refs #74279 and #48270. Thanks @wongcode and @joost-heijden.
- Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject.
- Channels/Discord: treat bare numeric outbound targets that match the effective Discord DM allowlist as user DMs while preserving account-specific legacy `dm.allowFrom` precedence over inherited root `allowFrom`. (#74303) Thanks @Squirbie.

View File

@@ -93,7 +93,7 @@ import { handleAssistantFailover } from "./run/assistant-failover.js";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
shouldWarnEmbeddedRunStageSummary,
} from "./run/attempt-stage-timing.js";
import { forgetPromptBuildDrainCacheForRun } from "./run/attempt.prompt-helpers.js";
import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
@@ -333,14 +333,18 @@ export async function runEmbeddedPiAgent(
let startupStagesEmitted = false;
const emitStartupStageSummary = (phase: string) => {
const summary = startupStages.snapshot();
const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
if (!shouldWarn && !log.isEnabled("trace")) {
return;
}
const message = formatEmbeddedRunStageSummary(
`embedded run startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
`[trace:embedded-run] startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary,
);
if (shouldEmitEmbeddedRunStageSummary(summary)) {
if (shouldWarn) {
log.warn(message);
} else if (log.isEnabled("debug")) {
log.debug(message);
} else {
log.trace(message);
}
};
params.onExecutionStarted?.();

View File

@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
shouldWarnEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js";
describe("embedded run stage timing", () => {
@@ -25,18 +25,25 @@ describe("embedded run stage timing", () => {
});
});
it("emits only slow stage summaries", () => {
it("warns only for very slow stage summaries by default", () => {
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 1_999,
stages: [{ name: "auth", durationMs: 999, elapsedMs: 999 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
shouldWarnEmbeddedRunStageSummary({
totalMs: 9_999,
stages: [{ name: "auth", durationMs: 4_999, elapsedMs: 4_999 }],
}),
).toBe(false);
expect(shouldWarnEmbeddedRunStageSummary({ totalMs: 10_000, stages: [] })).toBe(true);
expect(
shouldEmitEmbeddedRunStageSummary(
shouldWarnEmbeddedRunStageSummary({
totalMs: 10,
stages: [{ name: "auth", durationMs: 5_000, elapsedMs: 5_000 }],
}),
).toBe(true);
});
it("supports custom warning thresholds", () => {
expect(
shouldWarnEmbeddedRunStageSummary(
{
totalMs: 2_000,
stages: [{ name: "auth", durationMs: 10, elapsedMs: 10 }],
@@ -44,15 +51,6 @@ describe("embedded run stage timing", () => {
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(true);
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 10,
stages: [{ name: "auth", durationMs: 1_000, elapsedMs: 1_000 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(true);
});
it("formats summaries compactly for logs", () => {

View File

@@ -14,6 +14,9 @@ export type EmbeddedRunStageTracker = {
snapshot: () => EmbeddedRunStageSummary;
};
/**
 * Default total-duration threshold in milliseconds: a summary whose `totalMs`
 * is at or above this value makes `shouldWarnEmbeddedRunStageSummary` return
 * true unless the caller overrides `totalThresholdMs`.
 */
export const EMBEDDED_RUN_STAGE_WARN_TOTAL_MS = 10_000;
/**
 * Default per-stage threshold in milliseconds: any single stage whose
 * `durationMs` is at or above this value makes
 * `shouldWarnEmbeddedRunStageSummary` return true unless the caller overrides
 * `stageThresholdMs`.
 */
export const EMBEDDED_RUN_STAGE_WARN_STAGE_MS = 5_000;
export function createEmbeddedRunStageTracker(options?: {
now?: () => number;
}): EmbeddedRunStageTracker {
@@ -43,15 +46,15 @@ export function createEmbeddedRunStageTracker(options?: {
};
}
export function shouldEmitEmbeddedRunStageSummary(
export function shouldWarnEmbeddedRunStageSummary(
summary: EmbeddedRunStageSummary,
options?: {
totalThresholdMs?: number;
stageThresholdMs?: number;
},
): boolean {
const totalThresholdMs = options?.totalThresholdMs ?? 2_000;
const stageThresholdMs = options?.stageThresholdMs ?? 1_000;
const totalThresholdMs = options?.totalThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_TOTAL_MS;
const stageThresholdMs = options?.stageThresholdMs ?? EMBEDDED_RUN_STAGE_WARN_STAGE_MS;
return (
summary.totalMs >= totalThresholdMs ||
summary.stages.some((stage) => stage.durationMs >= stageThresholdMs)

View File

@@ -245,7 +245,7 @@ import { configureEmbeddedAttemptHttpRuntime } from "./attempt-http-runtime.js";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
shouldWarnEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js";
import {
assembleAttemptContextEngine,
@@ -597,14 +597,18 @@ export async function runEmbeddedAttempt(
const prepStages = createEmbeddedRunStageTracker();
const emitPrepStageSummary = (phase: string) => {
const summary = prepStages.snapshot();
const shouldWarn = shouldWarnEmbeddedRunStageSummary(summary);
if (!shouldWarn && !log.isEnabled("trace")) {
return;
}
const message = formatEmbeddedRunStageSummary(
`embedded run prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
`[trace:embedded-run] prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary,
);
if (shouldEmitEmbeddedRunStageSummary(summary)) {
if (shouldWarn) {
log.warn(message);
} else if (log.isEnabled("debug")) {
log.debug(message);
} else {
log.trace(message);
}
};