fix(agents): trace slow embedded run startup stages

This commit is contained in:
Peter Steinberger
2026-04-29 14:08:29 +01:00
parent bbf985d50a
commit 20e2117371
5 changed files with 202 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
- CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd.
- CLI/tools: keep the Gateway `tools.*` RPC namespace out of plugin command discovery and managed proxy startup, so stray commands like `openclaw tools effective` fail quickly instead of cold-loading plugin metadata. Refs #73477. Thanks @oromeis.
- CLI/status: keep default text `openclaw status --usage` on metadata-only channel scans unless `--deep` or `--all` is set, and send stray `openclaw tools --help` through the precomputed root-help fast path so latency-triage commands avoid plugin/runtime cold loads before printing. Refs #73477 and #74220. Thanks @oromeis and @NianJiuZst.
- Agents/diagnostics: log slow embedded-run startup and preparation stage timings before model I/O, so Docker/VPS latency reports can identify whether plugin loading, auth/model resolution, tool inventory, bootstrap, MCP/LSP, resource loading, or stream setup is dominating pre-run latency. Refs #73428. Thanks @Dimaoggg, @quangtran88, and @Heyvhuang.
- Plugins/runtime-deps: memoize packaged bundled runtime dist-mirror preparation after the first successful pass while keeping source-checkout mirrors refreshable, so constrained Docker/VPS installs avoid repeated root scans before chat turns. Refs #73428, #73421, #73532, and #73477. Thanks @Dimaoggg, @oromeis, @oadiazp, @jmfraga, @bstanbury, @antoniusfelix, and @jkobject.
- Channels/Discord: treat bare numeric outbound targets that match the effective Discord DM allowlist as user DMs while preserving account-specific legacy `dm.allowFrom` precedence over inherited root `allowFrom`. (#74303) Thanks @Squirbie.
- Control UI: make the chat sidebar split divider focusable, keyboard-resizable, ARIA-described, and pointer-event based so sidebar resizing works without a mouse. Thanks @BunsDev.

View File

@@ -90,6 +90,11 @@ import { log } from "./logger.js";
import { resolveModelAsync } from "./model.js";
import { createEmbeddedRunReplayState, observeReplayMetadata } from "./replay-state.js";
import { handleAssistantFailover } from "./run/assistant-failover.js";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
} from "./run/attempt-stage-timing.js";
import { forgetPromptBuildDrainCacheForRun } from "./run/attempt.prompt-helpers.js";
import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
import { resolveAuthProfileFailureReason } from "./run/auth-profile-failure-policy.js";
@@ -324,6 +329,20 @@ export async function runEmbeddedPiAgent(
return enqueueGlobal(async () => {
throwIfAborted();
const started = Date.now();
const startupStages = createEmbeddedRunStageTracker();
let startupStagesEmitted = false;
const emitStartupStageSummary = (phase: string) => {
const summary = startupStages.snapshot();
const message = formatEmbeddedRunStageSummary(
`embedded run startup stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary,
);
if (shouldEmitEmbeddedRunStageSummary(summary)) {
log.warn(message);
} else if (log.isEnabled("debug")) {
log.debug(message);
}
};
params.onExecutionStarted?.();
const workspaceResolution = resolveRunWorkspaceDir({
workspaceDir: params.workspaceDir,
@@ -344,11 +363,13 @@ export async function runEmbeddedPiAgent(
`[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
);
}
startupStages.mark("workspace");
ensureRuntimePluginsLoaded({
config: params.config,
workspaceDir: resolvedWorkspace,
allowGatewaySubagentBinding: params.allowGatewaySubagentBinding,
});
startupStages.mark("runtime-plugins");
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
@@ -407,6 +428,7 @@ export async function runEmbeddedPiAgent(
provider = hookSelection.provider;
modelId = hookSelection.modelId;
const legacyBeforeAgentStartResult = hookSelection.legacyBeforeAgentStartResult;
startupStages.mark("hooks");
const agentHarness = selectAgentHarness({
provider,
modelId,
@@ -453,6 +475,7 @@ export async function runEmbeddedPiAgent(
});
const ctxInfo = resolvedRuntimeModel.ctxInfo;
let effectiveModel = resolvedRuntimeModel.effectiveModel;
startupStages.mark("model-resolution");
const authStore = pluginHarnessOwnsTransport
? createEmptyAuthProfileStore()
@@ -611,6 +634,7 @@ export async function runEmbeddedPiAgent(
} else if (lockedProfileId) {
lastProfileId = lockedProfileId;
}
startupStages.mark("auth");
const { sessionAgentId } = resolveSessionAgentIds({
sessionKey: params.sessionKey,
config: params.config,
@@ -745,6 +769,7 @@ export async function runEmbeddedPiAgent(
agentDir,
workspaceDir: resolvedWorkspace,
});
startupStages.mark("context-engine");
try {
let activeSessionId = params.sessionId;
let activeSessionFile = params.sessionFile;
@@ -892,6 +917,11 @@ export async function runEmbeddedPiAgent(
fastMode: params.fastMode,
},
});
if (!startupStagesEmitted) {
startupStages.mark("attempt-dispatch");
emitStartupStageSummary("attempt-dispatch");
startupStagesEmitted = true;
}
const rawAttempt = await runEmbeddedAttemptWithBackend({
sessionId: activeSessionId,

View File

@@ -0,0 +1,71 @@
import { describe, expect, it } from "vitest";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js";
describe("embedded run stage timing", () => {
it("captures stage duration and elapsed time", () => {
let clock = 10;
const tracker = createEmbeddedRunStageTracker({ now: () => clock });
clock = 25;
tracker.mark("workspace");
clock = 40;
tracker.mark("tools");
clock = 45;
expect(tracker.snapshot()).toEqual({
totalMs: 35,
stages: [
{ name: "workspace", durationMs: 15, elapsedMs: 15 },
{ name: "tools", durationMs: 15, elapsedMs: 30 },
],
});
});
it("emits only slow stage summaries", () => {
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 1_999,
stages: [{ name: "auth", durationMs: 999, elapsedMs: 999 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(false);
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 2_000,
stages: [{ name: "auth", durationMs: 10, elapsedMs: 10 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(true);
expect(
shouldEmitEmbeddedRunStageSummary(
{
totalMs: 10,
stages: [{ name: "auth", durationMs: 1_000, elapsedMs: 1_000 }],
},
{ totalThresholdMs: 2_000, stageThresholdMs: 1_000 },
),
).toBe(true);
});
it("formats summaries compactly for logs", () => {
expect(
formatEmbeddedRunStageSummary("embedded run startup stages: runId=r1", {
totalMs: 80,
stages: [
{ name: "workspace", durationMs: 25, elapsedMs: 25 },
{ name: "tools", durationMs: 55, elapsedMs: 80 },
],
}),
).toBe(
"embedded run startup stages: runId=r1 totalMs=80 stages=workspace:25ms@25ms,tools:55ms@80ms",
);
});
});

View File

@@ -0,0 +1,72 @@
export type EmbeddedRunStageTiming = {
name: string;
durationMs: number;
elapsedMs: number;
};
export type EmbeddedRunStageSummary = {
totalMs: number;
stages: EmbeddedRunStageTiming[];
};
export type EmbeddedRunStageTracker = {
mark: (name: string) => void;
snapshot: () => EmbeddedRunStageSummary;
};
export function createEmbeddedRunStageTracker(options?: {
now?: () => number;
}): EmbeddedRunStageTracker {
const now = options?.now ?? Date.now;
const startedAt = now();
let previousAt = startedAt;
const stages: EmbeddedRunStageTiming[] = [];
const toMs = (value: number) => Math.max(0, Math.round(value));
return {
mark(name) {
const currentAt = now();
stages.push({
name,
durationMs: toMs(currentAt - previousAt),
elapsedMs: toMs(currentAt - startedAt),
});
previousAt = currentAt;
},
snapshot() {
return {
totalMs: toMs(now() - startedAt),
stages: stages.slice(),
};
},
};
}
export function shouldEmitEmbeddedRunStageSummary(
summary: EmbeddedRunStageSummary,
options?: {
totalThresholdMs?: number;
stageThresholdMs?: number;
},
): boolean {
const totalThresholdMs = options?.totalThresholdMs ?? 2_000;
const stageThresholdMs = options?.stageThresholdMs ?? 1_000;
return (
summary.totalMs >= totalThresholdMs ||
summary.stages.some((stage) => stage.durationMs >= stageThresholdMs)
);
}
export function formatEmbeddedRunStageSummary(
prefix: string,
summary: EmbeddedRunStageSummary,
): string {
const stages =
summary.stages.length > 0
? summary.stages
.map((stage) => `${stage.name}:${stage.durationMs}ms@${stage.elapsedMs}ms`)
.join(",")
: "none";
return `${prefix} totalMs=${summary.totalMs} stages=${stages}`;
}

View File

@@ -242,6 +242,11 @@ import {
shouldRotateCompactionTranscript,
} from "../compaction-successor-transcript.js";
import { configureEmbeddedAttemptHttpRuntime } from "./attempt-http-runtime.js";
import {
createEmbeddedRunStageTracker,
formatEmbeddedRunStageSummary,
shouldEmitEmbeddedRunStageSummary,
} from "./attempt-stage-timing.js";
import {
assembleAttemptContextEngine,
buildLoopPromptCacheInfo,
@@ -589,6 +594,19 @@ export async function runEmbeddedAttempt(
log.debug(
`embedded run start: runId=${params.runId} sessionId=${params.sessionId} provider=${params.provider} model=${params.modelId} thinking=${params.thinkLevel} messageChannel=${params.messageChannel ?? params.messageProvider ?? "unknown"}`,
);
const prepStages = createEmbeddedRunStageTracker();
const emitPrepStageSummary = (phase: string) => {
const summary = prepStages.snapshot();
const message = formatEmbeddedRunStageSummary(
`embedded run prep stages: runId=${params.runId} sessionId=${params.sessionId} phase=${phase}`,
summary,
);
if (shouldEmitEmbeddedRunStageSummary(summary)) {
log.warn(message);
} else if (log.isEnabled("debug")) {
log.debug(message);
}
};
await fs.mkdir(resolvedWorkspace, { recursive: true });
@@ -610,6 +628,7 @@ export async function runEmbeddedAttempt(
config: params.config,
agentId: params.agentId,
});
prepStages.mark("workspace-sandbox");
let restoreSkillEnv: (() => void) | undefined;
let aborted = Boolean(params.abortSignal?.aborted);
@@ -645,6 +664,7 @@ export async function runEmbeddedAttempt(
workspaceDir: effectiveWorkspace,
agentId: sessionAgentId,
});
prepStages.mark("skills");
const sessionLabel = params.sessionKey ?? params.sessionId;
const contextInjectionMode = resolveContextInjectionMode(params.config);
@@ -760,6 +780,7 @@ export async function runEmbeddedAttempt(
});
return applyEmbeddedAttemptToolsAllow(allTools, params.toolsAllow);
})();
prepStages.mark("core-plugin-tools");
const toolsEnabled = supportsModelTools(params.model);
const bootstrapHasFileAccess = toolsEnabled && toolsRaw.some((tool) => tool.name === "read");
const bootstrapRouting = await resolveAttemptWorkspaceBootstrapRouting({
@@ -803,6 +824,7 @@ export async function runEmbeddedAttempt(
runKind: params.bootstrapContextRunKind,
}),
});
prepStages.mark("bootstrap-context");
const remappedContextFiles = remapInjectedContextFilesToWorkspace({
files: resolvedContextFiles,
sourceWorkspaceDir: resolvedWorkspace,
@@ -935,6 +957,7 @@ export async function runEmbeddedAttempt(
warn: (message) => log.warn(message),
});
const effectiveTools = [...tools, ...filteredBundledTools];
prepStages.mark("bundle-tools");
const allowedToolNames = collectAllowedToolNames({
tools: effectiveTools,
clientTools,
@@ -1221,6 +1244,7 @@ export async function runEmbeddedAttempt(
bootstrapMode,
contextFiles: remappedContextFiles,
});
prepStages.mark("system-prompt");
// Keep the session lock scoped to transcript/session mutations. Cold plugin
// and tool setup can be slow, and holding the lock there blocks CLI fallback
@@ -1345,6 +1369,7 @@ export async function runEmbeddedAttempt(
cfg: params.config,
contextTokenBudget: params.contextTokenBudget,
});
prepStages.mark("session-resource-loader");
// Get hook runner early so it's available when creating tools
const hookRunner = getGlobalHookRunner();
@@ -1441,6 +1466,7 @@ export async function runEmbeddedAttempt(
}
session.setActiveToolsByName(sessionToolAllowlist);
const activeSession = session;
prepStages.mark("agent-session");
if (isRawModelRun) {
// Raw model probes should measure exactly the requested prompt against
// the selected provider/model. Reset clears restored transcript state
@@ -1692,6 +1718,8 @@ export async function runEmbeddedAttempt(
`(${params.provider}/${params.modelId})`,
);
}
prepStages.mark("stream-setup");
emitPrepStageSummary("stream-ready");
const cacheObservabilityEnabled = Boolean(cacheTrace) || log.isEnabled("debug");
const promptCacheToolNames = collectPromptCacheToolNames(