fix: use prompt snapshots for live context diagnostics

Peter Steinberger
2026-04-25 20:25:36 +01:00
parent b732f21a86
commit 30325f567c
6 changed files with 205 additions and 12 deletions

@@ -64,6 +64,9 @@ Docs: https://docs.openclaw.ai
- CLI/completion: dedupe provider auth flags before registering `openclaw onboard`
options, so completion-cache refresh during update no longer fails when stale
core fallback flags overlap plugin manifest flags. Fixes #71667.
- Diagnostics/trace: report live context usage from the current prompt snapshot
instead of provider turn totals, avoiding false near-full context spikes on
cached or tool-heavy runs.
- Plugins/Bonjour: stop the gateway from crash-looping on
`CIAO PROBING CANCELLED` when the mDNS watchdog cancels a stuck probe.
Restores the rejection-handler wiring dropped during the bonjour plugin
migration and shares unhandled-rejection state across module instances so
plugin-staged copies of `openclaw/plugin-sdk/runtime` register into the same
handler set the host consults. Especially affects Docker on macOS, where
mDNS probing reliably hits the watchdog. Thanks @troyhitch.
- Google Meet: report pinned Chrome nodes as offline or missing capabilities in
setup/join diagnostics, keep inaccessible nodes out of auto-selection, and

@@ -198,6 +198,9 @@ diagnostics + the exporter plugin are enabled.
Model usage:
- `model.usage`: tokens, cost, duration, context, provider/model/channel, session ids.
`usage` is provider/turn accounting for cost and telemetry; `context.used`
is the current prompt/context snapshot and can be lower than provider
`usage.total` when cached input or tool-loop calls are involved.
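For illustration, a `model.usage` event under this split might look like the
sketch below (values mirror the test fixtures added later in this commit;
the payload is trimmed to the fields discussed here):

```ts
// Sketch of a model.usage diagnostic event on a cached run. usage.* is
// provider/turn accounting; context.used is the live prompt snapshot.
const usageEvent = {
  type: "model.usage",
  usage: {
    input: 75_000,
    output: 5_000,
    cacheRead: 25_000,
    promptTokens: 100_000, // input + cacheRead + cacheWrite (billing view)
    total: 105_000, // turn total; overstates the live window here
  },
  context: {
    limit: 200_000,
    used: 44_000, // latest prompt snapshot, not usage.total
  },
};
```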
Message flow:

@@ -101,6 +101,13 @@ Assistant transcript entries persist the same normalized usage shape, including
returns usage metadata. This gives `/usage cost` and transcript-backed session
status a stable source even after the live runtime state is gone.
OpenClaw keeps provider usage accounting separate from the current context
snapshot. Provider `usage.total` can include cached input, output, and multiple
tool-loop model calls, so it is useful for cost and telemetry but can overstate
the live context window. Context displays and diagnostics use the latest prompt
snapshot (`promptTokens`, or the last model call when no prompt snapshot is
available) for `context.used`.
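A condensed sketch of that selection order (the real implementation is
`resolveRequestPromptTokens` in the final hunk of this commit; the
finiteness guards are dropped here for brevity):

```ts
// Resolution order for context.used: explicit prompt snapshot first, then
// the last model call's prompt-side tokens, then overall usage as a fallback.
type PromptSide = { input?: number; cacheRead?: number; cacheWrite?: number };

function resolveContextUsed(meta: {
  promptTokens?: number;
  lastCallUsage?: PromptSide;
  usage?: PromptSide;
}): number | undefined {
  if (typeof meta.promptTokens === "number" && meta.promptTokens > 0) {
    return meta.promptTokens;
  }
  for (const source of [meta.lastCallUsage, meta.usage]) {
    if (!source) continue;
    const sum =
      (source.input ?? 0) + (source.cacheRead ?? 0) + (source.cacheWrite ?? 0);
    if (sum > 0) return sum;
  }
  return undefined;
}
```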
## Cost estimation (when shown)
Costs are estimated from your model pricing config:

@@ -10,6 +10,11 @@ export type EmbeddedPiAgentMeta = {
agentHarnessId?: string;
cliSessionBinding?: CliSessionBinding;
compactionCount?: number;
/**
* Prompt/context snapshot from the latest model request. Prefer this for
* context-window utilization because provider usage totals can include cached
* and completion tokens that are useful for billing but a noisy signal of
* live context.
*/
promptTokens?: number;
usage?: {
input?: number;
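A hypothetical consumer of this field might derive window utilization as in
the sketch below (the helper name and formatting are illustrative, not part
of this commit):

```ts
// Hypothetical helper: prefer the prompt snapshot when rendering
// context-window utilization.
function contextUtilization(
  meta: { promptTokens?: number },
  limitTokens: number,
): string {
  const used = meta.promptTokens ?? 0;
  const pct = limitTokens > 0 ? ((used / limitTokens) * 100).toFixed(1) : "0.0";
  return `${used.toLocaleString()} / ${limitTokens.toLocaleString()} tok (${pct}%)`;
}
```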

@@ -10,6 +10,11 @@ import {
import * as sessionTypesModule from "../../config/sessions.js";
import type { SessionEntry } from "../../config/sessions.js";
import { loadSessionStore, saveSessionStore } from "../../config/sessions.js";
import {
onInternalDiagnosticEvent,
resetDiagnosticEventsForTest,
type DiagnosticEventPayload,
} from "../../infra/diagnostic-events.js";
import {
clearMemoryPluginState,
registerMemoryFlushPlanResolver,
@@ -138,6 +143,7 @@ type RunWithModelFallbackParams = {
};
beforeEach(() => {
resetDiagnosticEventsForTest();
embeddedRunTesting.resetActiveEmbeddedRuns();
replyRunRegistryTesting.resetReplyRunRegistry();
runEmbeddedPiAgentMock.mockClear();
@@ -169,6 +175,7 @@ beforeEach(() => {
});
afterEach(() => {
resetDiagnosticEventsForTest();
vi.useRealTimers();
clearMemoryPluginState();
replyRunRegistryTesting.resetReplyRunRegistry();
@@ -289,6 +296,167 @@ describe("runReplyAgent auto-compaction token update", () => {
// totalTokens should use lastCallUsage (55k), not accumulated (75k)
expect(stored[sessionKey].totalTokens).toBe(55_000);
});
it("reports live diagnostic context from promptTokens, not provider usage totals", async () => {
const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-diagnostic-"));
const storePath = path.join(tmp, "sessions.json");
const sessionKey = "main";
const sessionEntry = {
sessionId: "session",
updatedAt: Date.now(),
totalTokens: 50_000,
};
await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
runEmbeddedPiAgentMock.mockResolvedValue({
payloads: [{ text: "ok" }],
meta: {
agentMeta: {
usage: { input: 75_000, output: 5_000, cacheRead: 25_000, total: 105_000 },
lastCallUsage: { input: 55_000, output: 2_000, cacheRead: 25_000, total: 82_000 },
promptTokens: 44_000,
},
},
});
const diagnostics: DiagnosticEventPayload[] = [];
const unsubscribe = onInternalDiagnosticEvent((event) => {
diagnostics.push(event);
});
const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
storePath,
sessionEntry,
});
try {
await runReplyAgent({
commandBody: "hello",
followupRun,
queueKey: "main",
resolvedQueue,
shouldSteer: false,
shouldFollowup: false,
isActive: false,
isStreaming: false,
typing,
sessionCtx,
sessionEntry,
sessionStore: { [sessionKey]: sessionEntry },
sessionKey,
storePath,
defaultModel: "anthropic/claude-opus-4-6",
agentCfgContextTokens: 200_000,
resolvedVerboseLevel: "off",
isNewSession: false,
blockStreamingEnabled: false,
resolvedBlockStreamingBreak: "message_end",
shouldInjectGroupIntro: false,
typingMode: "instant",
});
} finally {
unsubscribe();
}
const usageEvent = diagnostics.find((event) => event.type === "model.usage");
expect(usageEvent).toMatchObject({
type: "model.usage",
usage: {
input: 75_000,
output: 5_000,
cacheRead: 25_000,
promptTokens: 100_000,
total: 105_000,
},
context: {
limit: 200_000,
used: 44_000,
},
});
});
it("falls back to last-call prompt usage for live diagnostic context", async () => {
const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-diagnostic-last-"));
const storePath = path.join(tmp, "sessions.json");
const sessionKey = "main";
const sessionEntry = {
sessionId: "session",
updatedAt: Date.now(),
totalTokens: 50_000,
};
await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
runEmbeddedPiAgentMock.mockResolvedValue({
payloads: [{ text: "ok" }],
meta: {
agentMeta: {
usage: { input: 75_000, output: 5_000, cacheRead: 25_000, total: 105_000 },
lastCallUsage: {
input: 55_000,
output: 2_000,
cacheRead: 25_000,
cacheWrite: 1_000,
total: 83_000,
},
},
},
});
const diagnostics: DiagnosticEventPayload[] = [];
const unsubscribe = onInternalDiagnosticEvent((event) => {
diagnostics.push(event);
});
const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
storePath,
sessionEntry,
});
try {
await runReplyAgent({
commandBody: "hello",
followupRun,
queueKey: "main",
resolvedQueue,
shouldSteer: false,
shouldFollowup: false,
isActive: false,
isStreaming: false,
typing,
sessionCtx,
sessionEntry,
sessionStore: { [sessionKey]: sessionEntry },
sessionKey,
storePath,
defaultModel: "anthropic/claude-opus-4-6",
agentCfgContextTokens: 200_000,
resolvedVerboseLevel: "off",
isNewSession: false,
blockStreamingEnabled: false,
resolvedBlockStreamingBreak: "message_end",
shouldInjectGroupIntro: false,
typingMode: "instant",
});
} finally {
unsubscribe();
}
const usageEvent = diagnostics.find((event) => event.type === "model.usage");
expect(usageEvent).toMatchObject({
type: "model.usage",
usage: {
input: 75_000,
output: 5_000,
cacheRead: 25_000,
promptTokens: 100_000,
total: 105_000,
},
context: {
limit: 200_000,
used: 81_000,
},
});
});
});
describe("runReplyAgent block streaming", () => {
@@ -913,6 +1081,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
model: "claude",
usage: { input: 1200, output: 45, cacheRead: 800, cacheWrite: 200, total: 2245 },
lastCallUsage: { input: 1000, output: 45, cacheRead: 750, cacheWrite: 150, total: 1945 },
promptTokens: 1250,
compactionCount: 1,
},
},
@@ -987,6 +1156,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
expect(traceText).toContain("🔎 Usage (Session Total):");
expect(traceText).toContain("🔎 Usage (Last Turn Total):");
expect(traceText).toContain("🔎 Context Window (Last Model Request):");
expect(traceText).toContain("used=1,250 tok (1.3k)");
expect(traceText).toContain("🔎 Execution Result:");
expect(traceText).toContain("winner=anthropic/claude");
expect(traceText).toContain("fallbackUsed=yes");
@@ -1025,7 +1195,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
expect(traceText).toContain("🔎 Model Input (User Role):");
expect(traceText).toContain("🔎 Model Output (Assistant Role):");
expect(traceText).toContain(
"Summary: winner=claude 🧠 low fallback=yes attempts=2 stop=end_turn prompt=1.9k/200k ⬇️ 1.2k ⬆️ 45 ♻️ 800 🆕 200 🔢 2.2k tools=2 compactions=1",
"Summary: winner=claude 🧠 low fallback=yes attempts=2 stop=end_turn prompt=1.3k/200k ⬇️ 1.2k ⬆️ 45 ♻️ 800 🆕 200 🔢 2.2k tools=2 compactions=1",
);
expect(traceText.indexOf("🔎 Execution Result:")).toBeGreaterThan(
traceText.indexOf("🔎 Context Window (Last Model Request):"),

@@ -585,6 +585,13 @@ function resolveRequestPromptTokens(params: {
total?: number;
};
}): number | undefined {
if (
typeof params.promptTokens === "number" &&
Number.isFinite(params.promptTokens) &&
params.promptTokens > 0
) {
return params.promptTokens;
}
const lastCall = params.lastCallUsage;
if (lastCall) {
const input = lastCall.input ?? 0;
@@ -595,13 +602,6 @@ function resolveRequestPromptTokens(params: {
return sum;
}
}
if (
typeof params.promptTokens === "number" &&
Number.isFinite(params.promptTokens) &&
params.promptTokens > 0
) {
return params.promptTokens;
}
const usage = params.usage;
if (usage) {
const input = usage.input ?? 0;
@@ -1428,8 +1428,13 @@ export async function runReplyAgent(params: {
const output = usage.output ?? 0;
const cacheRead = usage.cacheRead ?? 0;
const cacheWrite = usage.cacheWrite ?? 0;
const promptTokens = input + cacheRead + cacheWrite;
const totalTokens = usage.total ?? promptTokens + output;
const usagePromptTokens = input + cacheRead + cacheWrite;
const totalTokens = usage.total ?? usagePromptTokens + output;
const contextUsedTokens = resolveRequestPromptTokens({
lastCallUsage: runResult.meta?.agentMeta?.lastCallUsage,
promptTokens: runResult.meta?.agentMeta?.promptTokens,
usage,
});
const costConfig = resolveModelCostConfig({
provider: providerUsed,
model: modelUsed,
@@ -1455,13 +1460,13 @@ export async function runReplyAgent(params: {
output,
cacheRead,
cacheWrite,
promptTokens,
promptTokens: usagePromptTokens,
total: totalTokens,
},
lastCallUsage: runResult.meta?.agentMeta?.lastCallUsage,
context: {
limit: contextTokensUsed,
used: totalTokens,
...(contextUsedTokens !== undefined ? { used: contextUsedTokens } : {}),
},
costUsd,
durationMs: Date.now() - runStartedAt,
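With the first test's fixtures, the `context` block change works out as
follows (a worked example, not code from the commit):

```ts
// Before: context.used = totalTokens (provider turn total).
// After:  context.used = resolveRequestPromptTokens(...) (prompt snapshot).
const limit = 200_000;
const before = 105_000 / limit; // 0.525 -> inflated by cache reads and turn accumulation
const after = 44_000 / limit; // 0.22 -> the actual live prompt share
```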