mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:30:42 +00:00
fix: use prompt snapshots for live context diagnostics
This commit is contained in:
@@ -64,6 +64,9 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/completion: dedupe provider auth flags before registering `openclaw onboard`
|
||||
options, so completion-cache refresh during update no longer fails when stale
|
||||
core fallback flags overlap plugin manifest flags. Fixes #71667.
|
||||
- Diagnostics/trace: report live context usage from the current prompt snapshot
|
||||
instead of provider turn totals, avoiding false near-full context spikes on
|
||||
cached or tool-heavy runs.
|
||||
- Plugins/Bonjour: stop the gateway from crash-looping on `CIAO PROBING CANCELLED` when the mDNS watchdog cancels a stuck probe. Restores the rejection-handler wiring dropped during the bonjour plugin migration and shares unhandled-rejection state across module instances so plugin-staged copies of `openclaw/plugin-sdk/runtime` register into the same handler set the host consults. Especially affects Docker on macOS, where mDNS probing reliably hits the watchdog. Thanks @troyhitch.
|
||||
- Google Meet: report pinned Chrome nodes as offline or missing capabilities in
|
||||
setup/join diagnostics, keep inaccessible nodes out of auto-selection, and
|
||||
|
||||
@@ -198,6 +198,9 @@ diagnostics + the exporter plugin are enabled.
|
||||
Model usage:
|
||||
|
||||
- `model.usage`: tokens, cost, duration, context, provider/model/channel, session ids.
|
||||
`usage` is provider/turn accounting for cost and telemetry; `context.used`
|
||||
is the current prompt/context snapshot and can be lower than provider
|
||||
`usage.total` when cached input or tool-loop calls are involved.
|
||||
|
||||
Message flow:
|
||||
|
||||
|
||||
@@ -101,6 +101,13 @@ Assistant transcript entries persist the same normalized usage shape, including
|
||||
returns usage metadata. This gives `/usage cost` and transcript-backed session
|
||||
status a stable source even after the live runtime state is gone.
|
||||
|
||||
OpenClaw keeps provider usage accounting separate from the current context
|
||||
snapshot. Provider `usage.total` can include cached input, output, and multiple
|
||||
tool-loop model calls, so it is useful for cost and telemetry but can overstate
|
||||
the live context window. Context displays and diagnostics use the latest prompt
|
||||
snapshot (`promptTokens`, or the last model call's prompt-side usage, i.e.
|
||||
input plus cache read/write tokens, when no prompt snapshot is available) for `context.used`.
|
||||
|
||||
## Cost estimation (when shown)
|
||||
|
||||
Costs are estimated from your model pricing config:
|
||||
|
||||
@@ -10,6 +10,11 @@ export type EmbeddedPiAgentMeta = {
|
||||
agentHarnessId?: string;
|
||||
cliSessionBinding?: CliSessionBinding;
|
||||
compactionCount?: number;
|
||||
/**
|
||||
* Prompt/context snapshot from the latest model request. Prefer this for
|
||||
* context-window utilization because provider usage totals can include cached
|
||||
* and completion tokens that are useful for billing but noisy as live context.
|
||||
*/
|
||||
promptTokens?: number;
|
||||
usage?: {
|
||||
input?: number;
|
||||
|
||||
@@ -10,6 +10,11 @@ import {
|
||||
import * as sessionTypesModule from "../../config/sessions.js";
|
||||
import type { SessionEntry } from "../../config/sessions.js";
|
||||
import { loadSessionStore, saveSessionStore } from "../../config/sessions.js";
|
||||
import {
|
||||
onInternalDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticEventPayload,
|
||||
} from "../../infra/diagnostic-events.js";
|
||||
import {
|
||||
clearMemoryPluginState,
|
||||
registerMemoryFlushPlanResolver,
|
||||
@@ -138,6 +143,7 @@ type RunWithModelFallbackParams = {
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticEventsForTest();
|
||||
embeddedRunTesting.resetActiveEmbeddedRuns();
|
||||
replyRunRegistryTesting.resetReplyRunRegistry();
|
||||
runEmbeddedPiAgentMock.mockClear();
|
||||
@@ -169,6 +175,7 @@ beforeEach(() => {
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
resetDiagnosticEventsForTest();
|
||||
vi.useRealTimers();
|
||||
clearMemoryPluginState();
|
||||
replyRunRegistryTesting.resetReplyRunRegistry();
|
||||
@@ -289,6 +296,167 @@ describe("runReplyAgent auto-compaction token update", () => {
|
||||
// totalTokens should use lastCallUsage (55k), not accumulated (75k)
|
||||
expect(stored[sessionKey].totalTokens).toBe(55_000);
|
||||
});
|
||||
|
||||
it("reports live diagnostic context from promptTokens, not provider usage totals", async () => {
|
||||
const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-diagnostic-"));
|
||||
const storePath = path.join(tmp, "sessions.json");
|
||||
const sessionKey = "main";
|
||||
const sessionEntry = {
|
||||
sessionId: "session",
|
||||
updatedAt: Date.now(),
|
||||
totalTokens: 50_000,
|
||||
};
|
||||
|
||||
await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
|
||||
|
||||
runEmbeddedPiAgentMock.mockResolvedValue({
|
||||
payloads: [{ text: "ok" }],
|
||||
meta: {
|
||||
agentMeta: {
|
||||
usage: { input: 75_000, output: 5_000, cacheRead: 25_000, total: 105_000 },
|
||||
lastCallUsage: { input: 55_000, output: 2_000, cacheRead: 25_000, total: 82_000 },
|
||||
promptTokens: 44_000,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const diagnostics: DiagnosticEventPayload[] = [];
|
||||
const unsubscribe = onInternalDiagnosticEvent((event) => {
|
||||
diagnostics.push(event);
|
||||
});
|
||||
const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
|
||||
storePath,
|
||||
sessionEntry,
|
||||
});
|
||||
|
||||
try {
|
||||
await runReplyAgent({
|
||||
commandBody: "hello",
|
||||
followupRun,
|
||||
queueKey: "main",
|
||||
resolvedQueue,
|
||||
shouldSteer: false,
|
||||
shouldFollowup: false,
|
||||
isActive: false,
|
||||
isStreaming: false,
|
||||
typing,
|
||||
sessionCtx,
|
||||
sessionEntry,
|
||||
sessionStore: { [sessionKey]: sessionEntry },
|
||||
sessionKey,
|
||||
storePath,
|
||||
defaultModel: "anthropic/claude-opus-4-6",
|
||||
agentCfgContextTokens: 200_000,
|
||||
resolvedVerboseLevel: "off",
|
||||
isNewSession: false,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
shouldInjectGroupIntro: false,
|
||||
typingMode: "instant",
|
||||
});
|
||||
} finally {
|
||||
unsubscribe();
|
||||
}
|
||||
|
||||
const usageEvent = diagnostics.find((event) => event.type === "model.usage");
|
||||
expect(usageEvent).toMatchObject({
|
||||
type: "model.usage",
|
||||
usage: {
|
||||
input: 75_000,
|
||||
output: 5_000,
|
||||
cacheRead: 25_000,
|
||||
promptTokens: 100_000,
|
||||
total: 105_000,
|
||||
},
|
||||
context: {
|
||||
limit: 200_000,
|
||||
used: 44_000,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to last-call prompt usage for live diagnostic context", async () => {
|
||||
const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-diagnostic-last-"));
|
||||
const storePath = path.join(tmp, "sessions.json");
|
||||
const sessionKey = "main";
|
||||
const sessionEntry = {
|
||||
sessionId: "session",
|
||||
updatedAt: Date.now(),
|
||||
totalTokens: 50_000,
|
||||
};
|
||||
|
||||
await seedSessionStore({ storePath, sessionKey, entry: sessionEntry });
|
||||
|
||||
runEmbeddedPiAgentMock.mockResolvedValue({
|
||||
payloads: [{ text: "ok" }],
|
||||
meta: {
|
||||
agentMeta: {
|
||||
usage: { input: 75_000, output: 5_000, cacheRead: 25_000, total: 105_000 },
|
||||
lastCallUsage: {
|
||||
input: 55_000,
|
||||
output: 2_000,
|
||||
cacheRead: 25_000,
|
||||
cacheWrite: 1_000,
|
||||
total: 83_000,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const diagnostics: DiagnosticEventPayload[] = [];
|
||||
const unsubscribe = onInternalDiagnosticEvent((event) => {
|
||||
diagnostics.push(event);
|
||||
});
|
||||
const { typing, sessionCtx, resolvedQueue, followupRun } = createBaseRun({
|
||||
storePath,
|
||||
sessionEntry,
|
||||
});
|
||||
|
||||
try {
|
||||
await runReplyAgent({
|
||||
commandBody: "hello",
|
||||
followupRun,
|
||||
queueKey: "main",
|
||||
resolvedQueue,
|
||||
shouldSteer: false,
|
||||
shouldFollowup: false,
|
||||
isActive: false,
|
||||
isStreaming: false,
|
||||
typing,
|
||||
sessionCtx,
|
||||
sessionEntry,
|
||||
sessionStore: { [sessionKey]: sessionEntry },
|
||||
sessionKey,
|
||||
storePath,
|
||||
defaultModel: "anthropic/claude-opus-4-6",
|
||||
agentCfgContextTokens: 200_000,
|
||||
resolvedVerboseLevel: "off",
|
||||
isNewSession: false,
|
||||
blockStreamingEnabled: false,
|
||||
resolvedBlockStreamingBreak: "message_end",
|
||||
shouldInjectGroupIntro: false,
|
||||
typingMode: "instant",
|
||||
});
|
||||
} finally {
|
||||
unsubscribe();
|
||||
}
|
||||
|
||||
const usageEvent = diagnostics.find((event) => event.type === "model.usage");
|
||||
expect(usageEvent).toMatchObject({
|
||||
type: "model.usage",
|
||||
usage: {
|
||||
input: 75_000,
|
||||
output: 5_000,
|
||||
cacheRead: 25_000,
|
||||
promptTokens: 100_000,
|
||||
total: 105_000,
|
||||
},
|
||||
context: {
|
||||
limit: 200_000,
|
||||
used: 81_000,
|
||||
},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("runReplyAgent block streaming", () => {
|
||||
@@ -913,6 +1081,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
|
||||
model: "claude",
|
||||
usage: { input: 1200, output: 45, cacheRead: 800, cacheWrite: 200, total: 2245 },
|
||||
lastCallUsage: { input: 1000, output: 45, cacheRead: 750, cacheWrite: 150, total: 1945 },
|
||||
promptTokens: 1250,
|
||||
compactionCount: 1,
|
||||
},
|
||||
},
|
||||
@@ -987,6 +1156,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
|
||||
expect(traceText).toContain("🔎 Usage (Session Total):");
|
||||
expect(traceText).toContain("🔎 Usage (Last Turn Total):");
|
||||
expect(traceText).toContain("🔎 Context Window (Last Model Request):");
|
||||
expect(traceText).toContain("used=1,250 tok (1.3k)");
|
||||
expect(traceText).toContain("🔎 Execution Result:");
|
||||
expect(traceText).toContain("winner=anthropic/claude");
|
||||
expect(traceText).toContain("fallbackUsed=yes");
|
||||
@@ -1025,7 +1195,7 @@ describe("runReplyAgent Active Memory inline debug", () => {
|
||||
expect(traceText).toContain("🔎 Model Input (User Role):");
|
||||
expect(traceText).toContain("🔎 Model Output (Assistant Role):");
|
||||
expect(traceText).toContain(
|
||||
"Summary: winner=claude 🧠 low fallback=yes attempts=2 stop=end_turn prompt=1.9k/200k ⬇️ 1.2k ⬆️ 45 ♻️ 800 🆕 200 🔢 2.2k tools=2 compactions=1",
|
||||
"Summary: winner=claude 🧠 low fallback=yes attempts=2 stop=end_turn prompt=1.3k/200k ⬇️ 1.2k ⬆️ 45 ♻️ 800 🆕 200 🔢 2.2k tools=2 compactions=1",
|
||||
);
|
||||
expect(traceText.indexOf("🔎 Execution Result:")).toBeGreaterThan(
|
||||
traceText.indexOf("🔎 Context Window (Last Model Request):"),
|
||||
|
||||
@@ -585,6 +585,13 @@ function resolveRequestPromptTokens(params: {
|
||||
total?: number;
|
||||
};
|
||||
}): number | undefined {
|
||||
if (
|
||||
typeof params.promptTokens === "number" &&
|
||||
Number.isFinite(params.promptTokens) &&
|
||||
params.promptTokens > 0
|
||||
) {
|
||||
return params.promptTokens;
|
||||
}
|
||||
const lastCall = params.lastCallUsage;
|
||||
if (lastCall) {
|
||||
const input = lastCall.input ?? 0;
|
||||
@@ -595,13 +602,6 @@ function resolveRequestPromptTokens(params: {
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
if (
|
||||
typeof params.promptTokens === "number" &&
|
||||
Number.isFinite(params.promptTokens) &&
|
||||
params.promptTokens > 0
|
||||
) {
|
||||
return params.promptTokens;
|
||||
}
|
||||
const usage = params.usage;
|
||||
if (usage) {
|
||||
const input = usage.input ?? 0;
|
||||
@@ -1428,8 +1428,13 @@ export async function runReplyAgent(params: {
|
||||
const output = usage.output ?? 0;
|
||||
const cacheRead = usage.cacheRead ?? 0;
|
||||
const cacheWrite = usage.cacheWrite ?? 0;
|
||||
const promptTokens = input + cacheRead + cacheWrite;
|
||||
const totalTokens = usage.total ?? promptTokens + output;
|
||||
const usagePromptTokens = input + cacheRead + cacheWrite;
|
||||
const totalTokens = usage.total ?? usagePromptTokens + output;
|
||||
const contextUsedTokens = resolveRequestPromptTokens({
|
||||
lastCallUsage: runResult.meta?.agentMeta?.lastCallUsage,
|
||||
promptTokens,
|
||||
usage,
|
||||
});
|
||||
const costConfig = resolveModelCostConfig({
|
||||
provider: providerUsed,
|
||||
model: modelUsed,
|
||||
@@ -1455,13 +1460,13 @@ export async function runReplyAgent(params: {
|
||||
output,
|
||||
cacheRead,
|
||||
cacheWrite,
|
||||
promptTokens,
|
||||
promptTokens: usagePromptTokens,
|
||||
total: totalTokens,
|
||||
},
|
||||
lastCallUsage: runResult.meta?.agentMeta?.lastCallUsage,
|
||||
context: {
|
||||
limit: contextTokensUsed,
|
||||
used: totalTokens,
|
||||
...(contextUsedTokens !== undefined ? { used: contextUsedTokens } : {}),
|
||||
},
|
||||
costUsd,
|
||||
durationMs: Date.now() - runStartedAt,
|
||||
|
||||
Reference in New Issue
Block a user