diff --git a/CHANGELOG.md b/CHANGELOG.md index c8bda620ac1..7de75be6b10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Docs: https://docs.openclaw.ai - Agents/fallback: preserve the original prompt body on model fallback retries with session history so the retrying model keeps the active task instead of only seeing a generic continue message. (#66029) Thanks @WuKongAI-CMU. - Reply/secrets: resolve active reply channel/account SecretRefs before reply-run message-action discovery so channel token SecretRefs (for example Discord) do not degrade into discovery-time unresolved-secret failures. (#66796) Thanks @joshavant. - Agents/Anthropic: ignore non-positive Anthropic Messages token overrides and fail locally when no positive token budget remains, so invalid `max_tokens` values no longer reach the provider API. (#66664) thanks @jalehman +- Agents/context engines: preserve prompt-only token counts, not full request totals, when deferred maintenance reuses after-turn runtime context so background compaction bookkeeping matches the active prompt window. (#66820) Thanks @jalehman. 
## 2026.4.14 diff --git a/src/agents/pi-embedded-runner/context-engine-maintenance.test.ts b/src/agents/pi-embedded-runner/context-engine-maintenance.test.ts index b396ca8f51d..ec600053d40 100644 --- a/src/agents/pi-embedded-runner/context-engine-maintenance.test.ts +++ b/src/agents/pi-embedded-runner/context-engine-maintenance.test.ts @@ -424,7 +424,11 @@ describe("runContextEngineMaintenance", () => { sessionKey, sessionFile: "/tmp/session.jsonl", reason: "turn", - runtimeContext: { workspaceDir: "/tmp/workspace" }, + runtimeContext: { + workspaceDir: "/tmp/workspace", + tokenBudget: 2048, + currentTokenCount: 1536, + }, }); expect(result).toBeUndefined(); @@ -453,6 +457,8 @@ describe("runContextEngineMaintenance", () => { runtimeContext: expect.objectContaining({ workspaceDir: "/tmp/workspace", allowDeferredCompactionExecution: true, + tokenBudget: 2048, + currentTokenCount: 1536, }), }); diff --git a/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts b/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts index 767f496ff24..6aaa470ee64 100644 --- a/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts +++ b/src/agents/pi-embedded-runner/run/attempt.prompt-helpers.ts @@ -226,6 +226,8 @@ export function buildAfterTurnRuntimeContext(params: { >; workspaceDir: string; agentDir: string; + tokenBudget?: number; + currentTokenCount?: number; promptCache?: ContextEnginePromptCacheInfo; }): ContextEngineRuntimeContext { return { @@ -252,6 +254,16 @@ export function buildAfterTurnRuntimeContext(params: { extraSystemPrompt: params.attempt.extraSystemPrompt, ownerNumbers: params.attempt.ownerNumbers, }), + ...(typeof params.tokenBudget === "number" && + Number.isFinite(params.tokenBudget) && + params.tokenBudget > 0 + ? { tokenBudget: Math.floor(params.tokenBudget) } + : {}), + ...(typeof params.currentTokenCount === "number" && + Number.isFinite(params.currentTokenCount) && + params.currentTokenCount > 0 + ? 
{ currentTokenCount: Math.floor(params.currentTokenCount) } + : {}), ...(params.promptCache ? { promptCache: params.promptCache } : {}), }; } diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index 2f45a46c1b7..a2817885639 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -14,6 +14,8 @@ import { runAttemptContextEngineBootstrap, } from "./attempt.context-engine-helpers.js"; import { + cleanupTempPaths, + createContextEngineAttemptRunner, createContextEngineBootstrapAndAssemble, expectCalledWithSessionKey, getHoisted, @@ -109,6 +111,7 @@ async function finalizeTurn( describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { const sessionKey = "agent:main:discord:channel:test-ctx-engine"; + const tempPaths: string[] = []; beforeEach(() => { resetEmbeddedAttemptHarness(); clearMemoryPluginState(); @@ -116,6 +119,7 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { }); afterEach(async () => { + await cleanupTempPaths(tempPaths); clearMemoryPluginState(); vi.restoreAllMocks(); }); @@ -395,6 +399,59 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { ); }); + it("derives deferred maintenance currentTokenCount from prompt-only usage", async () => { + const afterTurn = vi.fn( + async (_params: { + runtimeContext?: { + currentTokenCount?: number; + promptCache?: { lastCallUsage?: { total?: number } }; + }; + }) => {}, + ); + + await createContextEngineAttemptRunner({ + sessionKey, + tempPaths, + contextEngine: { + assemble: async ({ messages }) => ({ + messages, + estimatedTokens: 1, + }), + afterTurn, + }, + sessionPrompt: async (session) => { + session.messages = [ + ...session.messages, + { + role: "assistant", + content: "done", + timestamp: 
2, + usage: { + input: 10, + output: 5, + cacheRead: 40, + cacheWrite: 2, + total: 57, + }, + } as unknown as AgentMessage, + ]; + }, + }); + + expect(afterTurn).toHaveBeenCalledWith( + expect.objectContaining({ + runtimeContext: expect.objectContaining({ + currentTokenCount: 52, + promptCache: expect.objectContaining({ + lastCallUsage: expect.objectContaining({ + total: 57, + }), + }), + }), + }), + ); + }); + it("skips maintenance when ingestBatch fails", async () => { const { bootstrap, assemble } = createContextEngineBootstrapAndAssemble(); const ingestBatch = vi.fn(async () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index ab47465ea8a..2c93d062216 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -2836,6 +2836,8 @@ describe("buildAfterTurnRuntimeContext", () => { }, workspaceDir: "/tmp/workspace", agentDir: "/tmp/agent", + tokenBudget: 1050000, + currentTokenCount: 232393, }); expect(legacy).toMatchObject({ @@ -2844,6 +2846,8 @@ describe("buildAfterTurnRuntimeContext", () => { model: "gpt-5.4", workspaceDir: "/tmp/workspace", agentDir: "/tmp/agent", + tokenBudget: 1050000, + currentTokenCount: 232393, }); }); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 3c3e8885499..b1a44fc05ce 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -116,7 +116,7 @@ import { resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay, } from "../../transcript-policy.js"; -import { normalizeUsage, type NormalizedUsage } from "../../usage.js"; +import { derivePromptTokens, normalizeUsage, type NormalizedUsage } from "../../usage.js"; import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { isRunnerAbortError } from "../abort.js"; import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } 
from "../cache-ttl.js"; @@ -887,6 +887,7 @@ export async function runEmbeddedAttempt( attempt: params, workspaceDir: effectiveWorkspace, agentDir, + tokenBudget: params.contextTokenBudget, }), runMaintenance: async (contextParams) => await runContextEngineMaintenance({ @@ -2201,10 +2202,13 @@ export async function runEmbeddedAttempt( // Let the active context engine run its post-turn lifecycle. if (params.contextEngine) { + const runtimeCurrentTokenCount = derivePromptTokens(lastCallUsage); const afterTurnRuntimeContext = buildAfterTurnRuntimeContext({ attempt: params, workspaceDir: effectiveWorkspace, agentDir, + tokenBudget: params.contextTokenBudget, + currentTokenCount: runtimeCurrentTokenCount, promptCache, }); await finalizeAttemptContextEngineTurn({ diff --git a/src/context-engine/types.ts b/src/context-engine/types.ts index 594703bc864..2eea7ea7ba2 100644 --- a/src/context-engine/types.ts +++ b/src/context-engine/types.ts @@ -140,6 +140,10 @@ export type ContextEngineRuntimeContext = Record & { * consuming deferred compaction debt. */ allowDeferredCompactionExecution?: boolean; + /** Runtime-resolved context window budget for the active model call. */ + tokenBudget?: number; + /** Best-effort current prompt/context token estimate for this turn. */ + currentTokenCount?: number; /** Optional prompt-cache telemetry for cache-aware engines. */ promptCache?: ContextEnginePromptCacheInfo; /**