From 41f9768cd8deec1ae23e5948a6d4462d1312ec5f Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 02:30:17 +0100 Subject: [PATCH] fix: preserve context engine safeguard compaction --- CHANGELOG.md | 1 + docs/concepts/context-engine.md | 5 +- ...mpt.spawn-workspace.context-engine.test.ts | 76 +++++++++++++++++++ .../attempt.spawn-workspace.test-support.ts | 5 +- src/agents/pi-embedded-runner/run/attempt.ts | 38 ++++------ .../run/preemptive-compaction.test.ts | 15 ++++ .../run/preemptive-compaction.ts | 21 ++++- 7 files changed, 134 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d945162e408..f1d4e0f88fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai - Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi. - Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete. - Codex approvals: compact home-directory permission paths to `~` without repeating them as a separate high-risk warning, while preserving filesystem root and wildcard host warnings. Thanks @steipete. +- Context engine: keep safeguard compaction checks active after context-engine windowing and for `ownsCompaction` engines, so large transcripts can compact before prompt submission instead of waiting for provider overflow. Fixes #71325. Thanks @steipete. - Plugins/runtime deps: isolate the internal npm cache used for bundled plugin runtime-dependency repair and let package updates refresh/verify already-current installs, so failed update or sudo doctor runs can be repaired by rerunning `openclaw update`. Thanks @steipete. - Agents/delete: keep `--json` output machine-readable and retain workspaces that overlap another agent's workspace instead of moving shared state to Trash. Fixes #70889 and #70890. (#70897) Thanks @kaseonedge. - Plugins/runtime deps: stage bundled plugin runtime dependencies for packaged/global installs in an external runtime root and retain already staged deps across repairs, avoiding package-tree update races and npm pruning after upgrades. Thanks @steipete. diff --git a/docs/concepts/context-engine.md b/docs/concepts/context-engine.md index a419c4ebf4e..b420cc1a4fa 100644 --- a/docs/concepts/context-engine.md +++ b/docs/concepts/context-engine.md @@ -210,7 +210,10 @@ enabled for the run: - `true` — the engine owns compaction behavior. OpenClaw disables Pi's built-in auto-compaction for that run, and the engine's `compact()` implementation is responsible for `/compact`, overflow recovery compaction, and any proactive - compaction it wants to do in `afterTurn()`. + compaction it wants to do in `afterTurn()`. OpenClaw may still run the + pre-prompt overflow safeguard; when it predicts the full transcript will + overflow, the recovery path calls the active engine's `compact()` before + submitting another prompt. - `false` or unset — Pi's built-in auto-compaction may still run during prompt execution, but the active engine's `compact()` method is still called for `/compact` and overflow recovery. diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index 4fbf273db79..012e23b60ee 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -18,6 +18,7 @@ import { import { cleanupTempPaths, createContextEngineBootstrapAndAssemble, + createContextEngineAttemptRunner, expectCalledWithSessionKey, getHoisted, resetEmbeddedAttemptHarness, @@ -276,6 +277,81 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { expect(params.sessionKey).toBe(sessionKey); }); + it("prechecks unwindowed context before submitting a windowed context-engine prompt", async () => { + const sessionPrompt = vi.fn(async () => {}); + const fullHistory = [ + { + role: "assistant", + content: [{ type: "text", text: "large historical context ".repeat(600) }], + timestamp: 1, + }, + ] as AgentMessage[]; + const windowedMessages = [ + { role: "assistant", content: [{ type: "text", text: "small window" }], timestamp: 2 }, + ] as AgentMessage[]; + const assemble = vi.fn(async () => ({ + messages: windowedMessages, + estimatedTokens: 3, + })); + + const result = await createContextEngineAttemptRunner({ + contextEngine: { assemble }, + sessionKey, + tempPaths, + sessionMessages: fullHistory, + sessionPrompt, + attemptOverrides: { + contextTokenBudget: 512, + }, + }); + + expect(assemble).toHaveBeenCalledWith( + expect.objectContaining({ + messages: fullHistory, + }), + ); + expect(sessionPrompt).not.toHaveBeenCalled(); + expect(result.promptErrorSource).toBe("precheck"); + expect(result.preflightRecovery).toEqual({ route: "compact_only" }); + }); + + it("keeps preflight overflow checks active for engines that own compaction", async () => { + const sessionPrompt = vi.fn(async () => {}); + const fullHistory = [ + { + role: "assistant", + content: [{ type: "text", text: "engine-owned large historical context ".repeat(600) }], + timestamp: 1, + }, + ] as AgentMessage[]; + const assemble = vi.fn(async () => ({ + messages: [ + { role: "assistant", content: [{ type: "text", text: "small window" }], timestamp: 2 }, + ] as AgentMessage[], + estimatedTokens: 3, + })); + + const result = await createContextEngineAttemptRunner({ + contextEngine: { + assemble, + info: { + ownsCompaction: true, + }, + }, + sessionKey, + tempPaths, + sessionMessages: fullHistory, + sessionPrompt, + attemptOverrides: { + contextTokenBudget: 512, + }, + }); + + expect(sessionPrompt).not.toHaveBeenCalled(); + expect(result.promptErrorSource).toBe("precheck"); + expect(result.preflightRecovery).toEqual({ route: "compact_only" }); + }); + it("skips maintenance when afterTurn fails", async () => { const { bootstrap, assemble } = createContextEngineBootstrapAndAssemble(); const afterTurn = vi.fn(async () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index 85586271820..099f684686e 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -193,13 +193,15 @@ vi.mock("@mariozechner/pi-coding-agent", async () => { async reload() {} } function ModelRegistry() {} + const estimateTokens = (value: unknown) => + Math.max(1, Math.ceil(JSON.stringify(value ?? "").length / 4)); return { ...actual, AuthStorage, createAgentSession: (...args: unknown[]) => hoisted.createAgentSessionMock(...args), DefaultResourceLoader, - estimateTokens: () => 0, + estimateTokens, generateSummary: async () => "", ModelRegistry, SessionManager: { @@ -975,6 +977,7 @@ export async function createContextEngineAttemptRunner(params: { })), ...(maintain ? { maintain } : {}), info: { + ...params.contextEngine.info, id: infoId, name: infoName, version: infoVersion, diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 8a424c0a1e3..60f51f76e19 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1374,6 +1374,7 @@ export async function runEmbeddedAttempt( await baseConvertToLlm(normalizeAssistantReplayContent(messages)); } let prePromptMessageCount = activeSession.messages.length; + let unwindowedContextEngineMessagesForPrecheck: AgentMessage[] | undefined; abortSessionForYield = () => { yieldAbortSettled = Promise.resolve(activeSession.abort()); }; @@ -1855,6 +1856,7 @@ export async function runEmbeddedAttempt( if (params.contextEngine) { try { + unwindowedContextEngineMessagesForPrecheck = activeSession.messages.slice(); const assembled = await assembleAttemptContextEngine({ contextEngine: params.contextEngine, sessionId: params.sessionId, @@ -2453,29 +2455,19 @@ export async function runEmbeddedAttempt( const reserveTokens = settingsManager.getCompactionReserveTokens(); const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS; - const preemptiveCompaction = - params.contextEngine?.info?.ownsCompaction === true - ? { - route: "fits" as const, - shouldCompact: false, - estimatedPromptTokens: 0, - promptBudgetBeforeReserve: 0, - overflowTokens: 0, - toolResultReducibleChars: 0, - effectiveReserveTokens: reserveTokens, - } - : shouldPreemptivelyCompactBeforePrompt({ - messages: activeSession.messages, - systemPrompt: systemPromptText, - prompt: effectivePrompt, - contextTokenBudget, - reserveTokens, - toolResultMaxChars: resolveLiveToolResultMaxChars({ - contextWindowTokens: contextTokenBudget, - cfg: params.config, - agentId: sessionAgentId, - }), - }); + const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({ + messages: activeSession.messages, + unwindowedMessages: unwindowedContextEngineMessagesForPrecheck, + systemPrompt: systemPromptText, + prompt: effectivePrompt, + contextTokenBudget, + reserveTokens, + toolResultMaxChars: resolveLiveToolResultMaxChars({ + contextWindowTokens: contextTokenBudget, + cfg: params.config, + agentId: sessionAgentId, + }), + }); if (preemptiveCompaction.route === "truncate_tool_results_only") { const toolResultMaxChars = resolveLiveToolResultMaxChars({ contextWindowTokens: contextTokenBudget, diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts index ab9a0be6470..2cc4cb218da 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts @@ -93,6 +93,21 @@ describe("preemptive-compaction", () => { expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve); }); + it("uses the larger unwindowed message estimate when context engine assembly windows history", () => { + const result = shouldPreemptivelyCompactBeforePrompt({ + messages: [makeAssistantHistory("small assembled window")], + unwindowedMessages: [makeAssistantHistory(verboseHistory.repeat(4))], + systemPrompt: "sys", + prompt: "hello", + contextTokenBudget: 500, + reserveTokens: 50, + }); + + expect(result.shouldCompact).toBe(true); + expect(result.route).toBe("compact_only"); + expect(result.estimatedPromptTokens).toBeGreaterThan(result.promptBudgetBeforeReserve); + }); + it("caps reserve tokens so small context models keep usable prompt budget", () => { const result = shouldPreemptivelyCompactBeforePrompt({ messages: [makeAssistantHistory("short history")], diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts index de12ededa38..9cc1ac334bc 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts @@ -40,6 +40,7 @@ export function estimatePrePromptTokens(params: { export function shouldPreemptivelyCompactBeforePrompt(params: { messages: AgentMessage[]; + unwindowedMessages?: AgentMessage[]; systemPrompt?: string; prompt: string; contextTokenBudget: number; @@ -54,7 +55,23 @@ export function shouldPreemptivelyCompactBeforePrompt(params: { toolResultReducibleChars: number; effectiveReserveTokens: number; } { - const estimatedPromptTokens = estimatePrePromptTokens(params); + let messagesForPressure = params.messages; + let estimatedPromptTokens = estimatePrePromptTokens({ + messages: params.messages, + systemPrompt: params.systemPrompt, + prompt: params.prompt, + }); + if (params.unwindowedMessages && params.unwindowedMessages !== params.messages) { + const unwindowedEstimatedPromptTokens = estimatePrePromptTokens({ + messages: params.unwindowedMessages, + systemPrompt: params.systemPrompt, + prompt: params.prompt, + }); + if (unwindowedEstimatedPromptTokens > estimatedPromptTokens) { + estimatedPromptTokens = unwindowedEstimatedPromptTokens; + messagesForPressure = params.unwindowedMessages; + } + } const contextTokenBudget = Math.max(1, Math.floor(params.contextTokenBudget)); const requestedReserveTokens = Math.max(0, Math.floor(params.reserveTokens)); const minPromptBudget = Math.min( @@ -68,7 +85,7 @@ export function shouldPreemptivelyCompactBeforePrompt(params: { const promptBudgetBeforeReserve = Math.max(1, contextTokenBudget - effectiveReserveTokens); const overflowTokens = Math.max(0, estimatedPromptTokens - promptBudgetBeforeReserve); const toolResultPotential = estimateToolResultReductionPotential({ - messages: params.messages, + messages: messagesForPressure, contextWindowTokens: params.contextTokenBudget, maxCharsOverride: params.toolResultMaxChars, });