fix: preserve context engine safeguard compaction

This commit is contained in:
Peter Steinberger
2026-04-25 02:30:17 +01:00
parent a9a308becd
commit 41f9768cd8
7 changed files with 134 additions and 27 deletions

View File

@@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.
- Codex approvals: compact home-directory permission paths to `~` without repeating them as a separate high-risk warning, while preserving filesystem root and wildcard host warnings. Thanks @steipete.
- Context engine: keep safeguard compaction checks active after context-engine windowing and for `ownsCompaction` engines, so large transcripts can compact before prompt submission instead of waiting for provider overflow. Fixes #71325. Thanks @steipete.
- Plugins/runtime deps: isolate the internal npm cache used for bundled plugin runtime-dependency repair and let package updates refresh/verify already-current installs, so failed update or sudo doctor runs can be repaired by rerunning `openclaw update`. Thanks @steipete.
- Agents/delete: keep `--json` output machine-readable and retain workspaces that overlap another agent's workspace instead of moving shared state to Trash. Fixes #70889 and #70890. (#70897) Thanks @kaseonedge.
- Plugins/runtime deps: stage bundled plugin runtime dependencies for packaged/global installs in an external runtime root and retain already staged deps across repairs, avoiding package-tree update races and npm pruning after upgrades. Thanks @steipete.

View File

@@ -210,7 +210,10 @@ enabled for the run:
- `true` — the engine owns compaction behavior. OpenClaw disables Pi's built-in
auto-compaction for that run, and the engine's `compact()` implementation is
responsible for `/compact`, overflow recovery compaction, and any proactive
compaction it wants to do in `afterTurn()`.
compaction it wants to do in `afterTurn()`. OpenClaw may still run the
pre-prompt overflow safeguard; when it predicts the full transcript will
overflow, the recovery path calls the active engine's `compact()` before
submitting another prompt.
- `false` or unset — Pi's built-in auto-compaction may still run during prompt
execution, but the active engine's `compact()` method is still called for
`/compact` and overflow recovery.

View File

@@ -18,6 +18,7 @@ import {
import {
cleanupTempPaths,
createContextEngineBootstrapAndAssemble,
createContextEngineAttemptRunner,
expectCalledWithSessionKey,
getHoisted,
resetEmbeddedAttemptHarness,
@@ -276,6 +277,81 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
expect(params.sessionKey).toBe(sessionKey);
});
it("prechecks unwindowed context before submitting a windowed context-engine prompt", async () => {
const sessionPrompt = vi.fn(async () => {});
const fullHistory = [
{
role: "assistant",
content: [{ type: "text", text: "large historical context ".repeat(600) }],
timestamp: 1,
},
] as AgentMessage[];
const windowedMessages = [
{ role: "assistant", content: [{ type: "text", text: "small window" }], timestamp: 2 },
] as AgentMessage[];
const assemble = vi.fn(async () => ({
messages: windowedMessages,
estimatedTokens: 3,
}));
const result = await createContextEngineAttemptRunner({
contextEngine: { assemble },
sessionKey,
tempPaths,
sessionMessages: fullHistory,
sessionPrompt,
attemptOverrides: {
contextTokenBudget: 512,
},
});
expect(assemble).toHaveBeenCalledWith(
expect.objectContaining({
messages: fullHistory,
}),
);
expect(sessionPrompt).not.toHaveBeenCalled();
expect(result.promptErrorSource).toBe("precheck");
expect(result.preflightRecovery).toEqual({ route: "compact_only" });
});
it("keeps preflight overflow checks active for engines that own compaction", async () => {
const sessionPrompt = vi.fn(async () => {});
const fullHistory = [
{
role: "assistant",
content: [{ type: "text", text: "engine-owned large historical context ".repeat(600) }],
timestamp: 1,
},
] as AgentMessage[];
const assemble = vi.fn(async () => ({
messages: [
{ role: "assistant", content: [{ type: "text", text: "small window" }], timestamp: 2 },
] as AgentMessage[],
estimatedTokens: 3,
}));
const result = await createContextEngineAttemptRunner({
contextEngine: {
assemble,
info: {
ownsCompaction: true,
},
},
sessionKey,
tempPaths,
sessionMessages: fullHistory,
sessionPrompt,
attemptOverrides: {
contextTokenBudget: 512,
},
});
expect(sessionPrompt).not.toHaveBeenCalled();
expect(result.promptErrorSource).toBe("precheck");
expect(result.preflightRecovery).toEqual({ route: "compact_only" });
});
it("skips maintenance when afterTurn fails", async () => {
const { bootstrap, assemble } = createContextEngineBootstrapAndAssemble();
const afterTurn = vi.fn(async () => {

View File

@@ -193,13 +193,15 @@ vi.mock("@mariozechner/pi-coding-agent", async () => {
async reload() {}
}
function ModelRegistry() {}
const estimateTokens = (value: unknown) =>
Math.max(1, Math.ceil(JSON.stringify(value ?? "").length / 4));
return {
...actual,
AuthStorage,
createAgentSession: (...args: unknown[]) => hoisted.createAgentSessionMock(...args),
DefaultResourceLoader,
estimateTokens: () => 0,
estimateTokens,
generateSummary: async () => "",
ModelRegistry,
SessionManager: {
@@ -975,6 +977,7 @@ export async function createContextEngineAttemptRunner(params: {
})),
...(maintain ? { maintain } : {}),
info: {
...params.contextEngine.info,
id: infoId,
name: infoName,
version: infoVersion,

View File

@@ -1374,6 +1374,7 @@ export async function runEmbeddedAttempt(
await baseConvertToLlm(normalizeAssistantReplayContent(messages));
}
let prePromptMessageCount = activeSession.messages.length;
let unwindowedContextEngineMessagesForPrecheck: AgentMessage[] | undefined;
abortSessionForYield = () => {
yieldAbortSettled = Promise.resolve(activeSession.abort());
};
@@ -1855,6 +1856,7 @@ export async function runEmbeddedAttempt(
if (params.contextEngine) {
try {
unwindowedContextEngineMessagesForPrecheck = activeSession.messages.slice();
const assembled = await assembleAttemptContextEngine({
contextEngine: params.contextEngine,
sessionId: params.sessionId,
@@ -2453,29 +2455,19 @@ export async function runEmbeddedAttempt(
const reserveTokens = settingsManager.getCompactionReserveTokens();
const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
const preemptiveCompaction =
params.contextEngine?.info?.ownsCompaction === true
? {
route: "fits" as const,
shouldCompact: false,
estimatedPromptTokens: 0,
promptBudgetBeforeReserve: 0,
overflowTokens: 0,
toolResultReducibleChars: 0,
effectiveReserveTokens: reserveTokens,
}
: shouldPreemptivelyCompactBeforePrompt({
messages: activeSession.messages,
systemPrompt: systemPromptText,
prompt: effectivePrompt,
contextTokenBudget,
reserveTokens,
toolResultMaxChars: resolveLiveToolResultMaxChars({
contextWindowTokens: contextTokenBudget,
cfg: params.config,
agentId: sessionAgentId,
}),
});
const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
messages: activeSession.messages,
unwindowedMessages: unwindowedContextEngineMessagesForPrecheck,
systemPrompt: systemPromptText,
prompt: effectivePrompt,
contextTokenBudget,
reserveTokens,
toolResultMaxChars: resolveLiveToolResultMaxChars({
contextWindowTokens: contextTokenBudget,
cfg: params.config,
agentId: sessionAgentId,
}),
});
if (preemptiveCompaction.route === "truncate_tool_results_only") {
const toolResultMaxChars = resolveLiveToolResultMaxChars({
contextWindowTokens: contextTokenBudget,

View File

@@ -93,6 +93,21 @@ describe("preemptive-compaction", () => {
expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
});
it("uses the larger unwindowed message estimate when context engine assembly windows history", () => {
const result = shouldPreemptivelyCompactBeforePrompt({
messages: [makeAssistantHistory("small assembled window")],
unwindowedMessages: [makeAssistantHistory(verboseHistory.repeat(4))],
systemPrompt: "sys",
prompt: "hello",
contextTokenBudget: 500,
reserveTokens: 50,
});
expect(result.shouldCompact).toBe(true);
expect(result.route).toBe("compact_only");
expect(result.estimatedPromptTokens).toBeGreaterThan(result.promptBudgetBeforeReserve);
});
it("caps reserve tokens so small context models keep usable prompt budget", () => {
const result = shouldPreemptivelyCompactBeforePrompt({
messages: [makeAssistantHistory("short history")],

View File

@@ -40,6 +40,7 @@ export function estimatePrePromptTokens(params: {
export function shouldPreemptivelyCompactBeforePrompt(params: {
messages: AgentMessage[];
unwindowedMessages?: AgentMessage[];
systemPrompt?: string;
prompt: string;
contextTokenBudget: number;
@@ -54,7 +55,23 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
toolResultReducibleChars: number;
effectiveReserveTokens: number;
} {
const estimatedPromptTokens = estimatePrePromptTokens(params);
let messagesForPressure = params.messages;
let estimatedPromptTokens = estimatePrePromptTokens({
messages: params.messages,
systemPrompt: params.systemPrompt,
prompt: params.prompt,
});
if (params.unwindowedMessages && params.unwindowedMessages !== params.messages) {
const unwindowedEstimatedPromptTokens = estimatePrePromptTokens({
messages: params.unwindowedMessages,
systemPrompt: params.systemPrompt,
prompt: params.prompt,
});
if (unwindowedEstimatedPromptTokens > estimatedPromptTokens) {
estimatedPromptTokens = unwindowedEstimatedPromptTokens;
messagesForPressure = params.unwindowedMessages;
}
}
const contextTokenBudget = Math.max(1, Math.floor(params.contextTokenBudget));
const requestedReserveTokens = Math.max(0, Math.floor(params.reserveTokens));
const minPromptBudget = Math.min(
@@ -68,7 +85,7 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
const promptBudgetBeforeReserve = Math.max(1, contextTokenBudget - effectiveReserveTokens);
const overflowTokens = Math.max(0, estimatedPromptTokens - promptBudgetBeforeReserve);
const toolResultPotential = estimateToolResultReductionPotential({
messages: params.messages,
messages: messagesForPressure,
contextWindowTokens: params.contextTokenBudget,
maxCharsOverride: params.toolResultMaxChars,
});