diff --git a/CHANGELOG.md b/CHANGELOG.md index c5232105140..4d0a76669c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai - Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear. - Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth. - Gateway/auth: resolve the active gateway bearer per-request on the HTTP server and the HTTP upgrade handler via `getResolvedAuth()`, mirroring the WebSocket path, so a secret rotated through `secrets.reload` or config hot-reload stops authenticating on `/v1/*`, `/tools/invoke`, plugin HTTP routes, and the canvas upgrade path immediately instead of remaining valid on HTTP until gateway restart. (#66651) Thanks @mmaps. +- Agents/compaction: cap the compaction reserve-token floor to the model context window so small-context local models (e.g. Ollama with 16K tokens) no longer trigger context-overflow errors or infinite compaction loops on every prompt. (#65671) Thanks @openperf. ## 2026.4.14 diff --git a/src/agents/pi-compaction-constants.ts b/src/agents/pi-compaction-constants.ts new file mode 100644 index 00000000000..59cb9adfd50 --- /dev/null +++ b/src/agents/pi-compaction-constants.ts @@ -0,0 +1,12 @@ +/** + * Absolute minimum prompt budget in tokens. When the context window is + * large enough that `contextTokenBudget * MIN_PROMPT_BUDGET_RATIO` exceeds + * this value, this absolute floor takes precedence. + */ +export const MIN_PROMPT_BUDGET_TOKENS = 8_000; + +/** + * Minimum share of the context window that must remain available for prompt + * content after reserve tokens are subtracted. 
+ */ +export const MIN_PROMPT_BUDGET_RATIO = 0.5; diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 2c4236e4a11..ed8738ec1e2 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -786,6 +786,7 @@ export async function compactEmbeddedPiSessionDirect( cwd: effectiveWorkspace, agentDir, cfg: params.config, + contextTokenBudget: ctxInfo.tokens, }); // Sets compaction/pruning runtime state and returns extension factories // that must be passed to the resource loader for the safeguard to be active. diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 38aca027ecf..3c3e8885499 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -913,6 +913,7 @@ export async function runEmbeddedAttempt( cwd: effectiveWorkspace, agentDir, cfg: params.config, + contextTokenBudget: params.contextTokenBudget, }); applyPiAutoCompactionGuard({ settingsManager, diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts index 44b885d03fd..8d550855d79 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts @@ -3,14 +3,16 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent"; import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js"; import { estimateToolResultReductionPotential } from "../tool-result-truncation.js"; import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js"; +import { + MIN_PROMPT_BUDGET_RATIO, + MIN_PROMPT_BUDGET_TOKENS, +} from "../../pi-compaction-constants.js"; export const PREEMPTIVE_OVERFLOW_ERROR_TEXT = "Context overflow: prompt too large for the model (precheck)."; const ESTIMATED_CHARS_PER_TOKEN = 4; const TRUNCATION_ROUTE_BUFFER_TOKENS = 512; -const 
MIN_PROMPT_BUDGET_TOKENS = 8_000; -const MIN_PROMPT_BUDGET_RATIO = 0.5; export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js"; diff --git a/src/agents/pi-project-settings.ts b/src/agents/pi-project-settings.ts index de5c83fdf54..6aaf9886f35 100644 --- a/src/agents/pi-project-settings.ts +++ b/src/agents/pi-project-settings.ts @@ -187,11 +187,14 @@ export function createPreparedEmbeddedPiSettingsManager(params: { cwd: string; agentDir: string; cfg?: OpenClawConfig; + /** Resolved context window budget so reserve-token floor can be capped for small models. */ + contextTokenBudget?: number; }): SettingsManager { const settingsManager = createEmbeddedPiSettingsManager(params); applyPiCompactionSettingsFromConfig({ settingsManager, cfg: params.cfg, + contextTokenBudget: params.contextTokenBudget, }); return settingsManager; } diff --git a/src/agents/pi-settings.test.ts b/src/agents/pi-settings.test.ts index ac6efe82958..ef31bdf0373 100644 --- a/src/agents/pi-settings.test.ts +++ b/src/agents/pi-settings.test.ts @@ -1,4 +1,8 @@ import { describe, expect, it, vi } from "vitest"; +import { + MIN_PROMPT_BUDGET_RATIO, + MIN_PROMPT_BUDGET_TOKENS, +} from "./pi-compaction-constants.js"; import { applyPiCompactionSettingsFromConfig, DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, @@ -120,6 +124,173 @@ describe("applyPiCompactionSettingsFromConfig", () => { expect(result.compaction.keepRecentTokens).toBe(20_000); expect(settingsManager.applyOverrides).not.toHaveBeenCalled(); }); + + it("caps floor to context window ratio for small-context models", () => { + // Pi SDK default reserveTokens is 16 384. With a 16 384 context window + // the default floor (20 000) exceeds the window. The aligned cap + // computes: minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000, + // maxReserve = 16_384 - 8_000 = 8_384. Since current (16_384) > capped + // floor (8_384), no override is needed. 
+ const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + contextTokenBudget: 16_384, + }); + + // Without the cap, reserveTokens would be bumped to 20_000. + // With the cap, it stays at 16_384 (the current value). + expect(result.compaction.reserveTokens).toBe(16_384); + expect(result.compaction.reserveTokens).toBeLessThan( + DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, + ); + expect(result.didOverride).toBe(false); + expect(settingsManager.applyOverrides).not.toHaveBeenCalled(); + }); + + it("applies capped floor over user-configured reserveTokens when default floor exceeds context window", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + // User sets reserveTokens=2048 but NOT reserveTokensFloor (default 20_000 applies). + // Pre-fix: target = max(2048, 20_000) = 20_000 → exceeds 16_384 context → infinite loop. + // Post-fix: floor capped to 8_384 → target = max(2048, 8_384) = 8_384 → works. + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + cfg: { + agents: { + defaults: { + compaction: { reserveTokens: 2_048 }, + }, + }, + }, + contextTokenBudget: 16_384, + }); + + expect(result.didOverride).toBe(true); + expect(result.compaction.reserveTokens).toBe(8_384); // capped floor wins over user's 2_048 + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: 8_384 }, + }); + }); + + it("applies capped floor when current reserve is below it on small-context models", () => { + // Simulate a Pi SDK default of 4 096 with a 16 384 context window. + // minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000. + // maxReserve = 16_384 - 8_000 = 8_384. + // Capped floor = min(20_000, 8_384) = 8_384. 
+ // targetReserveTokens = max(4_096, 8_384) = 8_384 → override applied. + const settingsManager = { + getCompactionReserveTokens: () => 4_096, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + contextTokenBudget: 16_384, + }); + + const minPromptBudget = Math.min( + MIN_PROMPT_BUDGET_TOKENS, + Math.max(1, Math.floor(16_384 * MIN_PROMPT_BUDGET_RATIO)), + ); + const expectedReserve = Math.max(0, 16_384 - minPromptBudget); + expect(result.didOverride).toBe(true); + expect(result.compaction.reserveTokens).toBe(expectedReserve); + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: expectedReserve }, + }); + }); + + it("respects user-configured reserveTokens below capped floor for small models", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + // User explicitly sets reserveTokens=2048 and reserveTokensFloor=0. + // With contextTokenBudget=16384, the capped floor = min(0, 8_384) = 0. + // targetReserveTokens = max(2048, 0) = 2048. + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + cfg: { + agents: { + defaults: { + compaction: { reserveTokens: 2_048, reserveTokensFloor: 0 }, + }, + }, + }, + contextTokenBudget: 16_384, + }); + + expect(result.compaction.reserveTokens).toBe(2_048); + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: 2_048 }, + }); + }); + + it("does not cap floor for mid-size models when maxReserve exceeds default floor", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + // 32 768 context window → minPromptBudget = min(8_000, floor(32_768 * 0.5)) = 8_000. + // maxReserve = 32_768 - 8_000 = 24_768. 
+ // Since 24_768 > 20_000 (DEFAULT_FLOOR), the floor is NOT capped and stays at 20_000. + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + contextTokenBudget: 32_768, + }); + + expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR); + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR }, + }); + }); + + it("does not cap floor when context window is large enough", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + // 200 000 context window → maxReserve = 200_000 - 8_000 = 192_000. + // floor (20 000) is well within that cap. + const result = applyPiCompactionSettingsFromConfig({ + settingsManager, + contextTokenBudget: 200_000, + }); + + expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR); + expect(settingsManager.applyOverrides).toHaveBeenCalledWith({ + compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR }, + }); + }); + + it("falls back to uncapped floor when contextTokenBudget is not provided", () => { + const settingsManager = { + getCompactionReserveTokens: () => 16_384, + getCompactionKeepRecentTokens: () => 20_000, + applyOverrides: vi.fn(), + }; + + // No contextTokenBudget → backward-compatible behavior, floor = 20 000. 
+ const result = applyPiCompactionSettingsFromConfig({ settingsManager }); + + expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR); + }); }); describe("resolveCompactionReserveTokensFloor", () => { diff --git a/src/agents/pi-settings.ts b/src/agents/pi-settings.ts index d7820606b30..390ae0f6fd4 100644 --- a/src/agents/pi-settings.ts +++ b/src/agents/pi-settings.ts @@ -1,5 +1,9 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; import type { ContextEngineInfo } from "../context-engine/types.js"; +import { + MIN_PROMPT_BUDGET_RATIO, + MIN_PROMPT_BUDGET_TOKENS, +} from "./pi-compaction-constants.js"; export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000; @@ -15,6 +19,12 @@ type PiSettingsManagerLike = { setCompactionEnabled?: (enabled: boolean) => void; }; +/** + * Ensures the compaction reserve tokens are at least the specified minimum. + * Note: This function is not context-aware and uses an uncapped floor. + * If called for small-context models without threading `contextTokenBudget`, + * it may re-introduce context overflow issues. + */ export function ensurePiCompactionReserveTokens(params: { settingsManager: PiSettingsManagerLike; minReserveTokens?: number; @@ -58,6 +68,8 @@ function toPositiveInt(value: unknown): number | undefined { export function applyPiCompactionSettingsFromConfig(params: { settingsManager: PiSettingsManagerLike; cfg?: OpenClawConfig; + /** When known, the resolved context window budget for the current model. 
*/ + contextTokenBudget?: number; }): { didOverride: boolean; compaction: { reserveTokens: number; keepRecentTokens: number }; @@ -68,7 +80,22 @@ export function applyPiCompactionSettingsFromConfig(params: { const configuredReserveTokens = toNonNegativeInt(compactionCfg?.reserveTokens); const configuredKeepRecentTokens = toPositiveInt(compactionCfg?.keepRecentTokens); - const reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg); + let reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg); + + // Cap the floor to a safe fraction of the context window so that + // small-context models (e.g. Ollama with 16 K tokens) are not starved of + // prompt budget. Without this cap the default floor of 20 000 can exceed + // the entire context window, causing every prompt to be classified as an + // overflow and triggering an infinite compaction loop. + const ctxBudget = params.contextTokenBudget; + if (typeof ctxBudget === "number" && Number.isFinite(ctxBudget) && ctxBudget > 0) { + const minPromptBudget = Math.min( + MIN_PROMPT_BUDGET_TOKENS, + Math.max(1, Math.floor(ctxBudget * MIN_PROMPT_BUDGET_RATIO)), + ); + const maxReserve = Math.max(0, ctxBudget - minPromptBudget); + reserveTokensFloor = Math.min(reserveTokensFloor, maxReserve); + } const targetReserveTokens = Math.max( configuredReserveTokens ?? currentReserveTokens,