mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:10:43 +00:00
fix(gateway): cap compaction reserve floor to context window for small models (#65671)
Fixes #65465. Caps the compaction reserveTokensFloor so that at least min(8 000, 50%) of the context window remains available for prompt content, preventing the default 20 000-token floor from exceeding the entire context window on small-context local models (e.g. Ollama 16K). The cap is only applied when contextTokenBudget is provided, preserving backward compatibility.
This commit is contained in:
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear.
|
||||
- Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth.
|
||||
- Gateway/auth: resolve the active gateway bearer per-request on the HTTP server and the HTTP upgrade handler via `getResolvedAuth()`, mirroring the WebSocket path, so a secret rotated through `secrets.reload` or config hot-reload stops authenticating on `/v1/*`, `/tools/invoke`, plugin HTTP routes, and the canvas upgrade path immediately instead of remaining valid on HTTP until gateway restart. (#66651) Thanks @mmaps.
|
||||
- Agents/compaction: cap the compaction reserve-token floor to the model context window so small-context local models (e.g. Ollama with 16K tokens) no longer trigger context-overflow errors or infinite compaction loops on every prompt. (#65671) Thanks @openperf.
|
||||
|
||||
## 2026.4.14
|
||||
|
||||
|
||||
12
src/agents/pi-compaction-constants.ts
Normal file
12
src/agents/pi-compaction-constants.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* Absolute minimum prompt budget in tokens. When the context window is
|
||||
* large enough that `contextTokenBudget * MIN_PROMPT_BUDGET_RATIO` exceeds
|
||||
* this value, this absolute floor takes precedence.
|
||||
*/
|
||||
export const MIN_PROMPT_BUDGET_TOKENS = 8_000;
|
||||
|
||||
/**
|
||||
* Minimum share of the context window that must remain available for prompt
|
||||
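* content after the reserve-token floor is subtracted. It only governs when this share is smaller than `MIN_PROMPT_BUDGET_TOKENS` (context windows under 16 000 tokens); for larger windows the token constant applies instead.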
|
||||
*/
|
||||
export const MIN_PROMPT_BUDGET_RATIO = 0.5;
|
||||
@@ -786,6 +786,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
cwd: effectiveWorkspace,
|
||||
agentDir,
|
||||
cfg: params.config,
|
||||
contextTokenBudget: ctxInfo.tokens,
|
||||
});
|
||||
// Sets compaction/pruning runtime state and returns extension factories
|
||||
// that must be passed to the resource loader for the safeguard to be active.
|
||||
|
||||
@@ -913,6 +913,7 @@ export async function runEmbeddedAttempt(
|
||||
cwd: effectiveWorkspace,
|
||||
agentDir,
|
||||
cfg: params.config,
|
||||
contextTokenBudget: params.contextTokenBudget,
|
||||
});
|
||||
applyPiAutoCompactionGuard({
|
||||
settingsManager,
|
||||
|
||||
@@ -3,14 +3,16 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||
import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js";
|
||||
import { estimateToolResultReductionPotential } from "../tool-result-truncation.js";
|
||||
import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
|
||||
import {
|
||||
MIN_PROMPT_BUDGET_RATIO,
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
} from "../../pi-compaction-constants.js";
|
||||
|
||||
export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
|
||||
"Context overflow: prompt too large for the model (precheck).";
|
||||
|
||||
const ESTIMATED_CHARS_PER_TOKEN = 4;
|
||||
const TRUNCATION_ROUTE_BUFFER_TOKENS = 512;
|
||||
const MIN_PROMPT_BUDGET_TOKENS = 8_000;
|
||||
const MIN_PROMPT_BUDGET_RATIO = 0.5;
|
||||
|
||||
export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
|
||||
|
||||
|
||||
@@ -187,11 +187,14 @@ export function createPreparedEmbeddedPiSettingsManager(params: {
|
||||
cwd: string;
|
||||
agentDir: string;
|
||||
cfg?: OpenClawConfig;
|
||||
/** Resolved context window budget so reserve-token floor can be capped for small models. */
|
||||
contextTokenBudget?: number;
|
||||
}): SettingsManager {
|
||||
const settingsManager = createEmbeddedPiSettingsManager(params);
|
||||
applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
cfg: params.cfg,
|
||||
contextTokenBudget: params.contextTokenBudget,
|
||||
});
|
||||
return settingsManager;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
MIN_PROMPT_BUDGET_RATIO,
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
} from "./pi-compaction-constants.js";
|
||||
import {
|
||||
applyPiCompactionSettingsFromConfig,
|
||||
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
|
||||
@@ -120,6 +124,173 @@ describe("applyPiCompactionSettingsFromConfig", () => {
|
||||
expect(result.compaction.keepRecentTokens).toBe(20_000);
|
||||
expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("caps floor to context window ratio for small-context models", () => {
|
||||
// Pi SDK default reserveTokens is 16 384. With a 16 384 context window
|
||||
// the default floor (20 000) exceeds the window. The aligned cap
|
||||
// computes: minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000,
|
||||
// maxReserve = 16_384 - 8_000 = 8_384. Since current (16_384) > capped
|
||||
// floor (8_384), no override is needed.
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
contextTokenBudget: 16_384,
|
||||
});
|
||||
|
||||
// Without the cap, reserveTokens would be bumped to 20_000.
|
||||
// With the cap, it stays at 16_384 (the current value).
|
||||
expect(result.compaction.reserveTokens).toBe(16_384);
|
||||
expect(result.compaction.reserveTokens).toBeLessThan(
|
||||
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
|
||||
);
|
||||
expect(result.didOverride).toBe(false);
|
||||
expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("applies capped floor over user-configured reserveTokens when default floor exceeds context window", () => {
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
// User sets reserveTokens=2048 but NOT reserveTokensFloor (default 20_000 applies).
|
||||
// Pre-fix: target = max(2048, 20_000) = 20_000 → exceeds 16_384 context → infinite loop.
|
||||
// Post-fix: floor capped to 8_384 → target = max(2048, 8_384) = 8_384 → works.
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
compaction: { reserveTokens: 2_048 },
|
||||
},
|
||||
},
|
||||
},
|
||||
contextTokenBudget: 16_384,
|
||||
});
|
||||
|
||||
expect(result.didOverride).toBe(true);
|
||||
expect(result.compaction.reserveTokens).toBe(8_384); // capped floor wins over user's 2_048
|
||||
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
|
||||
compaction: { reserveTokens: 8_384 },
|
||||
});
|
||||
});
|
||||
|
||||
it("applies capped floor when current reserve is below it on small-context models", () => {
|
||||
// Simulate a Pi SDK default of 4 096 with a 16 384 context window.
|
||||
// minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000.
|
||||
// maxReserve = 16_384 - 8_000 = 8_384.
|
||||
// Capped floor = min(20_000, 8_384) = 8_384.
|
||||
// targetReserveTokens = max(4_096, 8_384) = 8_384 → override applied.
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 4_096,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
contextTokenBudget: 16_384,
|
||||
});
|
||||
|
||||
const minPromptBudget = Math.min(
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
Math.max(1, Math.floor(16_384 * MIN_PROMPT_BUDGET_RATIO)),
|
||||
);
|
||||
const expectedReserve = Math.max(0, 16_384 - minPromptBudget);
|
||||
expect(result.didOverride).toBe(true);
|
||||
expect(result.compaction.reserveTokens).toBe(expectedReserve);
|
||||
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
|
||||
compaction: { reserveTokens: expectedReserve },
|
||||
});
|
||||
});
|
||||
|
||||
it("respects user-configured reserveTokens below capped floor for small models", () => {
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
// User explicitly sets reserveTokens=2048 and reserveTokensFloor=0.
|
||||
// With contextTokenBudget=16384, maxReserve = 16_384 - 8_000 = 8_384, so the capped floor = min(0, 8_384) = 0.
|
||||
// targetReserveTokens = max(2048, 0) = 2048.
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
compaction: { reserveTokens: 2_048, reserveTokensFloor: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
contextTokenBudget: 16_384,
|
||||
});
|
||||
|
||||
expect(result.compaction.reserveTokens).toBe(2_048);
|
||||
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
|
||||
compaction: { reserveTokens: 2_048 },
|
||||
});
|
||||
});
|
||||
|
||||
it("does not cap floor for mid-size models when maxReserve exceeds default floor", () => {
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
// 32 768 context window → minPromptBudget = min(8_000, floor(32_768 * 0.5)) = 8_000.
|
||||
// maxReserve = 32_768 - 8_000 = 24_768.
|
||||
// Since 24_768 > 20_000 (DEFAULT_FLOOR), the floor is NOT capped and stays at 20_000.
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
contextTokenBudget: 32_768,
|
||||
});
|
||||
|
||||
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
|
||||
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
|
||||
compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
|
||||
});
|
||||
});
|
||||
|
||||
it("does not cap floor when context window is large enough", () => {
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
// 200 000 context window → maxReserve = 200_000 - 8_000 = 192_000.
|
||||
// floor (20 000) is well within that cap.
|
||||
const result = applyPiCompactionSettingsFromConfig({
|
||||
settingsManager,
|
||||
contextTokenBudget: 200_000,
|
||||
});
|
||||
|
||||
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
|
||||
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
|
||||
compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back to uncapped floor when contextTokenBudget is not provided", () => {
|
||||
const settingsManager = {
|
||||
getCompactionReserveTokens: () => 16_384,
|
||||
getCompactionKeepRecentTokens: () => 20_000,
|
||||
applyOverrides: vi.fn(),
|
||||
};
|
||||
|
||||
// No contextTokenBudget → backward-compatible behavior, floor = 20 000.
|
||||
const result = applyPiCompactionSettingsFromConfig({ settingsManager });
|
||||
|
||||
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveCompactionReserveTokensFloor", () => {
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import type { ContextEngineInfo } from "../context-engine/types.js";
|
||||
import {
|
||||
MIN_PROMPT_BUDGET_RATIO,
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
} from "./pi-compaction-constants.js";
|
||||
|
||||
export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000;
|
||||
|
||||
@@ -15,6 +19,12 @@ type PiSettingsManagerLike = {
|
||||
setCompactionEnabled?: (enabled: boolean) => void;
|
||||
};
|
||||
|
||||
/**
|
||||
* Ensures the compaction reserve tokens are at least the specified minimum.
|
||||
* Note: This function is not context-aware and uses an uncapped floor.
|
||||
* If called for small-context models without threading `contextTokenBudget`,
|
||||
* it may re-introduce context overflow issues.
|
||||
*/
|
||||
export function ensurePiCompactionReserveTokens(params: {
|
||||
settingsManager: PiSettingsManagerLike;
|
||||
minReserveTokens?: number;
|
||||
@@ -58,6 +68,8 @@ function toPositiveInt(value: unknown): number | undefined {
|
||||
export function applyPiCompactionSettingsFromConfig(params: {
|
||||
settingsManager: PiSettingsManagerLike;
|
||||
cfg?: OpenClawConfig;
|
||||
/** When known, the resolved context window budget for the current model. */
|
||||
contextTokenBudget?: number;
|
||||
}): {
|
||||
didOverride: boolean;
|
||||
compaction: { reserveTokens: number; keepRecentTokens: number };
|
||||
@@ -68,7 +80,22 @@ export function applyPiCompactionSettingsFromConfig(params: {
|
||||
|
||||
const configuredReserveTokens = toNonNegativeInt(compactionCfg?.reserveTokens);
|
||||
const configuredKeepRecentTokens = toPositiveInt(compactionCfg?.keepRecentTokens);
|
||||
const reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
|
||||
let reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
|
||||
|
||||
// Cap the floor to a safe fraction of the context window so that
|
||||
// small-context models (e.g. Ollama with 16 K tokens) are not starved of
|
||||
// prompt budget. Without this cap the default floor of 20 000 can exceed
|
||||
// the entire context window, causing every prompt to be classified as an
|
||||
// overflow and triggering an infinite compaction loop.
|
||||
const ctxBudget = params.contextTokenBudget;
|
||||
if (typeof ctxBudget === "number" && Number.isFinite(ctxBudget) && ctxBudget > 0) {
|
||||
const minPromptBudget = Math.min(
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
Math.max(1, Math.floor(ctxBudget * MIN_PROMPT_BUDGET_RATIO)),
|
||||
);
|
||||
const maxReserve = Math.max(0, ctxBudget - minPromptBudget);
|
||||
reserveTokensFloor = Math.min(reserveTokensFloor, maxReserve);
|
||||
}
|
||||
|
||||
const targetReserveTokens = Math.max(
|
||||
configuredReserveTokens ?? currentReserveTokens,
|
||||
|
||||
Reference in New Issue
Block a user