fix(gateway): cap compaction reserve floor to context window for small models (#65671)

Fixes #65465. Caps the compaction reserveTokensFloor so that at least min(8 000 tokens, 50% of the context window) remains available for
  prompt content, preventing the default 20 000-token floor from exceeding the entire context window on small-context local models (e.g. Ollama
  16K). The cap is only applied when contextTokenBudget is provided, preserving backward compatibility.
This commit is contained in:
Chunyue Wang
2026-04-15 01:08:11 +08:00
committed by GitHub
parent 1169dd7039
commit 4bc46ccfed
8 changed files with 221 additions and 3 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
- Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear.
- Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth.
- Gateway/auth: resolve the active gateway bearer per-request on the HTTP server and the HTTP upgrade handler via `getResolvedAuth()`, mirroring the WebSocket path, so a secret rotated through `secrets.reload` or config hot-reload stops authenticating on `/v1/*`, `/tools/invoke`, plugin HTTP routes, and the canvas upgrade path immediately instead of remaining valid on HTTP until gateway restart. (#66651) Thanks @mmaps.
- Agents/compaction: cap the compaction reserve-token floor to the model context window so small-context local models (e.g. Ollama with 16K tokens) no longer trigger context-overflow errors or infinite compaction loops on every prompt. (#65671) Thanks @openperf.
## 2026.4.14

View File

@@ -0,0 +1,12 @@
/**
* Absolute cap, in tokens, on the prompt budget that the compaction
* reserve-floor cap guarantees. The guaranteed budget is computed as
* `min(MIN_PROMPT_BUDGET_TOKENS, max(1, floor(contextTokenBudget * MIN_PROMPT_BUDGET_RATIO)))`,
* so once the ratio-based share exceeds this value (context windows above
* 16 000 tokens with the current ratio), this absolute value takes precedence.
*/
export const MIN_PROMPT_BUDGET_TOKENS = 8_000;
/**
* Share of the context window that must remain available for prompt content
* after reserve tokens are subtracted. Because the guaranteed budget is the
* smaller of this share and `MIN_PROMPT_BUDGET_TOKENS`, the ratio is the
* binding limit only for small context windows.
*/
export const MIN_PROMPT_BUDGET_RATIO = 0.5;

View File

@@ -786,6 +786,7 @@ export async function compactEmbeddedPiSessionDirect(
cwd: effectiveWorkspace,
agentDir,
cfg: params.config,
contextTokenBudget: ctxInfo.tokens,
});
// Sets compaction/pruning runtime state and returns extension factories
// that must be passed to the resource loader for the safeguard to be active.

View File

@@ -913,6 +913,7 @@ export async function runEmbeddedAttempt(
cwd: effectiveWorkspace,
agentDir,
cfg: params.config,
contextTokenBudget: params.contextTokenBudget,
});
applyPiAutoCompactionGuard({
settingsManager,

View File

@@ -3,14 +3,16 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js";
import { estimateToolResultReductionPotential } from "../tool-result-truncation.js";
import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
import {
MIN_PROMPT_BUDGET_RATIO,
MIN_PROMPT_BUDGET_TOKENS,
} from "../../pi-compaction-constants.js";
export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
"Context overflow: prompt too large for the model (precheck).";
const ESTIMATED_CHARS_PER_TOKEN = 4;
const TRUNCATION_ROUTE_BUFFER_TOKENS = 512;
const MIN_PROMPT_BUDGET_TOKENS = 8_000;
const MIN_PROMPT_BUDGET_RATIO = 0.5;
export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";

View File

@@ -187,11 +187,14 @@ export function createPreparedEmbeddedPiSettingsManager(params: {
cwd: string;
agentDir: string;
cfg?: OpenClawConfig;
/** Resolved context window budget so reserve-token floor can be capped for small models. */
contextTokenBudget?: number;
}): SettingsManager {
const settingsManager = createEmbeddedPiSettingsManager(params);
applyPiCompactionSettingsFromConfig({
settingsManager,
cfg: params.cfg,
contextTokenBudget: params.contextTokenBudget,
});
return settingsManager;
}

View File

@@ -1,4 +1,8 @@
import { describe, expect, it, vi } from "vitest";
import {
MIN_PROMPT_BUDGET_RATIO,
MIN_PROMPT_BUDGET_TOKENS,
} from "./pi-compaction-constants.js";
import {
applyPiCompactionSettingsFromConfig,
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
@@ -120,6 +124,173 @@ describe("applyPiCompactionSettingsFromConfig", () => {
expect(result.compaction.keepRecentTokens).toBe(20_000);
expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
});
it("caps floor to context window ratio for small-context models", () => {
// Pi SDK default reserveTokens is 16 384. With a 16 384 context window
// the default floor (20 000) exceeds the window. The aligned cap
// computes: minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000,
// maxReserve = 16_384 - 8_000 = 8_384. Since current (16_384) > capped
// floor (8_384), no override is needed.
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
contextTokenBudget: 16_384,
});
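// Without the cap, reserveTokens would be bumped to 20_000.
// With the cap, it stays at 16_384 (the current value).
// NOTE(review): 16_384 equals the full contextTokenBudget used here, so only
// the floor is capped, not the current reserve — confirm that downstream
// handling still leaves room for the prompt in this case.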
expect(result.compaction.reserveTokens).toBe(16_384);
expect(result.compaction.reserveTokens).toBeLessThan(
DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR,
);
expect(result.didOverride).toBe(false);
expect(settingsManager.applyOverrides).not.toHaveBeenCalled();
});
it("applies capped floor over user-configured reserveTokens when default floor exceeds context window", () => {
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
// User sets reserveTokens=2048 but NOT reserveTokensFloor (default 20_000 applies).
// Pre-fix: target = max(2048, 20_000) = 20_000 → exceeds 16_384 context → infinite loop.
// Post-fix: floor capped to 8_384 → target = max(2048, 8_384) = 8_384 → works.
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
cfg: {
agents: {
defaults: {
compaction: { reserveTokens: 2_048 },
},
},
},
contextTokenBudget: 16_384,
});
expect(result.didOverride).toBe(true);
expect(result.compaction.reserveTokens).toBe(8_384); // capped floor wins over user's 2_048
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
compaction: { reserveTokens: 8_384 },
});
});
it("applies capped floor when current reserve is below it on small-context models", () => {
// Simulate a Pi SDK default of 4 096 with a 16 384 context window.
// minPromptBudget = min(8_000, floor(16_384 * 0.5)) = 8_000.
// maxReserve = 16_384 - 8_000 = 8_384.
// Capped floor = min(20_000, 8_384) = 8_384.
// targetReserveTokens = max(4_096, 8_384) = 8_384 → override applied.
const settingsManager = {
getCompactionReserveTokens: () => 4_096,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
contextTokenBudget: 16_384,
});
const minPromptBudget = Math.min(
MIN_PROMPT_BUDGET_TOKENS,
Math.max(1, Math.floor(16_384 * MIN_PROMPT_BUDGET_RATIO)),
);
const expectedReserve = Math.max(0, 16_384 - minPromptBudget);
expect(result.didOverride).toBe(true);
expect(result.compaction.reserveTokens).toBe(expectedReserve);
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
compaction: { reserveTokens: expectedReserve },
});
});
it("respects user-configured reserveTokens below capped floor for small models", () => {
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
// User explicitly sets reserveTokens=2048 and reserveTokensFloor=0.
// With contextTokenBudget=16384, minPromptBudget = min(8_000, 8_192) = 8_000
// and maxReserve = 16_384 - 8_000 = 8_384, so the capped floor = min(0, 8_384) = 0.
// targetReserveTokens = max(2048, 0) = 2048.
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
cfg: {
agents: {
defaults: {
compaction: { reserveTokens: 2_048, reserveTokensFloor: 0 },
},
},
},
contextTokenBudget: 16_384,
});
expect(result.compaction.reserveTokens).toBe(2_048);
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
compaction: { reserveTokens: 2_048 },
});
});
it("does not cap floor for mid-size models when maxReserve exceeds default floor", () => {
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
// 32 768 context window → minPromptBudget = min(8_000, floor(32_768 * 0.5)) = 8_000.
// maxReserve = 32_768 - 8_000 = 24_768.
// Since 24_768 > 20_000 (DEFAULT_FLOOR), the floor is NOT capped and stays at 20_000.
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
contextTokenBudget: 32_768,
});
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
});
});
it("does not cap floor when context window is large enough", () => {
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
// 200 000 context window → maxReserve = 200_000 - 8_000 = 192_000.
// floor (20 000) is well within that cap.
const result = applyPiCompactionSettingsFromConfig({
settingsManager,
contextTokenBudget: 200_000,
});
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
expect(settingsManager.applyOverrides).toHaveBeenCalledWith({
compaction: { reserveTokens: DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR },
});
});
it("falls back to uncapped floor when contextTokenBudget is not provided", () => {
const settingsManager = {
getCompactionReserveTokens: () => 16_384,
getCompactionKeepRecentTokens: () => 20_000,
applyOverrides: vi.fn(),
};
// No contextTokenBudget → backward-compatible behavior, floor = 20 000.
const result = applyPiCompactionSettingsFromConfig({ settingsManager });
expect(result.compaction.reserveTokens).toBe(DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR);
});
});
describe("resolveCompactionReserveTokensFloor", () => {

View File

@@ -1,5 +1,9 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { ContextEngineInfo } from "../context-engine/types.js";
import {
MIN_PROMPT_BUDGET_RATIO,
MIN_PROMPT_BUDGET_TOKENS,
} from "./pi-compaction-constants.js";
export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000;
@@ -15,6 +19,12 @@ type PiSettingsManagerLike = {
setCompactionEnabled?: (enabled: boolean) => void;
};
/**
* Ensures the compaction reserve tokens are at least the specified minimum.
* Note: This function is not context-aware and uses an uncapped floor.
* If called for small-context models without threading `contextTokenBudget`,
* it may re-introduce context overflow issues.
*/
export function ensurePiCompactionReserveTokens(params: {
settingsManager: PiSettingsManagerLike;
minReserveTokens?: number;
@@ -58,6 +68,8 @@ function toPositiveInt(value: unknown): number | undefined {
export function applyPiCompactionSettingsFromConfig(params: {
settingsManager: PiSettingsManagerLike;
cfg?: OpenClawConfig;
/** When known, the resolved context window budget for the current model. */
contextTokenBudget?: number;
}): {
didOverride: boolean;
compaction: { reserveTokens: number; keepRecentTokens: number };
@@ -68,7 +80,22 @@ export function applyPiCompactionSettingsFromConfig(params: {
const configuredReserveTokens = toNonNegativeInt(compactionCfg?.reserveTokens);
const configuredKeepRecentTokens = toPositiveInt(compactionCfg?.keepRecentTokens);
const reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
let reserveTokensFloor = resolveCompactionReserveTokensFloor(params.cfg);
// Cap the floor to a safe fraction of the context window so that
// small-context models (e.g. Ollama with 16 K tokens) are not starved of
// prompt budget. Without this cap the default floor of 20 000 can exceed
// the entire context window, causing every prompt to be classified as an
// overflow and triggering an infinite compaction loop.
const ctxBudget = params.contextTokenBudget;
if (typeof ctxBudget === "number" && Number.isFinite(ctxBudget) && ctxBudget > 0) {
const minPromptBudget = Math.min(
MIN_PROMPT_BUDGET_TOKENS,
Math.max(1, Math.floor(ctxBudget * MIN_PROMPT_BUDGET_RATIO)),
);
const maxReserve = Math.max(0, ctxBudget - minPromptBudget);
reserveTokensFloor = Math.min(reserveTokensFloor, maxReserve);
}
const targetReserveTokens = Math.max(
configuredReserveTokens ?? currentReserveTokens,