fix: preserve prompt budget for small-context models

This commit is contained in:
Peter Steinberger
2026-04-12 17:14:56 +01:00
parent 0b8f09819f
commit 50fcdb36a8
3 changed files with 48 additions and 4 deletions

View File

@@ -1905,6 +1905,7 @@ export async function runEmbeddedAttempt(
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
`effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` +
`sessionFile=${params.sessionFile}`,
);
skipPromptSubmission = true;
@@ -1936,7 +1937,9 @@ export async function runEmbeddedAttempt(
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
`reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`,
`reserveTokens=${reserveTokens} ` +
`effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` +
`sessionFile=${params.sessionFile}`,
);
skipPromptSubmission = true;
}

View File

@@ -84,6 +84,35 @@ describe("preemptive-compaction", () => {
expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
});
it("caps reserve tokens so small context models keep usable prompt budget", () => {
  // The requested reserve (20k) is larger than the whole 16k context window,
  // so the reserve must be clamped for any prompt budget to remain.
  const { effectiveReserveTokens, promptBudgetBeforeReserve, shouldCompact, route } =
    shouldPreemptivelyCompactBeforePrompt({
      contextTokenBudget: 16_000,
      reserveTokens: 20_000,
      systemPrompt: "sys",
      prompt: "hello",
      messages: [makeAssistantHistory("short history")],
    });

  // Half of the 16k window is kept for the prompt; the reserve gets the rest.
  expect(effectiveReserveTokens).toBe(8_000);
  expect(promptBudgetBeforeReserve).toBe(8_000);

  // A tiny history fits comfortably, so no compaction is requested.
  expect(shouldCompact).toBe(false);
  expect(route).toBe("fits");
});
it("keeps the requested reserve when it leaves enough prompt budget", () => {
  // With a 32k window, a 20k reserve still leaves 12k for the prompt,
  // so the requested reserve is expected to pass through unchanged.
  const { effectiveReserveTokens, promptBudgetBeforeReserve, shouldCompact } =
    shouldPreemptivelyCompactBeforePrompt({
      contextTokenBudget: 32_000,
      reserveTokens: 20_000,
      systemPrompt: "sys",
      prompt: "hello",
      messages: [makeAssistantHistory("short history")],
    });

  expect(effectiveReserveTokens).toBe(20_000);
  expect(promptBudgetBeforeReserve).toBe(12_000);
  expect(shouldCompact).toBe(false);
});
it("routes to direct tool-result truncation when recent tool tails can clearly absorb the overflow", () => {
const medium = "alpha beta gamma delta epsilon ".repeat(2200);
const messages: AgentMessage[] = [

View File

@@ -9,6 +9,9 @@ export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
// NOTE(review): presumably the characters-per-token heuristic used when
// estimating token counts from text length; its usage is not visible in this
// hunk, so confirm against the estimator.
const ESTIMATED_CHARS_PER_TOKEN = 4;
// NOTE(review): judging by the name, a token safety margin for the tool-result
// truncation route; usage is outside this hunk, so confirm before relying on it.
const TRUNCATION_ROUTE_BUFFER_TOKENS = 512;
// Upper bound, in tokens, on the prompt budget that
// shouldPreemptivelyCompactBeforePrompt protects from the reserve: the
// protected floor is the smaller of this value and the ratio-based floor below.
const MIN_PROMPT_BUDGET_TOKENS = 8_000;
// Fraction of the context token budget used to derive the ratio-based prompt
// floor (floored, and never less than 1 token), so on small context windows the
// reserve cannot take more than the remainder.
const MIN_PROMPT_BUDGET_RATIO = 0.5;
export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
export function estimatePrePromptTokens(params: {
@@ -46,12 +49,20 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
promptBudgetBeforeReserve: number;
overflowTokens: number;
toolResultReducibleChars: number;
effectiveReserveTokens: number;
} {
const estimatedPromptTokens = estimatePrePromptTokens(params);
const promptBudgetBeforeReserve = Math.max(
1,
Math.floor(params.contextTokenBudget) - Math.max(0, Math.floor(params.reserveTokens)),
const contextTokenBudget = Math.max(1, Math.floor(params.contextTokenBudget));
const requestedReserveTokens = Math.max(0, Math.floor(params.reserveTokens));
const minPromptBudget = Math.min(
MIN_PROMPT_BUDGET_TOKENS,
Math.max(1, Math.floor(contextTokenBudget * MIN_PROMPT_BUDGET_RATIO)),
);
const effectiveReserveTokens = Math.min(
requestedReserveTokens,
Math.max(0, contextTokenBudget - minPromptBudget),
);
const promptBudgetBeforeReserve = Math.max(1, contextTokenBudget - effectiveReserveTokens);
const overflowTokens = Math.max(0, estimatedPromptTokens - promptBudgetBeforeReserve);
const toolResultPotential = estimateToolResultReductionPotential({
messages: params.messages,
@@ -82,5 +93,6 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
promptBudgetBeforeReserve,
overflowTokens,
toolResultReducibleChars,
effectiveReserveTokens,
};
}