From 50fcdb36a841a7bfea69be06a738f9990aa15bc1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 12 Apr 2026 17:14:56 +0100 Subject: [PATCH] fix: preserve prompt budget for small context models --- src/agents/pi-embedded-runner/run/attempt.ts | 5 +++- .../run/preemptive-compaction.test.ts | 29 +++++++++++++++++++ .../run/preemptive-compaction.ts | 18 ++++++++++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 6cfedbffdd2..8100bd06db1 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1905,6 +1905,7 @@ export async function runEmbeddedAttempt( `promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` + `overflowTokens=${preemptiveCompaction.overflowTokens} ` + `toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` + + `effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` + `sessionFile=${params.sessionFile}`, ); skipPromptSubmission = true; @@ -1936,7 +1937,9 @@ export async function runEmbeddedAttempt( `promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` + `overflowTokens=${preemptiveCompaction.overflowTokens} ` + `toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` + - `reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`, + `reserveTokens=${reserveTokens} ` + + `effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` + + `sessionFile=${params.sessionFile}`, ); skipPromptSubmission = true; } diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts index 4922262fa42..f5325fc4331 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts @@ -84,6 +84,35 @@ describe("preemptive-compaction", () => { expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve); }); + it("caps reserve tokens so small context models keep usable prompt budget", () => { + const result = shouldPreemptivelyCompactBeforePrompt({ + messages: [makeAssistantHistory("short history")], + systemPrompt: "sys", + prompt: "hello", + contextTokenBudget: 16_000, + reserveTokens: 20_000, + }); + + expect(result.effectiveReserveTokens).toBe(8_000); + expect(result.promptBudgetBeforeReserve).toBe(8_000); + expect(result.shouldCompact).toBe(false); + expect(result.route).toBe("fits"); + }); + + it("keeps the requested reserve when it leaves enough prompt budget", () => { + const result = shouldPreemptivelyCompactBeforePrompt({ + messages: [makeAssistantHistory("short history")], + systemPrompt: "sys", + prompt: "hello", + contextTokenBudget: 32_000, + reserveTokens: 20_000, + }); + + expect(result.effectiveReserveTokens).toBe(20_000); + expect(result.promptBudgetBeforeReserve).toBe(12_000); + expect(result.shouldCompact).toBe(false); + }); + it("routes to direct tool-result truncation when recent tool tails can clearly absorb the overflow", () => { const medium = "alpha beta gamma delta epsilon ".repeat(2200); const messages: AgentMessage[] = [ diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts index a1baa6487c2..44b885d03fd 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts @@ -9,6 +9,9 @@ export const PREEMPTIVE_OVERFLOW_ERROR_TEXT = const ESTIMATED_CHARS_PER_TOKEN = 4; const TRUNCATION_ROUTE_BUFFER_TOKENS = 512; +const MIN_PROMPT_BUDGET_TOKENS = 8_000; +const MIN_PROMPT_BUDGET_RATIO = 0.5; + export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js"; export function estimatePrePromptTokens(params: { @@ -46,12 +49,20 @@ export function shouldPreemptivelyCompactBeforePrompt(params: { promptBudgetBeforeReserve: number; overflowTokens: number; toolResultReducibleChars: number; + effectiveReserveTokens: number; } { const estimatedPromptTokens = estimatePrePromptTokens(params); - const promptBudgetBeforeReserve = Math.max( - 1, - Math.floor(params.contextTokenBudget) - Math.max(0, Math.floor(params.reserveTokens)), + const contextTokenBudget = Math.max(1, Math.floor(params.contextTokenBudget)); + const requestedReserveTokens = Math.max(0, Math.floor(params.reserveTokens)); + const minPromptBudget = Math.min( + MIN_PROMPT_BUDGET_TOKENS, + Math.max(1, Math.floor(contextTokenBudget * MIN_PROMPT_BUDGET_RATIO)), ); + const effectiveReserveTokens = Math.min( + requestedReserveTokens, + Math.max(0, contextTokenBudget - minPromptBudget), + ); + const promptBudgetBeforeReserve = Math.max(1, contextTokenBudget - effectiveReserveTokens); const overflowTokens = Math.max(0, estimatedPromptTokens - promptBudgetBeforeReserve); const toolResultPotential = estimateToolResultReductionPotential({ messages: params.messages, @@ -82,5 +93,6 @@ export function shouldPreemptivelyCompactBeforePrompt(params: { promptBudgetBeforeReserve, overflowTokens, toolResultReducibleChars, + effectiveReserveTokens, }; }