Restore reserve-based overflow precheck

This commit is contained in:
Tak Hoffman
2026-04-05 22:56:00 -05:00
committed by Peter Steinberger
parent ceb686052b
commit 3e2a05f425
3 changed files with 134 additions and 1 deletion

View File

@@ -120,6 +120,10 @@ import {
} from "../prompt-cache-observability.js";
import { resolveCacheRetention } from "../prompt-cache-retention.js";
import { sanitizeSessionHistory, validateReplayTurns } from "../replay-history.js";
import {
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
shouldPreemptivelyCompactBeforePrompt,
} from "./preemptive-compaction.js";
import {
clearActiveEmbeddedRun,
type EmbeddedPiQueueHandle,
@@ -1521,7 +1525,7 @@ export async function runEmbeddedAttempt(
const hookAgentId = sessionAgentId;
let promptError: unknown = null;
let promptErrorSource: "prompt" | "compaction" | null = null;
let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null;
let prePromptMessageCount = activeSession.messages.length;
try {
const promptStartedAt = Date.now();
@@ -1761,6 +1765,27 @@ export async function runEmbeddedAttempt(
});
}
const reserveTokens = settingsManager.getCompactionReserveTokens();
const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
messages: activeSession.messages,
systemPrompt: systemPromptText,
prompt: effectivePrompt,
contextTokenBudget: params.contextTokenBudget,
reserveTokens,
});
if (preemptiveCompaction.shouldCompact) {
promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
promptErrorSource = "precheck";
log.warn(
`[context-overflow-precheck] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${params.provider}/${params.modelId} ` +
`estimatedPromptTokens=${preemptiveCompaction.estimatedPromptTokens} ` +
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
`reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`,
);
return;
}
const btwSnapshotMessages = activeSession.messages.slice(-MAX_BTW_SNAPSHOT_MESSAGES);
updateActiveEmbeddedRunSnapshot(params.sessionId, {
transcriptLeafId,

View File

@@ -0,0 +1,61 @@
import { describe, expect, it } from "vitest";
import {
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
estimatePrePromptTokens,
shouldPreemptivelyCompactBeforePrompt,
} from "./preemptive-compaction.js";
describe("preemptive-compaction", () => {
  // Fixtures repeated enough times that their token estimates dwarf the small
  // budgets used below, while short strings stay comfortably under budget.
  const historyText =
    "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu ".repeat(40);
  const systemText =
    "system guidance with multiple distinct words to avoid tokenizer overcompression ".repeat(25);
  const promptText =
    "user request with distinct content asking for a detailed answer and more context ".repeat(25);

  it("exports a context-overflow-compatible precheck error text", () => {
    // Downstream overflow handling matches on these substrings.
    expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("Context overflow:");
    expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("(precheck)");
  });

  it("raises the estimate as prompt-side content grows", () => {
    // Same history in both calls; only the system prompt and user prompt differ.
    const baseline = estimatePrePromptTokens({
      messages: [{ role: "assistant", content: historyText }],
      systemPrompt: "sys",
      prompt: "hello",
    });
    const inflated = estimatePrePromptTokens({
      messages: [{ role: "assistant", content: historyText }],
      systemPrompt: systemText,
      prompt: promptText,
    });
    expect(inflated).toBeGreaterThan(baseline);
  });

  it("requests preemptive compaction when the reserve-based prompt budget would be exceeded", () => {
    // Budget of 500 minus a 50-token reserve cannot fit the verbose fixtures.
    const decision = shouldPreemptivelyCompactBeforePrompt({
      messages: [{ role: "assistant", content: historyText }],
      systemPrompt: systemText,
      prompt: promptText,
      contextTokenBudget: 500,
      reserveTokens: 50,
    });
    expect(decision.shouldCompact).toBe(true);
    expect(decision.estimatedPromptTokens).toBeGreaterThan(decision.promptBudgetBeforeReserve);
  });

  it("does not request preemptive compaction when the reserve-based prompt budget still fits", () => {
    // Tiny content against a 10k budget leaves ample headroom after the reserve.
    const decision = shouldPreemptivelyCompactBeforePrompt({
      messages: [{ role: "assistant", content: "short history" }],
      systemPrompt: "sys",
      prompt: "hello",
      contextTokenBudget: 10_000,
      reserveTokens: 1_000,
    });
    expect(decision.shouldCompact).toBe(false);
    expect(decision.estimatedPromptTokens).toBeLessThan(decision.promptBudgetBeforeReserve);
  });
});

View File

@@ -0,0 +1,47 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { estimateTokens } from "@mariozechner/pi-coding-agent";
import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js";
// Error text surfaced when the precheck predicts the prompt cannot fit.
// NOTE(review): appears crafted to match existing "Context overflow:" handling
// elsewhere; the "(precheck)" suffix distinguishes it from a real provider
// overflow — confirm downstream matching before rewording.
export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
"Context overflow: prompt too large for the model (precheck).";
/**
 * Estimates the token footprint of the next prompt turn: existing session
 * messages plus the (not-yet-sent) system prompt and user prompt, inflated by
 * SAFETY_MARGIN to bias toward over-estimation.
 *
 * @param params.messages     Existing session history.
 * @param params.systemPrompt Optional system prompt; blank/whitespace is ignored.
 * @param params.prompt       The user prompt about to be sent.
 * @returns Non-negative whole-token estimate.
 */
export function estimatePrePromptTokens(params: {
  messages: AgentMessage[];
  systemPrompt?: string;
  prompt: string;
}): number {
  let total = estimateMessagesTokens(params.messages);

  // Wrap the pending prompt pieces as synthetic messages so the same
  // estimator used for history applies to them.
  const pending: AgentMessage[] = [];
  const system = params.systemPrompt;
  if (typeof system === "string" && system.trim().length > 0) {
    pending.push({ role: "system", content: system } as AgentMessage);
  }
  pending.push({ role: "user", content: params.prompt } as AgentMessage);
  for (const message of pending) {
    total += estimateTokens(message);
  }

  // Round up after applying the margin; clamp so the result is never negative.
  return Math.max(0, Math.ceil(total * SAFETY_MARGIN));
}
/**
 * Decides whether the session should be compacted before sending the next
 * prompt: compares the estimated prompt-side tokens against the context
 * budget minus the configured reserve.
 *
 * @param params.contextTokenBudget Total context window available, in tokens.
 * @param params.reserveTokens      Tokens held back for the response; negative
 *                                  values are treated as zero.
 * @returns The decision plus both numbers that produced it, for logging.
 */
export function shouldPreemptivelyCompactBeforePrompt(params: {
  messages: AgentMessage[];
  systemPrompt?: string;
  prompt: string;
  contextTokenBudget: number;
  reserveTokens: number;
}): {
  shouldCompact: boolean;
  estimatedPromptTokens: number;
  promptBudgetBeforeReserve: number;
} {
  // Floor both inputs so fractional settings cannot inflate the budget;
  // keep the budget at least 1 so the comparison stays meaningful.
  const budget = Math.floor(params.contextTokenBudget);
  const reserve = Math.max(0, Math.floor(params.reserveTokens));
  const promptBudgetBeforeReserve = Math.max(1, budget - reserve);

  const estimatedPromptTokens = estimatePrePromptTokens(params);
  const shouldCompact = estimatedPromptTokens > promptBudgetBeforeReserve;

  return { shouldCompact, estimatedPromptTokens, promptBudgetBeforeReserve };
}