// Patch overview: unified git diff touching three files. Line breaks inside the patch
// were flattened to spaces before this text was captured, so the hunk text below is kept
// exactly as found; only this leading commentary has been added.
//
// 1. src/agents/pi-embedded-runner/run/attempt.ts (modified)
//    - Imports PREEMPTIVE_OVERFLOW_ERROR_TEXT and shouldPreemptivelyCompactBeforePrompt
//      from ./preemptive-compaction.js.
//    - Widens the promptErrorSource union with a third literal, "precheck".
//    - Adds a check before the prompt snapshot is taken: it reads
//      settingsManager.getCompactionReserveTokens() and passes the session messages, the
//      system prompt text, the effective prompt, params.contextTokenBudget and the reserve
//      to shouldPreemptivelyCompactBeforePrompt. When shouldCompact is true it stores
//      new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT) in promptError, sets promptErrorSource to
//      "precheck", logs a [context-overflow-precheck] warning with the estimate, the budget,
//      the reserve and the session file, and then returns.
//
// 2. src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts (new)
//    - vitest cases: the error text contains "Context overflow:" and "(precheck)"; the
//      estimate grows when system prompt and prompt grow; compaction is requested with
//      budget 500 / reserve 50 and verbose inputs; compaction is not requested with
//      budget 10_000 / reserve 1_000 and short inputs.
//
// 3. src/agents/pi-embedded-runner/run/preemptive-compaction.ts (new)
//    - estimatePrePromptTokens: adds estimateMessagesTokens(messages) to the sum of
//      estimateTokens() over synthetic messages (a "system" message only when systemPrompt
//      is a non-blank string, plus a "user" message for the prompt), multiplies by
//      SAFETY_MARGIN, rounds up and clamps at 0.
//    - shouldPreemptivelyCompactBeforePrompt: the budget is
//      max(1, floor(contextTokenBudget) - max(0, floor(reserveTokens))); shouldCompact is
//      true only when the estimate is strictly greater than that budget. The estimate and
//      the budget are returned alongside the flag.
//
// NOTE(review): the system prompt is sized by casting { role: "system", ... } to
// AgentMessage and handing it to estimateTokens. Confirm estimateTokens counts that role;
// if it only sizes roles it recognises, the system prompt would contribute 0 tokens. The
// "raises the estimate" test grows system prompt and prompt together, so it would not
// catch this.
// NOTE(review): the test history messages use a plain string as assistant content.
// Confirm that shape is what estimateMessagesTokens expects for assistant messages.
// NOTE(review): the precheck branch ends in a bare `return;`. The enclosing scope is not
// visible in these hunks; confirm it may return without a value and that the recorded
// promptError / promptErrorSource are still acted on afterwards.
// NOTE(review): when reserveTokens >= contextTokenBudget the budget clamps to 1, so
// shouldCompact is true for any estimate above 1. If contextTokenBudget is NaN, the budget
// is NaN and shouldCompact is always false. Confirm both are acceptable.
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 878a2cd917e..297ca90c170 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -120,6 +120,10 @@ import { } from "../prompt-cache-observability.js"; import { resolveCacheRetention } from "../prompt-cache-retention.js"; import { sanitizeSessionHistory, validateReplayTurns } from "../replay-history.js"; +import { + PREEMPTIVE_OVERFLOW_ERROR_TEXT, + shouldPreemptivelyCompactBeforePrompt, +} from "./preemptive-compaction.js"; import { clearActiveEmbeddedRun, type EmbeddedPiQueueHandle, @@ -1521,7 +1525,7 @@ export async function runEmbeddedAttempt( const hookAgentId = sessionAgentId; let promptError: unknown = null; - let promptErrorSource: "prompt" | "compaction" | null = null; + let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null; let prePromptMessageCount = activeSession.messages.length; try { const promptStartedAt = Date.now(); @@ -1761,6 +1765,27 @@ export async function runEmbeddedAttempt( }); } + const reserveTokens = settingsManager.getCompactionReserveTokens(); + const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({ + messages: activeSession.messages, + systemPrompt: systemPromptText, + prompt: effectivePrompt, + contextTokenBudget: params.contextTokenBudget, + reserveTokens, + }); + if (preemptiveCompaction.shouldCompact) { + promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT); + promptErrorSource = "precheck"; + log.warn( + `[context-overflow-precheck] sessionKey=${params.sessionKey ?? 
params.sessionId} ` + + `provider=${params.provider}/${params.modelId} ` + + `estimatedPromptTokens=${preemptiveCompaction.estimatedPromptTokens} ` + + `promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` + + `reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`, + ); + return; + } + const btwSnapshotMessages = activeSession.messages.slice(-MAX_BTW_SNAPSHOT_MESSAGES); updateActiveEmbeddedRunSnapshot(params.sessionId, { transcriptLeafId, diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts new file mode 100644 index 00000000000..81919a477f9 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from "vitest"; +import { + PREEMPTIVE_OVERFLOW_ERROR_TEXT, + estimatePrePromptTokens, + shouldPreemptivelyCompactBeforePrompt, +} from "./preemptive-compaction.js"; + +describe("preemptive-compaction", () => { + const verboseHistory = + "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu ".repeat(40); + const verboseSystem = + "system guidance with multiple distinct words to avoid tokenizer overcompression ".repeat(25); + const verbosePrompt = + "user request with distinct content asking for a detailed answer and more context ".repeat(25); + + it("exports a context-overflow-compatible precheck error text", () => { + expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("Context overflow:"); + expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("(precheck)"); + }); + + it("raises the estimate as prompt-side content grows", () => { + const smaller = estimatePrePromptTokens({ + messages: [{ role: "assistant", content: verboseHistory }], + systemPrompt: "sys", + prompt: "hello", + }); + const larger = estimatePrePromptTokens({ + messages: [{ role: "assistant", content: verboseHistory }], + systemPrompt: verboseSystem, + prompt: verbosePrompt, + }); + + 
expect(larger).toBeGreaterThan(smaller); + }); + + it("requests preemptive compaction when the reserve-based prompt budget would be exceeded", () => { + const result = shouldPreemptivelyCompactBeforePrompt({ + messages: [{ role: "assistant", content: verboseHistory }], + systemPrompt: verboseSystem, + prompt: verbosePrompt, + contextTokenBudget: 500, + reserveTokens: 50, + }); + + expect(result.shouldCompact).toBe(true); + expect(result.estimatedPromptTokens).toBeGreaterThan(result.promptBudgetBeforeReserve); + }); + + it("does not request preemptive compaction when the reserve-based prompt budget still fits", () => { + const result = shouldPreemptivelyCompactBeforePrompt({ + messages: [{ role: "assistant", content: "short history" }], + systemPrompt: "sys", + prompt: "hello", + contextTokenBudget: 10_000, + reserveTokens: 1_000, + }); + + expect(result.shouldCompact).toBe(false); + expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts new file mode 100644 index 00000000000..baf153c0256 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.ts @@ -0,0 +1,47 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { estimateTokens } from "@mariozechner/pi-coding-agent"; +import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js"; + +export const PREEMPTIVE_OVERFLOW_ERROR_TEXT = + "Context overflow: prompt too large for the model (precheck)."; + +export function estimatePrePromptTokens(params: { + messages: AgentMessage[]; + systemPrompt?: string; + prompt: string; +}): number { + const { messages, systemPrompt, prompt } = params; + const syntheticMessages: AgentMessage[] = []; + if (typeof systemPrompt === "string" && systemPrompt.trim().length > 0) { + syntheticMessages.push({ role: "system", content: systemPrompt } as AgentMessage); + 
} + syntheticMessages.push({ role: "user", content: prompt } as AgentMessage); + + const estimated = + estimateMessagesTokens(messages) + + syntheticMessages.reduce((sum, message) => sum + estimateTokens(message), 0); + return Math.max(0, Math.ceil(estimated * SAFETY_MARGIN)); +} + +export function shouldPreemptivelyCompactBeforePrompt(params: { + messages: AgentMessage[]; + systemPrompt?: string; + prompt: string; + contextTokenBudget: number; + reserveTokens: number; +}): { + shouldCompact: boolean; + estimatedPromptTokens: number; + promptBudgetBeforeReserve: number; +} { + const estimatedPromptTokens = estimatePrePromptTokens(params); + const promptBudgetBeforeReserve = Math.max( + 1, + Math.floor(params.contextTokenBudget) - Math.max(0, Math.floor(params.reserveTokens)), + ); + return { + shouldCompact: estimatedPromptTokens > promptBudgetBeforeReserve, + estimatedPromptTokens, + promptBudgetBeforeReserve, + }; +}