mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-16 11:41:08 +00:00
Restore reserve-based overflow precheck
This commit is contained in:
committed by
Peter Steinberger
parent
ceb686052b
commit
3e2a05f425
@@ -120,6 +120,10 @@ import {
|
||||
} from "../prompt-cache-observability.js";
|
||||
import { resolveCacheRetention } from "../prompt-cache-retention.js";
|
||||
import { sanitizeSessionHistory, validateReplayTurns } from "../replay-history.js";
|
||||
import {
|
||||
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
|
||||
shouldPreemptivelyCompactBeforePrompt,
|
||||
} from "./preemptive-compaction.js";
|
||||
import {
|
||||
clearActiveEmbeddedRun,
|
||||
type EmbeddedPiQueueHandle,
|
||||
@@ -1521,7 +1525,7 @@ export async function runEmbeddedAttempt(
|
||||
const hookAgentId = sessionAgentId;
|
||||
|
||||
let promptError: unknown = null;
|
||||
let promptErrorSource: "prompt" | "compaction" | null = null;
|
||||
let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null;
|
||||
let prePromptMessageCount = activeSession.messages.length;
|
||||
try {
|
||||
const promptStartedAt = Date.now();
|
||||
@@ -1761,6 +1765,27 @@ export async function runEmbeddedAttempt(
|
||||
});
|
||||
}
|
||||
|
||||
const reserveTokens = settingsManager.getCompactionReserveTokens();
|
||||
const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
|
||||
messages: activeSession.messages,
|
||||
systemPrompt: systemPromptText,
|
||||
prompt: effectivePrompt,
|
||||
contextTokenBudget: params.contextTokenBudget,
|
||||
reserveTokens,
|
||||
});
|
||||
if (preemptiveCompaction.shouldCompact) {
|
||||
promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
|
||||
promptErrorSource = "precheck";
|
||||
log.warn(
|
||||
`[context-overflow-precheck] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||||
`provider=${params.provider}/${params.modelId} ` +
|
||||
`estimatedPromptTokens=${preemptiveCompaction.estimatedPromptTokens} ` +
|
||||
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
|
||||
`reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const btwSnapshotMessages = activeSession.messages.slice(-MAX_BTW_SNAPSHOT_MESSAGES);
|
||||
updateActiveEmbeddedRunSnapshot(params.sessionId, {
|
||||
transcriptLeafId,
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
|
||||
estimatePrePromptTokens,
|
||||
shouldPreemptivelyCompactBeforePrompt,
|
||||
} from "./preemptive-compaction.js";
|
||||
|
||||
// Unit tests for the reserve-based context-overflow precheck.
// Covers: the error-text contract, monotonicity of the token estimate,
// and both outcomes (compact / don't compact) of the budget comparison.
describe("preemptive-compaction", () => {
  // Repeated many-distinct-word strings so the token estimator produces
  // comfortably large counts — presumably to avoid tokenizer
  // overcompression of repeated short tokens (per the fixture names).
  const verboseHistory =
    "alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu ".repeat(40);
  const verboseSystem =
    "system guidance with multiple distinct words to avoid tokenizer overcompression ".repeat(25);
  const verbosePrompt =
    "user request with distinct content asking for a detailed answer and more context ".repeat(25);

  it("exports a context-overflow-compatible precheck error text", () => {
    // Downstream overflow handling matches on "Context overflow:"; the
    // "(precheck)" suffix distinguishes this synthetic error's origin.
    expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("Context overflow:");
    expect(PREEMPTIVE_OVERFLOW_ERROR_TEXT).toContain("(precheck)");
  });

  it("raises the estimate as prompt-side content grows", () => {
    // Same history in both calls — only system prompt + user prompt differ,
    // so any increase must come from the prompt-side content.
    const smaller = estimatePrePromptTokens({
      messages: [{ role: "assistant", content: verboseHistory }],
      systemPrompt: "sys",
      prompt: "hello",
    });
    const larger = estimatePrePromptTokens({
      messages: [{ role: "assistant", content: verboseHistory }],
      systemPrompt: verboseSystem,
      prompt: verbosePrompt,
    });

    expect(larger).toBeGreaterThan(smaller);
  });

  it("requests preemptive compaction when the reserve-based prompt budget would be exceeded", () => {
    // Tiny budget (500 - 50 reserve) versus verbose content: estimate must
    // exceed the budget and trigger compaction.
    const result = shouldPreemptivelyCompactBeforePrompt({
      messages: [{ role: "assistant", content: verboseHistory }],
      systemPrompt: verboseSystem,
      prompt: verbosePrompt,
      contextTokenBudget: 500,
      reserveTokens: 50,
    });

    expect(result.shouldCompact).toBe(true);
    expect(result.estimatedPromptTokens).toBeGreaterThan(result.promptBudgetBeforeReserve);
  });

  it("does not request preemptive compaction when the reserve-based prompt budget still fits", () => {
    // Generous budget (10_000 - 1_000 reserve) versus trivial content:
    // the precheck must stay quiet.
    const result = shouldPreemptivelyCompactBeforePrompt({
      messages: [{ role: "assistant", content: "short history" }],
      systemPrompt: "sys",
      prompt: "hello",
      contextTokenBudget: 10_000,
      reserveTokens: 1_000,
    });

    expect(result.shouldCompact).toBe(false);
    expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
  });
});
|
||||
47
src/agents/pi-embedded-runner/run/preemptive-compaction.ts
Normal file
47
src/agents/pi-embedded-runner/run/preemptive-compaction.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||
import { SAFETY_MARGIN, estimateMessagesTokens } from "../../compaction.js";
|
||||
|
||||
// Error text for the preemptive overflow precheck. The tests pin two
// substrings: "Context overflow:" (so it is recognized like a real overflow
// error) and "(precheck)" (so logs show it was raised before the provider
// call, not by it). Do not reword without updating those assertions.
export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
  "Context overflow: prompt too large for the model (precheck).";
|
||||
export function estimatePrePromptTokens(params: {
|
||||
messages: AgentMessage[];
|
||||
systemPrompt?: string;
|
||||
prompt: string;
|
||||
}): number {
|
||||
const { messages, systemPrompt, prompt } = params;
|
||||
const syntheticMessages: AgentMessage[] = [];
|
||||
if (typeof systemPrompt === "string" && systemPrompt.trim().length > 0) {
|
||||
syntheticMessages.push({ role: "system", content: systemPrompt } as AgentMessage);
|
||||
}
|
||||
syntheticMessages.push({ role: "user", content: prompt } as AgentMessage);
|
||||
|
||||
const estimated =
|
||||
estimateMessagesTokens(messages) +
|
||||
syntheticMessages.reduce((sum, message) => sum + estimateTokens(message), 0);
|
||||
return Math.max(0, Math.ceil(estimated * SAFETY_MARGIN));
|
||||
}
|
||||
|
||||
export function shouldPreemptivelyCompactBeforePrompt(params: {
|
||||
messages: AgentMessage[];
|
||||
systemPrompt?: string;
|
||||
prompt: string;
|
||||
contextTokenBudget: number;
|
||||
reserveTokens: number;
|
||||
}): {
|
||||
shouldCompact: boolean;
|
||||
estimatedPromptTokens: number;
|
||||
promptBudgetBeforeReserve: number;
|
||||
} {
|
||||
const estimatedPromptTokens = estimatePrePromptTokens(params);
|
||||
const promptBudgetBeforeReserve = Math.max(
|
||||
1,
|
||||
Math.floor(params.contextTokenBudget) - Math.max(0, Math.floor(params.reserveTokens)),
|
||||
);
|
||||
return {
|
||||
shouldCompact: estimatedPromptTokens > promptBudgetBeforeReserve,
|
||||
estimatedPromptTokens,
|
||||
promptBudgetBeforeReserve,
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user