Mirror of https://github.com/openclaw/openclaw.git, synced 2026-05-06 08:50:43 +00:00.
fix: preserve prompt budget for small context models
This commit is contained in:
@@ -1905,6 +1905,7 @@ export async function runEmbeddedAttempt(
|
||||
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
|
||||
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
|
||||
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
|
||||
`effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` +
|
||||
`sessionFile=${params.sessionFile}`,
|
||||
);
|
||||
skipPromptSubmission = true;
|
||||
@@ -1936,7 +1937,9 @@ export async function runEmbeddedAttempt(
|
||||
`promptBudgetBeforeReserve=${preemptiveCompaction.promptBudgetBeforeReserve} ` +
|
||||
`overflowTokens=${preemptiveCompaction.overflowTokens} ` +
|
||||
`toolResultReducibleChars=${preemptiveCompaction.toolResultReducibleChars} ` +
|
||||
`reserveTokens=${reserveTokens} sessionFile=${params.sessionFile}`,
|
||||
`reserveTokens=${reserveTokens} ` +
|
||||
`effectiveReserveTokens=${preemptiveCompaction.effectiveReserveTokens} ` +
|
||||
`sessionFile=${params.sessionFile}`,
|
||||
);
|
||||
skipPromptSubmission = true;
|
||||
}
|
||||
|
||||
@@ -84,6 +84,35 @@ describe("preemptive-compaction", () => {
|
||||
expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
|
||||
});
|
||||
|
||||
it("caps reserve tokens so small context models keep usable prompt budget", () => {
|
||||
const result = shouldPreemptivelyCompactBeforePrompt({
|
||||
messages: [makeAssistantHistory("short history")],
|
||||
systemPrompt: "sys",
|
||||
prompt: "hello",
|
||||
contextTokenBudget: 16_000,
|
||||
reserveTokens: 20_000,
|
||||
});
|
||||
|
||||
expect(result.effectiveReserveTokens).toBe(8_000);
|
||||
expect(result.promptBudgetBeforeReserve).toBe(8_000);
|
||||
expect(result.shouldCompact).toBe(false);
|
||||
expect(result.route).toBe("fits");
|
||||
});
|
||||
|
||||
it("keeps the requested reserve when it leaves enough prompt budget", () => {
|
||||
const result = shouldPreemptivelyCompactBeforePrompt({
|
||||
messages: [makeAssistantHistory("short history")],
|
||||
systemPrompt: "sys",
|
||||
prompt: "hello",
|
||||
contextTokenBudget: 32_000,
|
||||
reserveTokens: 20_000,
|
||||
});
|
||||
|
||||
expect(result.effectiveReserveTokens).toBe(20_000);
|
||||
expect(result.promptBudgetBeforeReserve).toBe(12_000);
|
||||
expect(result.shouldCompact).toBe(false);
|
||||
});
|
||||
|
||||
it("routes to direct tool-result truncation when recent tool tails can clearly absorb the overflow", () => {
|
||||
const medium = "alpha beta gamma delta epsilon ".repeat(2200);
|
||||
const messages: AgentMessage[] = [
|
||||
|
||||
@@ -9,6 +9,9 @@ export const PREEMPTIVE_OVERFLOW_ERROR_TEXT =
|
||||
|
||||
const ESTIMATED_CHARS_PER_TOKEN = 4;
|
||||
const TRUNCATION_ROUTE_BUFFER_TOKENS = 512;
|
||||
const MIN_PROMPT_BUDGET_TOKENS = 8_000;
|
||||
const MIN_PROMPT_BUDGET_RATIO = 0.5;
|
||||
|
||||
export type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
|
||||
|
||||
export function estimatePrePromptTokens(params: {
|
||||
@@ -46,12 +49,20 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
|
||||
promptBudgetBeforeReserve: number;
|
||||
overflowTokens: number;
|
||||
toolResultReducibleChars: number;
|
||||
effectiveReserveTokens: number;
|
||||
} {
|
||||
const estimatedPromptTokens = estimatePrePromptTokens(params);
|
||||
const promptBudgetBeforeReserve = Math.max(
|
||||
1,
|
||||
Math.floor(params.contextTokenBudget) - Math.max(0, Math.floor(params.reserveTokens)),
|
||||
const contextTokenBudget = Math.max(1, Math.floor(params.contextTokenBudget));
|
||||
const requestedReserveTokens = Math.max(0, Math.floor(params.reserveTokens));
|
||||
const minPromptBudget = Math.min(
|
||||
MIN_PROMPT_BUDGET_TOKENS,
|
||||
Math.max(1, Math.floor(contextTokenBudget * MIN_PROMPT_BUDGET_RATIO)),
|
||||
);
|
||||
const effectiveReserveTokens = Math.min(
|
||||
requestedReserveTokens,
|
||||
Math.max(0, contextTokenBudget - minPromptBudget),
|
||||
);
|
||||
const promptBudgetBeforeReserve = Math.max(1, contextTokenBudget - effectiveReserveTokens);
|
||||
const overflowTokens = Math.max(0, estimatedPromptTokens - promptBudgetBeforeReserve);
|
||||
const toolResultPotential = estimateToolResultReductionPotential({
|
||||
messages: params.messages,
|
||||
@@ -82,5 +93,6 @@ export function shouldPreemptivelyCompactBeforePrompt(params: {
|
||||
promptBudgetBeforeReserve,
|
||||
overflowTokens,
|
||||
toolResultReducibleChars,
|
||||
effectiveReserveTokens,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user