mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-01 04:10:22 +00:00
fix(cache): compact newest tool results first to preserve prompt cache prefix (#58036)
* fix(cache): compact newest tool results first to preserve prompt cache prefix

  compactExistingToolResultsInPlace iterated front-to-back, replacing the oldest tool results with placeholders when context exceeded 75%. This rewrote messages[k] for small k, invalidating the provider prompt cache from that point onward on every subsequent turn.

  Reverse the loop to compact newest-first. The cached prefix stays intact; the tradeoff is that the model loses recent tool output instead of old output, which is acceptable since this guard only fires as an emergency measure past the 75% threshold.

* fix(cache): compact newest tool results first to preserve prompt cache prefix (#58036)

  Thanks @bcherny

---------

Co-authored-by: George Zhang <georgezhangtj97@gmail.com>
This commit is contained in:
@@ -106,7 +106,7 @@ function expectCompactedToolResultsWithoutContextNotice(
|
||||
}
|
||||
|
||||
describe("installToolResultContextGuard", () => {
|
||||
it("compacts oldest-first when total context overflows, even if each result fits individually", async () => {
|
||||
it("compacts newest-first when total context overflows, even if each result fits individually", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
const contextForNextCall = makeTwoToolResultOverflowContext();
|
||||
const transformed = await applyGuardToContext(agent, contextForNextCall);
|
||||
@@ -115,7 +115,7 @@ describe("installToolResultContextGuard", () => {
|
||||
expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
|
||||
});
|
||||
|
||||
it("keeps compacting oldest-first until context is back under budget", async () => {
|
||||
it("keeps compacting newest-first until context is back under budget", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
@@ -141,7 +141,7 @@ describe("installToolResultContextGuard", () => {
|
||||
expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
|
||||
});
|
||||
|
||||
it("survives repeated large tool results by compacting older outputs before later turns", async () => {
|
||||
it("survives repeated large tool results by compacting the newest output each turn", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
@@ -159,8 +159,10 @@ describe("installToolResultContextGuard", () => {
|
||||
.filter((msg) => msg.role === "toolResult")
|
||||
.map((msg) => getToolResultText(msg as AgentMessage));
|
||||
|
||||
expect(toolResultTexts[0]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
|
||||
expect(toolResultTexts[3]?.length).toBe(95_000);
|
||||
// Newest-first compaction: oldest results stay intact to preserve the
|
||||
// cached prefix; the newest overflowing result is compacted.
|
||||
expect(toolResultTexts[0]?.length).toBe(95_000);
|
||||
expect(toolResultTexts[3]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
|
||||
expect(toolResultTexts.join("\n")).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
|
||||
});
|
||||
|
||||
@@ -181,7 +183,7 @@ describe("installToolResultContextGuard", () => {
|
||||
expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
|
||||
});
|
||||
|
||||
it("keeps compacting oldest-first until overflow clears, including the newest tool result when needed", async () => {
|
||||
it("keeps compacting newest-first until overflow clears, reaching older tool results when needed", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
|
||||
installToolResultContextGuard({
|
||||
|
||||
@@ -108,7 +108,9 @@ function compactExistingToolResultsInPlace(params: {
|
||||
}
|
||||
|
||||
let reduced = 0;
|
||||
for (let i = 0; i < messages.length; i++) {
|
||||
// Compact newest-first so the cached prefix stays intact: rewriting messages[k]
|
||||
// for small k invalidates the provider prompt cache from that point onward.
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (!isToolResultMessage(msg)) {
|
||||
continue;
|
||||
@@ -179,7 +181,8 @@ function enforceToolResultContextBudgetInPlace(params: {
|
||||
return;
|
||||
}
|
||||
|
||||
// Compact oldest tool outputs first until the context is back under budget.
|
||||
// Compact newest tool outputs first to preserve the cached prefix; stop once
|
||||
// the context is back under budget.
|
||||
compactExistingToolResultsInPlace({
|
||||
messages,
|
||||
charsNeeded: currentChars - contextBudgetChars,
|
||||
|
||||
Reference in New Issue
Block a user