fix(agents): prefer overflow compaction for fresh reads

This commit is contained in:
Peter Steinberger
2026-04-06 02:41:24 +01:00
parent 979409eab5
commit 1ffe02e5ba
3 changed files with 135 additions and 3 deletions

View File

@@ -130,7 +130,7 @@ Docs: https://docs.openclaw.ai
- Discord: keep REST, webhook, and monitor traffic on the configured proxy, preserve component-only media sends, honor `@everyone` and `@here` mention gates, keep ACK reactions on the active account, and split voice connect/playback timeouts so auto-join is more reliable. (#57465, #60361, #60345) Thanks @geekhuashan.
- WhatsApp: restore `channels.whatsapp.blockStreaming` and reset watchdog timeouts after reconnect so quiet chats stop falling into reconnect loops. (#60007, #60069) Thanks @MonkeyLeeT and @mcaxtr.
- Memory: keep `memory-core` builtin embedding registration on the already-registered path so selecting `memory-core` no longer recurses through plugin discovery and crashes during startup. (#61402) Thanks @ngutman.
- Agents/tool results: keep large `read` outputs visible longer and preserve the latest `read` output during tool-result context compaction so fresh file reads stop getting replaced by compacted stubs when older tool output can absorb the overflow budget. Thanks @vincentkoc.
- Agents/tool results: keep large `read` outputs visible longer, preserve the latest `read` output when older tool output can absorb the overflow budget, and fall back to Pi's normal overflow compaction/retry path before replacing a fresh `read` with a compacted stub. Thanks @vincentkoc.
- Memory/QMD: prefer modern `qmd collection add --glob`, accept newer single-line JSON hit metadata while keeping legacy line fields, refresh QMD docs/doctor install guidance and model-override guidance, and keep older QMD releases working. Thanks @vincentkoc.
- MS Teams: download inline DM images via Graph API and preserve channel reply threading in proactive fallback. (#52212, #55198) Thanks @Ted-developer and @hyojin.
- MS Teams: replace the deprecated Teams SDK HttpPlugin stub with `httpServerAdapter` so recurring gateway deprecation warnings stop firing and the Express 5 compatibility workaround stays on the supported SDK path. (#60939) Thanks @coolramukaka-sys.

View File

@@ -17,17 +17,21 @@ function makeUser(text: string): AgentMessage {
});
}
/**
 * Builds a `toolResult` fixture message carrying a single text content part.
 *
 * @param id - Value stored as the message's `toolCallId`.
 * @param text - Text placed in the only content entry.
 * @param toolName - Tool recorded on the message; defaults to `"grep"` so that
 *   callers must opt in explicitly when they need a `read` result.
 * @returns The literal passed through `castAgentMessage`.
 */
function makeToolResult(id: string, text: string, toolName = "grep"): AgentMessage {
  return castAgentMessage({
    role: "toolResult",
    toolCallId: id,
    toolName,
    content: [{ type: "text", text }],
    isError: false,
    // Wall-clock timestamp: fixtures are not byte-stable across runs.
    timestamp: Date.now(),
  });
}
/** Fixture shortcut: a tool result from `makeToolResult` attributed to the `read` tool. */
function makeReadToolResult(id: string, text: string): AgentMessage {
  const readToolName = "read";
  return makeToolResult(id, text, readToolName);
}
function makeLegacyToolResult(id: string, text: string): AgentMessage {
return castAgentMessage({
role: "tool",
@@ -311,6 +315,51 @@ describe("installToolResultContextGuard", () => {
expect(newResult.details).toBeUndefined();
});
// Scenario: the newest tool result is a `read` output and there is only one older tool
// result ahead of it. The guard is expected to reject with the preemptive overflow error
// rather than rewrite the fresh read.
// NOTE(review): the 2_600 / 300 / 500 character sizes are presumably chosen so the older
// result alone cannot absorb the overflow for a 1_000-token window — confirm against the
// guard's chars-per-token and headroom constants, which are not visible in this hunk.
it("throws overflow instead of compacting the latest read result during aggregate compaction", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(2_600)),
// Older tool result, created without an explicit tool name.
makeToolResult("call_old", "x".repeat(300)),
// Newest tool result, attributed to the `read` tool.
makeReadToolResult("call_new", "y".repeat(500)),
];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
// The caller's array must still hold both tool results with their full original text.
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(300));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(500));
});
// Scenario: two older tool results precede the newest `read` output. The transform is
// expected to resolve (not reject) and to return the read text unchanged at index 3.
// NOTE(review): the 1_400 / 350 / 350 / 500 character sizes are presumably chosen so that
// compacting the two older results is enough to get back under budget — confirm against the
// guard's budget constants, which are not visible in this hunk.
it("keeps the latest read result when older outputs absorb the aggregate overflow", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(1_400)),
// Two older tool results, created without an explicit tool name.
makeToolResult("call_old_1", "a".repeat(350)),
makeToolResult("call_old_2", "b".repeat(350)),
// Newest tool result, attributed to the `read` tool.
makeReadToolResult("call_new", "c".repeat(500)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
// Only the newest read result is asserted; this test does not pin what happens to the
// older results.
expect(getToolResultText(transformed[3])).toBe("c".repeat(500));
});
it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
const agent = makeGuardableAgent();

View File

@@ -49,6 +49,21 @@ type GuardableAgentRecord = {
transformContext?: GuardableTransformContext;
};
/**
 * Reads the tool name recorded on a message.
 *
 * `toolName` is consulted first, then the legacy `tool_name` field. A field only counts when
 * it holds a string with at least one non-whitespace character; the value is returned as
 * stored (not trimmed).
 *
 * @param msg - Message to inspect.
 * @returns The first usable name, or `undefined` when neither field provides one.
 */
function getToolResultName(msg: AgentMessage): string | undefined {
  const fields = msg as { toolName?: unknown; tool_name?: unknown };
  // Order matters: the current field wins over the legacy spelling.
  for (const key of ["toolName", "tool_name"] as const) {
    const candidate = fields[key];
    if (typeof candidate === "string" && candidate.trim() !== "") {
      return candidate;
    }
  }
  return undefined;
}
/** True when `msg` passes `isToolResultMessage` and its recorded tool name is exactly `"read"`. */
function isReadToolResultMessage(msg: AgentMessage): boolean {
  if (!isToolResultMessage(msg)) {
    return false;
  }
  return getToolResultName(msg) === "read";
}
function truncateTextToBudget(text: string, maxChars: number): string {
if (text.length <= maxChars) {
return text;
@@ -278,6 +293,65 @@ function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] {
return [...olderIndexes, newestIndex];
}
/**
 * Locates the last tool-result message in `messages`.
 *
 * @param messages - Conversation messages, scanned from the end towards the start.
 * @returns The highest index accepted by `isToolResultMessage`, or `undefined` if none is.
 */
function getNewestToolResultIndex(messages: AgentMessage[]): number | undefined {
  let cursor = messages.length;
  while (cursor > 0) {
    cursor -= 1;
    if (isToolResultMessage(messages[cursor])) {
      return cursor;
    }
  }
  return undefined;
}
/**
 * Dry-runs tool-result compaction on a copy of `messages` and reports whether it would
 * change the text of the newest tool result when that result comes from the `read` tool.
 * The visible caller in `installToolResultContextGuard` throws
 * `PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE` when this returns `true`.
 *
 * Returns `false` when:
 * - there is no tool result in `messages`;
 * - the newest tool result is not a `read` result;
 * - the newest tool result's estimated size already exceeds `maxSingleToolResultChars`;
 * - the simulated context fits `contextBudgetChars` after per-result truncation;
 * - the simulated aggregate compaction leaves the newest result's text unchanged.
 *
 * @param params.messages - Messages about to be sent; the simulation operates on the
 *   output of `cloneMessagesForGuard`.
 * @param params.contextBudgetChars - Character budget for the whole context.
 * @param params.maxSingleToolResultChars - Character cap applied to each tool result.
 * @returns `true` only when aggregate compaction would alter the newest `read` result.
 */
function shouldPreferOverflowForLatestRead(params: {
messages: AgentMessage[];
contextBudgetChars: number;
maxSingleToolResultChars: number;
}): boolean {
const newestToolResultIndex = getNewestToolResultIndex(params.messages);
if (newestToolResultIndex === undefined) {
return false;
}
const newestToolResult = params.messages[newestToolResultIndex];
if (!isReadToolResultMessage(newestToolResult)) {
return false;
}
// Separate cache for measuring the caller's own message; the simulation below gets its own.
const initialCache = createMessageCharEstimateCache();
if (
estimateMessageCharsCached(newestToolResult, initialCache) > params.maxSingleToolResultChars
) {
// NOTE(review): presumably an oversized read is left to the regular per-result truncation
// path — confirm against the caller.
return false;
}
// NOTE(review): the clone helper is only partially visible here and appears to copy each
// message shallowly — confirm the in-place helpers below cannot reach the caller's objects.
const simulatedMessages = cloneMessagesForGuard(params.messages);
const estimateCache = createMessageCharEstimateCache();
// Step 1 of the simulation: apply the per-result cap to every tool result in the copy.
for (const message of simulatedMessages) {
if (!isToolResultMessage(message)) {
continue;
}
const truncated = truncateToolResultToChars(
message,
params.maxSingleToolResultChars,
estimateCache,
);
applyMessageMutationInPlace(message, truncated, estimateCache);
}
const currentChars = estimateContextChars(simulatedMessages, estimateCache);
if (currentChars <= params.contextBudgetChars) {
return false;
}
// Step 2: snapshot the newest result's text, run aggregate compaction for the remaining
// overflow, and compare the text afterwards.
const newestToolResultAfterPerToolLimit = simulatedMessages[newestToolResultIndex];
const newestToolResultTextBefore = getToolResultText(newestToolResultAfterPerToolLimit);
compactExistingToolResultsInPlace({
messages: simulatedMessages,
charsNeeded: currentChars - params.contextBudgetChars,
cache: estimateCache,
});
return getToolResultText(simulatedMessages[newestToolResultIndex]) !== newestToolResultTextBefore;
}
function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
return messages.map(
(msg) => ({ ...(msg as unknown as Record<string, unknown>) }) as unknown as AgentMessage,
@@ -388,6 +462,15 @@ export function installToolResultContextGuard(params: {
: messages;
const sourceMessages = Array.isArray(transformed) ? transformed : messages;
if (
shouldPreferOverflowForLatestRead({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
) {
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
}
const contextMessages = contextNeedsToolResultCompaction({
messages: sourceMessages,
contextBudgetChars,