mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix(context-pruning): prune image-containing tool results instead of skipping them (#41789)
This commit is contained in:
@@ -49,6 +49,30 @@ describe("pruneProcessedHistoryImages", () => {
|
|||||||
expect(first.content[1]).toMatchObject({ type: "image", data: "abc" });
|
expect(first.content[1]).toMatchObject({ type: "image", data: "abc" });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("prunes image blocks from toolResult messages that already have assistant replies", () => {
  // History under test: a tool result carrying a text block and an image block,
  // followed by an assistant message.
  const toolResultWithImage = castAgentMessage({
    role: "toolResult",
    toolName: "read",
    content: [{ type: "text", text: "screenshot bytes" }, { ...image }],
  });
  const assistantAck = castAgentMessage({
    role: "assistant",
    content: "ack",
  });
  const history: AgentMessage[] = [toolResultWithImage, assistantAck];

  const changed = pruneProcessedHistoryImages(history);
  expect(changed).toBe(true);

  const prunedTool = history[0] as Extract<AgentMessage, { role: "toolResult" }> | undefined;
  const hasArrayContent = prunedTool !== undefined && Array.isArray(prunedTool?.content);
  if (!prunedTool || !hasArrayContent) {
    throw new Error("expected toolResult array content");
  }

  // The block count stays at two; the second block is now the marker text block.
  const blocks = prunedTool.content;
  expect(blocks).toHaveLength(2);
  expect(blocks[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
});
|
||||||
|
|
||||||
it("does not change messages when no assistant turn exists", () => {
|
it("does not change messages when no assistant turn exists", () => {
|
||||||
const messages: AgentMessage[] = [
|
const messages: AgentMessage[] = [
|
||||||
castAgentMessage({
|
castAgentMessage({
|
||||||
|
|||||||
@@ -21,7 +21,11 @@ export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
|
|||||||
let didMutate = false;
|
let didMutate = false;
|
||||||
for (let i = 0; i < lastAssistantIndex; i++) {
|
for (let i = 0; i < lastAssistantIndex; i++) {
|
||||||
const message = messages[i];
|
const message = messages[i];
|
||||||
if (!message || message.role !== "user" || !Array.isArray(message.content)) {
|
if (
|
||||||
|
!message ||
|
||||||
|
(message.role !== "user" && message.role !== "toolResult") ||
|
||||||
|
!Array.isArray(message.content)
|
||||||
|
) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (let j = 0; j < message.content.length; j++) {
|
for (let j = 0; j < message.content.length; j++) {
|
||||||
|
|||||||
@@ -45,6 +45,19 @@ function makeAssistant(content: AssistantMessage["content"]): AgentMessage {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test helper: wraps the given content blocks in a `toolResult` message for the
 * "read" tool, stamped with the current time.
 *
 * The object literal is asserted to `AgentMessage` (not checked against it), so
 * only the four fields set here are present on the returned value.
 *
 * @param content Text and/or image blocks to place in the message, used as-is.
 * @returns The message object, typed as `AgentMessage`.
 */
function makeToolResult(
  content: Array<
    { type: "text"; text: string } | { type: "image"; data: string; mimeType: string }
  >,
): AgentMessage {
  const message = { role: "toolResult", toolName: "read", content, timestamp: Date.now() };
  return message as AgentMessage;
}
|
||||||
|
|
||||||
describe("pruneContextMessages", () => {
|
describe("pruneContextMessages", () => {
|
||||||
it("does not crash on assistant message with malformed thinking block (missing thinking string)", () => {
|
it("does not crash on assistant message with malformed thinking block (missing thinking string)", () => {
|
||||||
const messages: AgentMessage[] = [
|
const messages: AgentMessage[] = [
|
||||||
@@ -109,4 +122,84 @@ describe("pruneContextMessages", () => {
|
|||||||
});
|
});
|
||||||
expect(result).toHaveLength(2);
|
expect(result).toHaveLength(2);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("soft-trims image-containing tool results by replacing image blocks with placeholders", () => {
  // Two 120-char text blocks around one image: enough text to exceed softTrim.maxChars (200).
  const leadingText = "A".repeat(120);
  const trailingText = "B".repeat(120);
  const history: AgentMessage[] = [
    makeUser("summarize this"),
    makeToolResult([
      { type: "text", text: leadingText },
      { type: "image", data: "img", mimeType: "image/png" },
      { type: "text", text: trailingText },
    ]),
    makeAssistant([{ type: "text", text: "done" }]),
  ];

  // Hard clearing is disabled so only the soft-trim path can rewrite the tool result.
  const pruned = pruneContextMessages({
    messages: history,
    settings: {
      ...DEFAULT_CONTEXT_PRUNING_SETTINGS,
      keepLastAssistants: 1,
      softTrimRatio: 0,
      hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false },
      softTrim: { maxChars: 200, headChars: 170, tailChars: 30 },
    },
    ctx: CONTEXT_WINDOW_1M,
    isToolPrunable: () => true,
    contextWindowTokensOverride: 16,
  });

  const trimmed = pruned[1] as Extract<AgentMessage, { role: "toolResult" }>;

  // Everything collapses into a single text block...
  expect(trimmed.content).toHaveLength(1);
  expect(trimmed.content[0]).toMatchObject({ type: "text" });

  // ...that contains both the image placeholder and the trim notice.
  const onlyBlock = trimmed.content[0] as { type: "text"; text: string };
  const expectedImageMarker = "[image removed during context pruning]";
  const expectedTrimNotice = "[Tool result trimmed: kept first 170 chars and last 30 chars";
  expect(onlyBlock.text).toContain(expectedImageMarker);
  expect(onlyBlock.text).toContain(expectedTrimNotice);
});
|
||||||
|
|
||||||
|
it("hard-clears image-containing tool results once ratios require clearing", () => {
  const placeholder = "[hard cleared test placeholder]";

  // A short text block plus an image: well under softTrim.maxChars (5_000).
  const history: AgentMessage[] = [
    makeUser("summarize this"),
    makeToolResult([
      { type: "text", text: "small text" },
      { type: "image", data: "img", mimeType: "image/png" },
    ]),
    makeAssistant([{ type: "text", text: "done" }]),
  ];

  // Both ratios are zero and hard clearing is enabled with a custom placeholder.
  const pruned = pruneContextMessages({
    messages: history,
    settings: {
      ...DEFAULT_CONTEXT_PRUNING_SETTINGS,
      keepLastAssistants: 1,
      softTrimRatio: 0,
      hardClearRatio: 0,
      minPrunableToolChars: 1,
      softTrim: { maxChars: 5_000, headChars: 2_000, tailChars: 2_000 },
      hardClear: { enabled: true, placeholder },
    },
    ctx: CONTEXT_WINDOW_1M,
    isToolPrunable: () => true,
    contextWindowTokensOverride: 8,
  });

  // The whole content array is replaced by one text block holding the placeholder.
  const cleared = pruned[1] as Extract<AgentMessage, { role: "toolResult" }>;
  const expectedContent = [{ type: "text", text: placeholder }];
  expect(cleared.content).toEqual(expectedContent);
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -5,9 +5,8 @@ import type { EffectiveContextPruningSettings } from "./settings.js";
|
|||||||
import { makeToolPrunablePredicate } from "./tools.js";
|
import { makeToolPrunablePredicate } from "./tools.js";
|
||||||
|
|
||||||
const CHARS_PER_TOKEN_ESTIMATE = 4;
|
const CHARS_PER_TOKEN_ESTIMATE = 4;
|
||||||
// We currently skip pruning tool results that contain images. Still, we count them (approx.) so
|
|
||||||
// we start trimming prunable tool results earlier when image-heavy context is consuming the window.
|
|
||||||
const IMAGE_CHAR_ESTIMATE = 8_000;
|
const IMAGE_CHAR_ESTIMATE = 8_000;
|
||||||
|
const PRUNED_CONTEXT_IMAGE_MARKER = "[image removed during context pruning]";
|
||||||
|
|
||||||
function asText(text: string): TextContent {
|
function asText(text: string): TextContent {
|
||||||
return { type: "text", text };
|
return { type: "text", text };
|
||||||
@@ -23,6 +22,22 @@ function collectTextSegments(content: ReadonlyArray<TextContent | ImageContent>)
|
|||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Flattens tool-result content into an ordered list of string segments.
 *
 * Text blocks contribute their `text`; image blocks contribute
 * `PRUNED_CONTEXT_IMAGE_MARKER` in their place; blocks of any other type are
 * skipped.
 *
 * @param content Tool-result content blocks, in order.
 * @returns One string per text or image block, in the original order.
 */
function collectPrunableToolResultSegments(
  content: ReadonlyArray<TextContent | ImageContent>,
): string[] {
  const segments: string[] = [];
  for (const block of content) {
    switch (block.type) {
      case "text":
        segments.push(block.text);
        break;
      case "image":
        // The image payload is dropped; only a textual marker stands in for it.
        segments.push(PRUNED_CONTEXT_IMAGE_MARKER);
        break;
      default:
        break;
    }
  }
  return segments;
}
|
||||||
|
|
||||||
function estimateJoinedTextLength(parts: string[]): number {
|
function estimateJoinedTextLength(parts: string[]): number {
|
||||||
if (parts.length === 0) {
|
if (parts.length === 0) {
|
||||||
return 0;
|
return 0;
|
||||||
@@ -190,12 +205,9 @@ function softTrimToolResultMessage(params: {
|
|||||||
settings: EffectiveContextPruningSettings;
|
settings: EffectiveContextPruningSettings;
|
||||||
}): ToolResultMessage | null {
|
}): ToolResultMessage | null {
|
||||||
const { msg, settings } = params;
|
const { msg, settings } = params;
|
||||||
// Ignore image tool results for now: these are often directly relevant and hard to partially prune safely.
|
const parts = hasImageBlocks(msg.content)
|
||||||
if (hasImageBlocks(msg.content)) {
|
? collectPrunableToolResultSegments(msg.content)
|
||||||
return null;
|
: collectTextSegments(msg.content);
|
||||||
}
|
|
||||||
|
|
||||||
const parts = collectTextSegments(msg.content);
|
|
||||||
const rawLen = estimateJoinedTextLength(parts);
|
const rawLen = estimateJoinedTextLength(parts);
|
||||||
if (rawLen <= settings.softTrim.maxChars) {
|
if (rawLen <= settings.softTrim.maxChars) {
|
||||||
return null;
|
return null;
|
||||||
@@ -274,9 +286,6 @@ export function pruneContextMessages(params: {
|
|||||||
if (!isToolPrunable(msg.toolName)) {
|
if (!isToolPrunable(msg.toolName)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (hasImageBlocks(msg.content)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
prunableToolIndexes.push(i);
|
prunableToolIndexes.push(i);
|
||||||
|
|
||||||
const updated = softTrimToolResultMessage({
|
const updated = softTrimToolResultMessage({
|
||||||
|
|||||||
Reference in New Issue
Block a user