fix: deliver tool result media when verbose is off (#16679)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 6e16feb164
Co-authored-by: christianklotz <69443+christianklotz@users.noreply.github.com>
Co-authored-by: christianklotz <69443+christianklotz@users.noreply.github.com>
Reviewed-by: @christianklotz
This commit is contained in:
Christian Klotz
2026-02-15 02:18:57 +00:00
committed by GitHub
parent 906c32da12
commit 68c78c4b43
6 changed files with 393 additions and 0 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents: deliver tool result media (screenshots, images, audio) to channels regardless of verbose level. (#11735) Thanks @strelov1.
- Telegram: when `channels.telegram.commands.native` is `false`, exclude plugin commands from `setMyCommands` menu registration while keeping plugin slash handlers callable. (#15132) Thanks @Glucksberg.
- LINE: return 200 OK for Developers Console "Verify" requests (`{"events":[]}`) without `X-Line-Signature`, while still requiring signatures for real deliveries. (#16582) Thanks @arosstale.
- Cron: deliver text-only output directly when `delivery.to` is set so cron recipients get full output instead of summaries. (#16360) Thanks @thewilloftheshadow.

View File

@@ -0,0 +1,174 @@
import { describe, expect, it, vi } from "vitest";
import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
import { handleToolExecutionEnd } from "./pi-embedded-subscribe.handlers.tools.js";
// Minimal mock context factory. Only the fields needed for the media emission path.
function createMockContext(overrides?: {
shouldEmitToolOutput?: boolean;
onToolResult?: ReturnType<typeof vi.fn>;
}): EmbeddedPiSubscribeContext {
const onToolResult = overrides?.onToolResult ?? vi.fn();
return {
params: {
runId: "test-run",
onToolResult,
onAgentEvent: vi.fn(),
},
state: {
toolMetaById: new Map(),
toolMetas: [],
toolSummaryById: new Set(),
pendingMessagingTexts: new Map(),
pendingMessagingTargets: new Map(),
messagingToolSentTexts: [],
messagingToolSentTextsNormalized: [],
messagingToolSentTargets: [],
},
log: { debug: vi.fn(), warn: vi.fn() },
shouldEmitToolResult: vi.fn(() => false),
shouldEmitToolOutput: vi.fn(() => overrides?.shouldEmitToolOutput ?? false),
emitToolSummary: vi.fn(),
emitToolOutput: vi.fn(),
trimMessagingToolSent: vi.fn(),
hookRunner: undefined,
// Fill in remaining required fields with no-ops.
blockChunker: null,
noteLastAssistant: vi.fn(),
stripBlockTags: vi.fn((t: string) => t),
emitBlockChunk: vi.fn(),
flushBlockReplyBuffer: vi.fn(),
emitReasoningStream: vi.fn(),
consumeReplyDirectives: vi.fn(() => null),
consumePartialReplyDirectives: vi.fn(() => null),
resetAssistantMessageState: vi.fn(),
resetForCompactionRetry: vi.fn(),
finalizeAssistantTexts: vi.fn(),
ensureCompactionPromise: vi.fn(),
noteCompactionRetry: vi.fn(),
resolveCompactionRetry: vi.fn(),
maybeResolveCompactionWait: vi.fn(),
recordAssistantUsage: vi.fn(),
incrementCompactionCount: vi.fn(),
getUsageTotals: vi.fn(() => undefined),
getCompactionCount: vi.fn(() => 0),
} as unknown as EmbeddedPiSubscribeContext;
}
describe("handleToolExecutionEnd media emission", () => {
it("emits media when verbose is off and tool result has MEDIA: path", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/screenshot.png" },
},
});
expect(onToolResult).toHaveBeenCalledWith({
mediaUrls: ["/tmp/screenshot.png"],
});
});
it("does NOT emit media when verbose is full (emitToolOutput handles it)", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/screenshot.png" },
},
});
// onToolResult should NOT be called by the new media path (emitToolOutput handles it).
// It may be called by emitToolOutput, but the new block should not fire.
// Verify emitToolOutput was called instead.
expect(ctx.emitToolOutput).toHaveBeenCalled();
// The direct media emission should not have been called with just mediaUrls.
const directMediaCalls = onToolResult.mock.calls.filter(
(call: unknown[]) =>
call[0] &&
typeof call[0] === "object" &&
"mediaUrls" in (call[0] as Record<string, unknown>) &&
!("text" in (call[0] as Record<string, unknown>)),
);
expect(directMediaCalls).toHaveLength(0);
});
it("does NOT emit media for error results", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: true,
result: {
content: [
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/screenshot.png" },
},
});
expect(onToolResult).not.toHaveBeenCalled();
});
it("does NOT emit when tool result has no media", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "bash",
toolCallId: "tc-1",
isError: false,
result: {
content: [{ type: "text", text: "Command executed successfully" }],
},
});
expect(onToolResult).not.toHaveBeenCalled();
});
it("emits media from details.path fallback when no MEDIA: text", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "canvas",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{ type: "text", text: "Rendered canvas" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/canvas-output.png" },
},
});
expect(onToolResult).toHaveBeenCalledWith({
mediaUrls: ["/tmp/canvas-output.png"],
});
});
});

View File

@@ -10,6 +10,7 @@ import { normalizeTextForComparison } from "./pi-embedded-helpers.js";
import { isMessagingTool, isMessagingToolSendAction } from "./pi-embedded-messaging.js";
import {
extractToolErrorMessage,
extractToolResultMediaPaths,
extractToolResultText,
extractMessagingToolSend,
isToolResultError,
@@ -266,6 +267,20 @@ export async function handleToolExecutionEnd(
}
}
// Deliver media from tool results when the verbose emitToolOutput path is off.
// When shouldEmitToolOutput() is true, emitToolOutput already delivers media
// via parseReplyDirectives (MEDIA: text extraction), so skip to avoid duplicates.
if (ctx.params.onToolResult && !isToolError && !ctx.shouldEmitToolOutput()) {
const mediaPaths = extractToolResultMediaPaths(result);
if (mediaPaths.length > 0) {
try {
void ctx.params.onToolResult({ mediaUrls: mediaPaths });
} catch {
// ignore delivery failures
}
}
}
// Run after_tool_call plugin hook (fire-and-forget)
const hookRunnerAfter = ctx.hookRunner ?? getGlobalHookRunner();
if (hookRunnerAfter?.hasHooks("after_tool_call")) {

View File

@@ -0,0 +1,132 @@
import { describe, expect, it } from "vitest";
import { extractToolResultMediaPaths } from "./pi-embedded-subscribe.tools.js";
describe("extractToolResultMediaPaths", () => {
it("returns empty array for null/undefined", () => {
expect(extractToolResultMediaPaths(null)).toEqual([]);
expect(extractToolResultMediaPaths(undefined)).toEqual([]);
});
it("returns empty array for non-object", () => {
expect(extractToolResultMediaPaths("hello")).toEqual([]);
expect(extractToolResultMediaPaths(42)).toEqual([]);
});
it("returns empty array when content is missing", () => {
expect(extractToolResultMediaPaths({ details: { path: "/tmp/img.png" } })).toEqual([]);
});
it("returns empty array when content has no text or image blocks", () => {
expect(extractToolResultMediaPaths({ content: [{ type: "other" }] })).toEqual([]);
});
it("extracts MEDIA: path from text content block", () => {
const result = {
content: [
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
{ type: "image", data: "base64data", mimeType: "image/png" },
],
details: { path: "/tmp/screenshot.png" },
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/screenshot.png"]);
});
it("extracts MEDIA: path with extra text in the block", () => {
const result = {
content: [{ type: "text", text: "Here is the image\nMEDIA:/tmp/output.jpg\nDone" }],
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/output.jpg"]);
});
it("extracts multiple MEDIA: paths from different text blocks", () => {
const result = {
content: [
{ type: "text", text: "MEDIA:/tmp/page1.png" },
{ type: "text", text: "MEDIA:/tmp/page2.png" },
],
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/page1.png", "/tmp/page2.png"]);
});
it("falls back to details.path when image content exists but no MEDIA: text", () => {
// Pi SDK read tool doesn't include MEDIA: but OpenClaw imageResult
// sets details.path as fallback.
const result = {
content: [
{ type: "text", text: "Read image file [image/png]" },
{ type: "image", data: "base64data", mimeType: "image/png" },
],
details: { path: "/tmp/generated.png" },
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/generated.png"]);
});
it("returns empty array when image content exists but no MEDIA: and no details.path", () => {
// Pi SDK read tool: has image content but no path anywhere in the result.
const result = {
content: [
{ type: "text", text: "Read image file [image/png]" },
{ type: "image", data: "base64data", mimeType: "image/png" },
],
};
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
it("does not fall back to details.path when MEDIA: paths are found", () => {
const result = {
content: [
{ type: "text", text: "MEDIA:/tmp/from-text.png" },
{ type: "image", data: "base64data", mimeType: "image/png" },
],
details: { path: "/tmp/from-details.png" },
};
// MEDIA: text takes priority; details.path is NOT also included.
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/from-text.png"]);
});
it("handles backtick-wrapped MEDIA: paths", () => {
const result = {
content: [{ type: "text", text: "MEDIA: `/tmp/screenshot.png`" }],
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/screenshot.png"]);
});
it("ignores null/undefined items in content array", () => {
const result = {
content: [null, undefined, { type: "text", text: "MEDIA:/tmp/ok.png" }],
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/ok.png"]);
});
it("returns empty array for text-only results without MEDIA:", () => {
const result = {
content: [{ type: "text", text: "Command executed successfully" }],
};
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
it("ignores details.path when no image content exists", () => {
// details.path without image content is not media.
const result = {
content: [{ type: "text", text: "File saved" }],
details: { path: "/tmp/data.json" },
};
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
it("handles details.path with whitespace", () => {
const result = {
content: [{ type: "image", data: "base64", mimeType: "image/png" }],
details: { path: " /tmp/image.png " },
};
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/image.png"]);
});
it("skips empty details.path", () => {
const result = {
content: [{ type: "image", data: "base64", mimeType: "image/png" }],
details: { path: " " },
};
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
});

View File

@@ -1,5 +1,6 @@
import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
import { MEDIA_TOKEN_RE } from "../media/parse.js";
import { truncateUtf16Safe } from "../utils.js";
import { type MessagingToolSend } from "./pi-embedded-messaging.js";
@@ -118,6 +119,72 @@ export function extractToolResultText(result: unknown): string | undefined {
return texts.join("\n");
}
/**
* Extract media file paths from a tool result.
*
* Strategy (first match wins):
* 1. Parse `MEDIA:` tokens from text content blocks (all OpenClaw tools).
* 2. Fall back to `details.path` when image content exists (OpenClaw imageResult).
*
* Returns an empty array when no media is found (e.g. Pi SDK `read` tool
* returns base64 image data but no file path; those need a different delivery
* path like saving to a temp file).
*/
export function extractToolResultMediaPaths(result: unknown): string[] {
if (!result || typeof result !== "object") {
return [];
}
const record = result as Record<string, unknown>;
const content = Array.isArray(record.content) ? record.content : null;
if (!content) {
return [];
}
// Extract MEDIA: paths from text content blocks.
const paths: string[] = [];
let hasImageContent = false;
for (const item of content) {
if (!item || typeof item !== "object") {
continue;
}
const entry = item as Record<string, unknown>;
if (entry.type === "image") {
hasImageContent = true;
continue;
}
if (entry.type === "text" && typeof entry.text === "string") {
// Reset lastIndex since MEDIA_TOKEN_RE is global.
MEDIA_TOKEN_RE.lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = MEDIA_TOKEN_RE.exec(entry.text)) !== null) {
// Strip surrounding quotes/backticks and whitespace (mirrors cleanCandidate in media/parse).
const p = match[1]
?.replace(/^[`"'[{(]+/, "")
.replace(/[`"'\]})\\,]+$/, "")
.trim();
if (p && p.length <= 4096) {
paths.push(p);
}
}
}
}
if (paths.length > 0) {
return paths;
}
// Fall back to details.path when image content exists but no MEDIA: text.
if (hasImageContent) {
const details = record.details as Record<string, unknown> | undefined;
const p = typeof details?.path === "string" ? details.path.trim() : "";
if (p) {
return [p];
}
}
return [];
}
export function isToolResultError(result: unknown): boolean {
if (!result || typeof result !== "object") {
return false;

View File

@@ -128,6 +128,10 @@ export async function runAgentTurnWithFallback(params: {
return { skip: true };
}
if (!text) {
// Allow media-only payloads (e.g. tool result screenshots) through.
if ((payload.mediaUrls?.length ?? 0) > 0) {
return { text: undefined, skip: false };
}
return { skip: true };
}
const sanitized = sanitizeUserFacingText(text, {