mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix: deliver tool result media when verbose is off (#16679)
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: 6e16feb164
Co-authored-by: christianklotz <69443+christianklotz@users.noreply.github.com>
Co-authored-by: christianklotz <69443+christianklotz@users.noreply.github.com>
Reviewed-by: @christianklotz
This commit is contained in:
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Agents: deliver tool result media (screenshots, images, audio) to channels regardless of verbose level. (#11735) Thanks @strelov1.
|
||||
- Telegram: when `channels.telegram.commands.native` is `false`, exclude plugin commands from `setMyCommands` menu registration while keeping plugin slash handlers callable. (#15132) Thanks @Glucksberg.
|
||||
- LINE: return 200 OK for Developers Console "Verify" requests (`{"events":[]}`) without `X-Line-Signature`, while still requiring signatures for real deliveries. (#16582) Thanks @arosstale.
|
||||
- Cron: deliver text-only output directly when `delivery.to` is set so cron recipients get full output instead of summaries. (#16360) Thanks @thewilloftheshadow.
|
||||
|
||||
174
src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts
Normal file
174
src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
|
||||
import { handleToolExecutionEnd } from "./pi-embedded-subscribe.handlers.tools.js";
|
||||
|
||||
// Minimal mock context factory. Only the fields needed for the media emission path.
|
||||
function createMockContext(overrides?: {
|
||||
shouldEmitToolOutput?: boolean;
|
||||
onToolResult?: ReturnType<typeof vi.fn>;
|
||||
}): EmbeddedPiSubscribeContext {
|
||||
const onToolResult = overrides?.onToolResult ?? vi.fn();
|
||||
return {
|
||||
params: {
|
||||
runId: "test-run",
|
||||
onToolResult,
|
||||
onAgentEvent: vi.fn(),
|
||||
},
|
||||
state: {
|
||||
toolMetaById: new Map(),
|
||||
toolMetas: [],
|
||||
toolSummaryById: new Set(),
|
||||
pendingMessagingTexts: new Map(),
|
||||
pendingMessagingTargets: new Map(),
|
||||
messagingToolSentTexts: [],
|
||||
messagingToolSentTextsNormalized: [],
|
||||
messagingToolSentTargets: [],
|
||||
},
|
||||
log: { debug: vi.fn(), warn: vi.fn() },
|
||||
shouldEmitToolResult: vi.fn(() => false),
|
||||
shouldEmitToolOutput: vi.fn(() => overrides?.shouldEmitToolOutput ?? false),
|
||||
emitToolSummary: vi.fn(),
|
||||
emitToolOutput: vi.fn(),
|
||||
trimMessagingToolSent: vi.fn(),
|
||||
hookRunner: undefined,
|
||||
// Fill in remaining required fields with no-ops.
|
||||
blockChunker: null,
|
||||
noteLastAssistant: vi.fn(),
|
||||
stripBlockTags: vi.fn((t: string) => t),
|
||||
emitBlockChunk: vi.fn(),
|
||||
flushBlockReplyBuffer: vi.fn(),
|
||||
emitReasoningStream: vi.fn(),
|
||||
consumeReplyDirectives: vi.fn(() => null),
|
||||
consumePartialReplyDirectives: vi.fn(() => null),
|
||||
resetAssistantMessageState: vi.fn(),
|
||||
resetForCompactionRetry: vi.fn(),
|
||||
finalizeAssistantTexts: vi.fn(),
|
||||
ensureCompactionPromise: vi.fn(),
|
||||
noteCompactionRetry: vi.fn(),
|
||||
resolveCompactionRetry: vi.fn(),
|
||||
maybeResolveCompactionWait: vi.fn(),
|
||||
recordAssistantUsage: vi.fn(),
|
||||
incrementCompactionCount: vi.fn(),
|
||||
getUsageTotals: vi.fn(() => undefined),
|
||||
getCompactionCount: vi.fn(() => 0),
|
||||
} as unknown as EmbeddedPiSubscribeContext;
|
||||
}
|
||||
|
||||
describe("handleToolExecutionEnd media emission", () => {
|
||||
it("emits media when verbose is off and tool result has MEDIA: path", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
type: "tool_execution_end",
|
||||
toolName: "browser",
|
||||
toolCallId: "tc-1",
|
||||
isError: false,
|
||||
result: {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
|
||||
{ type: "image", data: "base64", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/screenshot.png" },
|
||||
},
|
||||
});
|
||||
|
||||
expect(onToolResult).toHaveBeenCalledWith({
|
||||
mediaUrls: ["/tmp/screenshot.png"],
|
||||
});
|
||||
});
|
||||
|
||||
it("does NOT emit media when verbose is full (emitToolOutput handles it)", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult });
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
type: "tool_execution_end",
|
||||
toolName: "browser",
|
||||
toolCallId: "tc-1",
|
||||
isError: false,
|
||||
result: {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
|
||||
{ type: "image", data: "base64", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/screenshot.png" },
|
||||
},
|
||||
});
|
||||
|
||||
// onToolResult should NOT be called by the new media path (emitToolOutput handles it).
|
||||
// It may be called by emitToolOutput, but the new block should not fire.
|
||||
// Verify emitToolOutput was called instead.
|
||||
expect(ctx.emitToolOutput).toHaveBeenCalled();
|
||||
// The direct media emission should not have been called with just mediaUrls.
|
||||
const directMediaCalls = onToolResult.mock.calls.filter(
|
||||
(call: unknown[]) =>
|
||||
call[0] &&
|
||||
typeof call[0] === "object" &&
|
||||
"mediaUrls" in (call[0] as Record<string, unknown>) &&
|
||||
!("text" in (call[0] as Record<string, unknown>)),
|
||||
);
|
||||
expect(directMediaCalls).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does NOT emit media for error results", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
type: "tool_execution_end",
|
||||
toolName: "browser",
|
||||
toolCallId: "tc-1",
|
||||
isError: true,
|
||||
result: {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
|
||||
{ type: "image", data: "base64", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/screenshot.png" },
|
||||
},
|
||||
});
|
||||
|
||||
expect(onToolResult).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("does NOT emit when tool result has no media", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
type: "tool_execution_end",
|
||||
toolName: "bash",
|
||||
toolCallId: "tc-1",
|
||||
isError: false,
|
||||
result: {
|
||||
content: [{ type: "text", text: "Command executed successfully" }],
|
||||
},
|
||||
});
|
||||
|
||||
expect(onToolResult).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("emits media from details.path fallback when no MEDIA: text", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
|
||||
|
||||
await handleToolExecutionEnd(ctx, {
|
||||
type: "tool_execution_end",
|
||||
toolName: "canvas",
|
||||
toolCallId: "tc-1",
|
||||
isError: false,
|
||||
result: {
|
||||
content: [
|
||||
{ type: "text", text: "Rendered canvas" },
|
||||
{ type: "image", data: "base64", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/canvas-output.png" },
|
||||
},
|
||||
});
|
||||
|
||||
expect(onToolResult).toHaveBeenCalledWith({
|
||||
mediaUrls: ["/tmp/canvas-output.png"],
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -10,6 +10,7 @@ import { normalizeTextForComparison } from "./pi-embedded-helpers.js";
|
||||
import { isMessagingTool, isMessagingToolSendAction } from "./pi-embedded-messaging.js";
|
||||
import {
|
||||
extractToolErrorMessage,
|
||||
extractToolResultMediaPaths,
|
||||
extractToolResultText,
|
||||
extractMessagingToolSend,
|
||||
isToolResultError,
|
||||
@@ -266,6 +267,20 @@ export async function handleToolExecutionEnd(
|
||||
}
|
||||
}
|
||||
|
||||
// Deliver media from tool results when the verbose emitToolOutput path is off.
|
||||
// When shouldEmitToolOutput() is true, emitToolOutput already delivers media
|
||||
// via parseReplyDirectives (MEDIA: text extraction), so skip to avoid duplicates.
|
||||
if (ctx.params.onToolResult && !isToolError && !ctx.shouldEmitToolOutput()) {
|
||||
const mediaPaths = extractToolResultMediaPaths(result);
|
||||
if (mediaPaths.length > 0) {
|
||||
try {
|
||||
void ctx.params.onToolResult({ mediaUrls: mediaPaths });
|
||||
} catch {
|
||||
// ignore delivery failures
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run after_tool_call plugin hook (fire-and-forget)
|
||||
const hookRunnerAfter = ctx.hookRunner ?? getGlobalHookRunner();
|
||||
if (hookRunnerAfter?.hasHooks("after_tool_call")) {
|
||||
|
||||
132
src/agents/pi-embedded-subscribe.tools.media.test.ts
Normal file
132
src/agents/pi-embedded-subscribe.tools.media.test.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { extractToolResultMediaPaths } from "./pi-embedded-subscribe.tools.js";
|
||||
|
||||
describe("extractToolResultMediaPaths", () => {
|
||||
it("returns empty array for null/undefined", () => {
|
||||
expect(extractToolResultMediaPaths(null)).toEqual([]);
|
||||
expect(extractToolResultMediaPaths(undefined)).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array for non-object", () => {
|
||||
expect(extractToolResultMediaPaths("hello")).toEqual([]);
|
||||
expect(extractToolResultMediaPaths(42)).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array when content is missing", () => {
|
||||
expect(extractToolResultMediaPaths({ details: { path: "/tmp/img.png" } })).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array when content has no text or image blocks", () => {
|
||||
expect(extractToolResultMediaPaths({ content: [{ type: "other" }] })).toEqual([]);
|
||||
});
|
||||
|
||||
it("extracts MEDIA: path from text content block", () => {
|
||||
const result = {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
|
||||
{ type: "image", data: "base64data", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/screenshot.png" },
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/screenshot.png"]);
|
||||
});
|
||||
|
||||
it("extracts MEDIA: path with extra text in the block", () => {
|
||||
const result = {
|
||||
content: [{ type: "text", text: "Here is the image\nMEDIA:/tmp/output.jpg\nDone" }],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/output.jpg"]);
|
||||
});
|
||||
|
||||
it("extracts multiple MEDIA: paths from different text blocks", () => {
|
||||
const result = {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/page1.png" },
|
||||
{ type: "text", text: "MEDIA:/tmp/page2.png" },
|
||||
],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/page1.png", "/tmp/page2.png"]);
|
||||
});
|
||||
|
||||
it("falls back to details.path when image content exists but no MEDIA: text", () => {
|
||||
// Pi SDK read tool doesn't include MEDIA: but OpenClaw imageResult
|
||||
// sets details.path as fallback.
|
||||
const result = {
|
||||
content: [
|
||||
{ type: "text", text: "Read image file [image/png]" },
|
||||
{ type: "image", data: "base64data", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/generated.png" },
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/generated.png"]);
|
||||
});
|
||||
|
||||
it("returns empty array when image content exists but no MEDIA: and no details.path", () => {
|
||||
// Pi SDK read tool: has image content but no path anywhere in the result.
|
||||
const result = {
|
||||
content: [
|
||||
{ type: "text", text: "Read image file [image/png]" },
|
||||
{ type: "image", data: "base64data", mimeType: "image/png" },
|
||||
],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
|
||||
it("does not fall back to details.path when MEDIA: paths are found", () => {
|
||||
const result = {
|
||||
content: [
|
||||
{ type: "text", text: "MEDIA:/tmp/from-text.png" },
|
||||
{ type: "image", data: "base64data", mimeType: "image/png" },
|
||||
],
|
||||
details: { path: "/tmp/from-details.png" },
|
||||
};
|
||||
// MEDIA: text takes priority; details.path is NOT also included.
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/from-text.png"]);
|
||||
});
|
||||
|
||||
it("handles backtick-wrapped MEDIA: paths", () => {
|
||||
const result = {
|
||||
content: [{ type: "text", text: "MEDIA: `/tmp/screenshot.png`" }],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/screenshot.png"]);
|
||||
});
|
||||
|
||||
it("ignores null/undefined items in content array", () => {
|
||||
const result = {
|
||||
content: [null, undefined, { type: "text", text: "MEDIA:/tmp/ok.png" }],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/ok.png"]);
|
||||
});
|
||||
|
||||
it("returns empty array for text-only results without MEDIA:", () => {
|
||||
const result = {
|
||||
content: [{ type: "text", text: "Command executed successfully" }],
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
|
||||
it("ignores details.path when no image content exists", () => {
|
||||
// details.path without image content is not media.
|
||||
const result = {
|
||||
content: [{ type: "text", text: "File saved" }],
|
||||
details: { path: "/tmp/data.json" },
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
|
||||
it("handles details.path with whitespace", () => {
|
||||
const result = {
|
||||
content: [{ type: "image", data: "base64", mimeType: "image/png" }],
|
||||
details: { path: " /tmp/image.png " },
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/image.png"]);
|
||||
});
|
||||
|
||||
it("skips empty details.path", () => {
|
||||
const result = {
|
||||
content: [{ type: "image", data: "base64", mimeType: "image/png" }],
|
||||
details: { path: " " },
|
||||
};
|
||||
expect(extractToolResultMediaPaths(result)).toEqual([]);
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,6 @@
|
||||
import { getChannelPlugin, normalizeChannelId } from "../channels/plugins/index.js";
|
||||
import { normalizeTargetForProvider } from "../infra/outbound/target-normalization.js";
|
||||
import { MEDIA_TOKEN_RE } from "../media/parse.js";
|
||||
import { truncateUtf16Safe } from "../utils.js";
|
||||
import { type MessagingToolSend } from "./pi-embedded-messaging.js";
|
||||
|
||||
@@ -118,6 +119,72 @@ export function extractToolResultText(result: unknown): string | undefined {
|
||||
return texts.join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract media file paths from a tool result.
|
||||
*
|
||||
* Strategy (first match wins):
|
||||
* 1. Parse `MEDIA:` tokens from text content blocks (all OpenClaw tools).
|
||||
* 2. Fall back to `details.path` when image content exists (OpenClaw imageResult).
|
||||
*
|
||||
* Returns an empty array when no media is found (e.g. Pi SDK `read` tool
|
||||
* returns base64 image data but no file path; those need a different delivery
|
||||
* path like saving to a temp file).
|
||||
*/
|
||||
export function extractToolResultMediaPaths(result: unknown): string[] {
|
||||
if (!result || typeof result !== "object") {
|
||||
return [];
|
||||
}
|
||||
const record = result as Record<string, unknown>;
|
||||
const content = Array.isArray(record.content) ? record.content : null;
|
||||
if (!content) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// Extract MEDIA: paths from text content blocks.
|
||||
const paths: string[] = [];
|
||||
let hasImageContent = false;
|
||||
for (const item of content) {
|
||||
if (!item || typeof item !== "object") {
|
||||
continue;
|
||||
}
|
||||
const entry = item as Record<string, unknown>;
|
||||
if (entry.type === "image") {
|
||||
hasImageContent = true;
|
||||
continue;
|
||||
}
|
||||
if (entry.type === "text" && typeof entry.text === "string") {
|
||||
// Reset lastIndex since MEDIA_TOKEN_RE is global.
|
||||
MEDIA_TOKEN_RE.lastIndex = 0;
|
||||
let match: RegExpExecArray | null;
|
||||
while ((match = MEDIA_TOKEN_RE.exec(entry.text)) !== null) {
|
||||
// Strip surrounding quotes/backticks and whitespace (mirrors cleanCandidate in media/parse).
|
||||
const p = match[1]
|
||||
?.replace(/^[`"'[{(]+/, "")
|
||||
.replace(/[`"'\]})\\,]+$/, "")
|
||||
.trim();
|
||||
if (p && p.length <= 4096) {
|
||||
paths.push(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (paths.length > 0) {
|
||||
return paths;
|
||||
}
|
||||
|
||||
// Fall back to details.path when image content exists but no MEDIA: text.
|
||||
if (hasImageContent) {
|
||||
const details = record.details as Record<string, unknown> | undefined;
|
||||
const p = typeof details?.path === "string" ? details.path.trim() : "";
|
||||
if (p) {
|
||||
return [p];
|
||||
}
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
export function isToolResultError(result: unknown): boolean {
|
||||
if (!result || typeof result !== "object") {
|
||||
return false;
|
||||
|
||||
@@ -128,6 +128,10 @@ export async function runAgentTurnWithFallback(params: {
|
||||
return { skip: true };
|
||||
}
|
||||
if (!text) {
|
||||
// Allow media-only payloads (e.g. tool result screenshots) through.
|
||||
if ((payload.mediaUrls?.length ?? 0) > 0) {
|
||||
return { text: undefined, skip: false };
|
||||
}
|
||||
return { skip: true };
|
||||
}
|
||||
const sanitized = sanitizeUserFacingText(text, {
|
||||
|
||||
Reference in New Issue
Block a user