Files
openclaw/src/agents/pi-embedded-subscribe.handlers.tools.media.test.ts
Gustavo Madeira Santana d78512b09d Refactor: centralize native approval lifecycle assembly (#62135)
Merged via squash.

Prepared head SHA: b7c20a7398
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
2026-04-07 14:40:26 -04:00

450 lines
14 KiB
TypeScript

import { describe, expect, it, vi } from "vitest";
import {
handleToolExecutionEnd,
handleToolExecutionStart,
} from "./pi-embedded-subscribe.handlers.tools.js";
import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
// Minimal mock context factory. Only the fields needed for the media emission path.
function createMockContext(overrides?: {
shouldEmitToolOutput?: boolean;
onToolResult?: ReturnType<typeof vi.fn>;
toolResultFormat?: "markdown" | "plain";
}): EmbeddedPiSubscribeContext {
const onToolResult = overrides?.onToolResult ?? vi.fn();
return {
params: {
runId: "test-run",
onToolResult,
onAgentEvent: vi.fn(),
toolResultFormat: overrides?.toolResultFormat,
},
state: {
toolMetaById: new Map(),
toolMetas: [],
toolSummaryById: new Set(),
itemActiveIds: new Set(),
itemStartedCount: 0,
itemCompletedCount: 0,
pendingMessagingTexts: new Map(),
pendingMessagingTargets: new Map(),
pendingMessagingMediaUrls: new Map(),
pendingToolMediaUrls: [],
pendingToolAudioAsVoice: false,
messagingToolSentTexts: [],
messagingToolSentTextsNormalized: [],
messagingToolSentMediaUrls: [],
messagingToolSentTargets: [],
deterministicApprovalPromptPending: false,
deterministicApprovalPromptSent: false,
},
log: { debug: vi.fn(), warn: vi.fn() },
shouldEmitToolResult: vi.fn(() => false),
shouldEmitToolOutput: vi.fn(() => overrides?.shouldEmitToolOutput ?? false),
emitToolSummary: vi.fn(),
emitToolOutput: vi.fn(),
trimMessagingToolSent: vi.fn(),
emitBlockReply: vi.fn(),
hookRunner: undefined,
// Fill in remaining required fields with no-ops.
blockChunker: null,
noteLastAssistant: vi.fn(),
stripBlockTags: vi.fn((t: string) => t),
emitBlockChunk: vi.fn(),
flushBlockReplyBuffer: vi.fn(),
emitReasoningStream: vi.fn(),
consumeReplyDirectives: vi.fn(() => null),
consumePartialReplyDirectives: vi.fn(() => null),
resetAssistantMessageState: vi.fn(),
resetForCompactionRetry: vi.fn(),
finalizeAssistantTexts: vi.fn(),
ensureCompactionPromise: vi.fn(),
noteCompactionRetry: vi.fn(),
resolveCompactionRetry: vi.fn(),
maybeResolveCompactionWait: vi.fn(),
recordAssistantUsage: vi.fn(),
incrementCompactionCount: vi.fn(),
getUsageTotals: vi.fn(() => undefined),
getCompactionCount: vi.fn(() => 0),
} as unknown as EmbeddedPiSubscribeContext;
}
async function emitPngMediaToolResult(
ctx: EmbeddedPiSubscribeContext,
opts?: { isError?: boolean },
) {
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: opts?.isError ?? false,
result: {
content: [
{ type: "text", text: "MEDIA:/tmp/screenshot.png" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/screenshot.png" },
},
});
}
async function emitUntrustedToolMediaResult(
ctx: EmbeddedPiSubscribeContext,
mediaPathOrUrl: string,
) {
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "plugin_tool",
toolCallId: "tc-1",
isError: false,
result: {
content: [{ type: "text", text: `MEDIA:${mediaPathOrUrl}` }],
},
});
}
async function emitMcpMediaToolResult(ctx: EmbeddedPiSubscribeContext, mediaPathOrUrl: string) {
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: false,
result: {
content: [{ type: "text", text: `MEDIA:${mediaPathOrUrl}` }],
details: {
mcpServer: "probe",
mcpTool: "browser",
},
},
});
}
describe("handleToolExecutionEnd media emission", () => {
it("does not warn for read tool when path is provided via file_path alias", async () => {
const ctx = createMockContext();
await handleToolExecutionStart(ctx, {
type: "tool_execution_start",
toolName: "read",
toolCallId: "tc-1",
args: { file_path: "README.md" },
});
expect(ctx.log.warn).not.toHaveBeenCalled();
});
it("emits media when verbose is off and tool result has MEDIA: path", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitPngMediaToolResult(ctx);
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/screenshot.png"]);
});
it("does NOT emit local media for untrusted tools", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitUntrustedToolMediaResult(ctx, "/tmp/secret.png");
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("emits remote media for untrusted tools", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitUntrustedToolMediaResult(ctx, "https://example.com/file.png");
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["https://example.com/file.png"]);
});
it("does NOT emit local media for MCP-provenance results", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitMcpMediaToolResult(ctx, "/tmp/secret.png");
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("emits remote media for MCP-provenance results", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitMcpMediaToolResult(ctx, "https://example.com/file.png");
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["https://example.com/file.png"]);
});
it("does NOT queue legacy MEDIA paths when verbose is full", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult });
await emitPngMediaToolResult(ctx);
// onToolResult should NOT be called by the new media path (emitToolOutput handles it).
// It may be called by emitToolOutput, but the new block should not fire.
// Verify emitToolOutput was called instead.
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("still queues structured media when verbose is full", async () => {
const ctx = createMockContext({ shouldEmitToolOutput: true, onToolResult: vi.fn() });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "tts",
toolCallId: "tc-1",
isError: false,
result: {
content: [{ type: "text", text: "Generated audio reply." }],
details: {
media: {
mediaUrl: "/tmp/reply.opus",
audioAsVoice: true,
},
},
},
});
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
it("does not queue structured media already emitted in plain verbose output", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "plain",
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "image_generate",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: "Generated 1 image with google/gemini-3.1-flash-image-preview.\nMEDIA:/tmp/generated.png",
},
],
details: {
media: {
mediaUrls: ["/tmp/generated.png"],
},
},
},
});
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("still queues structured media for markdown verbose output", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: true,
onToolResult: vi.fn(),
toolResultFormat: "markdown",
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "image_generate",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: "Generated 1 image with google/gemini-3.1-flash-image-preview.\nMEDIA:/tmp/generated.png",
},
],
details: {
media: {
mediaUrls: ["/tmp/generated.png"],
},
},
},
});
expect(ctx.emitToolOutput).toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/generated.png"]);
});
it("emits provider inventory output for compact video_generate list results", async () => {
const ctx = createMockContext({
shouldEmitToolOutput: false,
onToolResult: vi.fn(),
toolResultFormat: "plain",
});
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "video_generate",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: [
"openai: default=sora-2 | models=sora-2",
"google: default=veo-3.1-fast-generate-preview | models=veo-3.1-fast-generate-preview",
].join("\n"),
},
],
details: {
providers: [
{ id: "openai", defaultModel: "sora-2", models: ["sora-2"] },
{
id: "google",
defaultModel: "veo-3.1-fast-generate-preview",
models: ["veo-3.1-fast-generate-preview"],
},
],
},
},
});
expect(ctx.emitToolOutput).toHaveBeenCalledWith(
"video_generate",
undefined,
[
"openai: default=sora-2 | models=sora-2",
"google: default=veo-3.1-fast-generate-preview | models=veo-3.1-fast-generate-preview",
].join("\n"),
expect.any(Object),
);
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("does NOT emit media for error results", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await emitPngMediaToolResult(ctx, { isError: true });
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("does NOT emit when tool result has no media", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "bash",
toolCallId: "tc-1",
isError: false,
result: {
content: [{ type: "text", text: "Command executed successfully" }],
},
});
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("does NOT emit media for <media:audio> placeholder text", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "tts",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: "<media:audio> placeholder with successful preflight voice transcript",
},
],
},
});
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("does NOT emit media for malformed MEDIA:-prefixed prose", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "browser",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{
type: "text",
text: "MEDIA:-prefixed paths (lenient whitespace) when loading outbound media",
},
],
},
});
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
it("queues media from details.path fallback when no MEDIA: text", async () => {
const onToolResult = vi.fn();
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "canvas",
toolCallId: "tc-1",
isError: false,
result: {
content: [
{ type: "text", text: "Rendered canvas" },
{ type: "image", data: "base64", mimeType: "image/png" },
],
details: { path: "/tmp/canvas-output.png" },
},
});
expect(onToolResult).not.toHaveBeenCalled();
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/canvas-output.png"]);
});
it("queues structured details.media and voice metadata", async () => {
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult: vi.fn() });
await handleToolExecutionEnd(ctx, {
type: "tool_execution_end",
toolName: "tts",
toolCallId: "tc-1",
isError: false,
result: {
details: {
media: {
mediaUrl: "/tmp/reply.opus",
audioAsVoice: true,
},
},
},
});
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/reply.opus"]);
expect(ctx.state.pendingToolAudioAsVoice).toBe(true);
});
});