diff --git a/src/media-understanding/runtime.test.ts b/src/media-understanding/runtime.test.ts index a71d0db97ab..71406ec5219 100644 --- a/src/media-understanding/runtime.test.ts +++ b/src/media-understanding/runtime.test.ts @@ -1,136 +1,69 @@ -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import { - withBundledPluginAllowlistCompat, - withBundledPluginEnablementCompat, - withBundledPluginVitestCompat, -} from "../plugins/bundled-compat.js"; -import { __testing as loaderTesting } from "../plugins/loader.js"; -import { loadPluginManifestRegistry } from "../plugins/manifest-registry.js"; -import { createEmptyPluginRegistry } from "../plugins/registry.js"; -import { setActivePluginRegistry } from "../plugins/runtime.js"; -const { resolveRuntimePluginRegistryMock } = vi.hoisted(() => ({ - resolveRuntimePluginRegistryMock: vi.fn< - (params?: unknown) => ReturnType | undefined - >(() => undefined), +const hoisted = vi.hoisted(() => ({ + describeImageFile: vi.fn(), + runMediaUnderstandingFile: vi.fn(), })); -vi.mock("../plugins/loader.js", async (importOriginal) => { - const actual = await importOriginal(); - return { - ...actual, - resolveRuntimePluginRegistry: resolveRuntimePluginRegistryMock, - }; -}); +vi.mock("../plugin-sdk/media-understanding-runtime.js", () => ({ + describeImageFile: hoisted.describeImageFile, + describeImageFileWithModel: vi.fn(), + describeVideoFile: vi.fn(), + runMediaUnderstandingFile: hoisted.runMediaUnderstandingFile, + transcribeAudioFile: vi.fn(), +})); let describeImageFile: typeof import("./runtime.js").describeImageFile; let runMediaUnderstandingFile: typeof import("./runtime.js").runMediaUnderstandingFile; -function setCompatibleActiveMediaUnderstandingRegistry( - pluginRegistry: ReturnType, - cfg: OpenClawConfig, -) { - const pluginIds = loadPluginManifestRegistry({ - config: cfg, - env: process.env, - }) - .plugins.filter( - (plugin) => - plugin.origin === "bundled" && - (plugin.contracts?.mediaUnderstandingProviders?.length ?? 0) > 0, - ) - .map((plugin) => plugin.id) - .toSorted((left, right) => left.localeCompare(right)); - const compatibleConfig = withBundledPluginVitestCompat({ - config: withBundledPluginEnablementCompat({ - config: withBundledPluginAllowlistCompat({ - config: cfg, - pluginIds, - }), - pluginIds, - }), - pluginIds, - env: process.env, - }); - const { cacheKey } = loaderTesting.resolvePluginLoadCacheContext({ - config: compatibleConfig, - env: process.env, - }); - setActivePluginRegistry(pluginRegistry, cacheKey); -} - -describe("media-understanding runtime helpers", () => { +describe("media-understanding runtime facade", () => { beforeAll(async () => { ({ describeImageFile, runMediaUnderstandingFile } = await import("./runtime.js")); }); afterEach(() => { - resolveRuntimePluginRegistryMock.mockReset(); - resolveRuntimePluginRegistryMock.mockReturnValue(undefined); + hoisted.describeImageFile.mockReset(); + hoisted.runMediaUnderstandingFile.mockReset(); }); - it("describes images through the active media-understanding registry", async () => { - const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-runtime-")); - const imagePath = path.join(tempDir, "sample.jpg"); - await fs.writeFile(imagePath, Buffer.from("image-bytes")); - - const pluginRegistry = createEmptyPluginRegistry(); - pluginRegistry.mediaUnderstandingProviders.push({ - pluginId: "vision-plugin", - pluginName: "Vision Plugin", - source: "test", - provider: { - id: "vision-plugin", - capabilities: ["image"], - describeImage: async () => ({ text: "image ok", model: "vision-v1" }), - }, - }); - resolveRuntimePluginRegistryMock.mockReturnValue(pluginRegistry); - - const cfg = { - tools: { - media: { - image: { - models: [{ provider: "vision-plugin", model: "vision-v1" }], + it("delegates describeImageFile to the plugin-sdk runtime", async () => { + const params = { + filePath: "/tmp/sample.jpg", + mime: "image/jpeg", + cfg: { + tools: { + media: { + image: { + models: [{ provider: "vision-plugin", model: "vision-v1" }], + }, }, }, - }, - } as OpenClawConfig; - setCompatibleActiveMediaUnderstandingRegistry(pluginRegistry, cfg); - - const result = await describeImageFile({ - filePath: imagePath, - mime: "image/jpeg", - cfg, + } as OpenClawConfig, agentDir: "/tmp/agent", - }); - - expect(result).toEqual({ + }; + const result = { text: "image ok", provider: "vision-plugin", model: "vision-v1", output: { - kind: "image.description", + kind: "image.description" as const, attachmentIndex: 0, text: "image ok", provider: "vision-plugin", model: "vision-v1", }, - }); + }; + hoisted.describeImageFile.mockResolvedValue(result); + + await expect(describeImageFile(params)).resolves.toEqual(result); + expect(hoisted.describeImageFile).toHaveBeenCalledWith(params); }); - it("returns undefined when no media output is produced", async () => { - const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-runtime-")); - const imagePath = path.join(tempDir, "sample.jpg"); - await fs.writeFile(imagePath, Buffer.from("image-bytes")); - - const result = await runMediaUnderstandingFile({ - capability: "image", - filePath: imagePath, + it("delegates runMediaUnderstandingFile to the plugin-sdk runtime", async () => { + const params = { + capability: "image" as const, + filePath: "/tmp/sample.jpg", mime: "image/jpeg", cfg: { tools: { @@ -142,13 +75,16 @@ describe("media-understanding runtime helpers", () => { }, } as OpenClawConfig, agentDir: "/tmp/agent", - }); - - expect(result).toEqual({ + }; + const result = { text: undefined, provider: undefined, model: undefined, output: undefined, - }); + }; + hoisted.runMediaUnderstandingFile.mockResolvedValue(result); + + await expect(runMediaUnderstandingFile(params)).resolves.toEqual(result); + expect(hoisted.runMediaUnderstandingFile).toHaveBeenCalledWith(params); }); });