From 1687c672a7904bd38fc92243d3ebe66afd8b1f61 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 18 Apr 2026 21:08:42 +0100 Subject: [PATCH] refactor: dedupe media understanding provider helpers --- .../config-provider-models.ts | 31 ++++ .../media-understanding-misc.test.ts | 116 +++++++------- .../media-understanding-url-fallback.test.ts | 151 ++++++++---------- .../provider-capability-registry.test.ts | 49 ++++++ .../provider-capability-registry.ts | 27 +--- src/media-understanding/provider-registry.ts | 55 ++----- .../runner.auto-audio.test.ts | 26 +-- .../runner.deepgram.test.ts | 26 +-- src/media-understanding/runner.proxy.test.ts | 97 +++++------ .../runner.skip-tiny-audio.test.ts | 65 +++----- src/media-understanding/runner.test-mocks.ts | 19 +++ .../runner.vision-skip.test.ts | 18 +-- 12 files changed, 320 insertions(+), 360 deletions(-) create mode 100644 src/media-understanding/config-provider-models.ts create mode 100644 src/media-understanding/provider-capability-registry.test.ts create mode 100644 src/media-understanding/runner.test-mocks.ts diff --git a/src/media-understanding/config-provider-models.ts b/src/media-understanding/config-provider-models.ts new file mode 100644 index 00000000000..03a3b725e80 --- /dev/null +++ b/src/media-understanding/config-provider-models.ts @@ -0,0 +1,31 @@ +import type { OpenClawConfig } from "../config/types.js"; +import { normalizeMediaProviderId } from "./provider-id.js"; + +type ConfigProvider = NonNullable< + NonNullable["providers"]>[string] +>; + +type ConfigProviderModel = NonNullable[number]; + +function hasImageCapableModel(providerCfg: ConfigProvider): boolean { + const models = providerCfg.models ?? []; + return models.some( + (model: ConfigProviderModel) => Array.isArray(model?.input) && model.input.includes("image"), + ); +} + +export function resolveImageCapableConfigProviderIds(cfg?: OpenClawConfig): string[] { + const configProviders = cfg?.models?.providers; + if (!configProviders || typeof configProviders !== "object") { + return []; + } + + const providerIds: string[] = []; + for (const [providerKey, providerCfg] of Object.entries(configProviders)) { + if (!providerKey?.trim() || !hasImageCapableModel(providerCfg)) { + continue; + } + providerIds.push(normalizeMediaProviderId(providerKey)); + } + return providerIds; +} diff --git a/src/media-understanding/media-understanding-misc.test.ts b/src/media-understanding/media-understanding-misc.test.ts index 8794eb01469..a8414a740e4 100644 --- a/src/media-understanding/media-understanding-misc.test.ts +++ b/src/media-understanding/media-understanding-misc.test.ts @@ -25,6 +25,29 @@ describe("media understanding scope", () => { const originalFetch = globalThis.fetch; +async function withLocalAttachmentCache( + prefix: string, + run: (params: { + cache: MediaAttachmentCache; + attachmentPath: string; + canonicalAttachmentPath: string; + }) => Promise, +) { + await withTempDir({ prefix }, async (base) => { + const allowedRoot = path.join(base, "allowed"); + const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); + await fs.mkdir(allowedRoot, { recursive: true }); + await fs.writeFile(attachmentPath, "ok"); + const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath); + + const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], { + localPathRoots: [allowedRoot], + }); + + await run({ cache, attachmentPath, canonicalAttachmentPath }); + }); +} + describe("media understanding attachments SSRF", () => { afterEach(() => { globalThis.fetch = originalFetch; @@ -45,16 +68,7 @@ describe("media understanding attachments SSRF", () => { }); it("reads local attachments inside configured roots", async () => { - await withTempDir({ prefix: "openclaw-media-cache-allowed-" }, async (base) => { - const allowedRoot = path.join(base, "allowed"); - const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); - await fs.mkdir(allowedRoot, { recursive: true }); - await fs.writeFile(attachmentPath, "ok"); - - const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], { - localPathRoots: [allowedRoot], - }); - + await withLocalAttachmentCache("openclaw-media-cache-allowed-", async ({ cache }) => { const result = await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }); expect(result.buffer.toString()).toBe("ok"); }); @@ -111,63 +125,51 @@ describe("media understanding attachments SSRF", () => { }); it("enforces maxBytes after reading local attachments", async () => { - await withTempDir({ prefix: "openclaw-media-cache-max-bytes-" }, async (base) => { - const allowedRoot = path.join(base, "allowed"); - const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); - await fs.mkdir(allowedRoot, { recursive: true }); - await fs.writeFile(attachmentPath, "ok"); - const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath); + await withLocalAttachmentCache( + "openclaw-media-cache-max-bytes-", + async ({ cache, canonicalAttachmentPath }) => { + const originalOpen = fs.open.bind(fs); + const openSpy = vi.spyOn(fs, "open"); - const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], { - localPathRoots: [allowedRoot], - }); - const originalOpen = fs.open.bind(fs); - const openSpy = vi.spyOn(fs, "open"); + openSpy.mockImplementation(async (filePath, flags) => { + const handle = await originalOpen(filePath, flags); + const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath)); + if (candidatePath !== canonicalAttachmentPath) { + return handle; + } + const mockedHandle = handle as typeof handle & { + readFile: typeof handle.readFile; + }; + mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile; + return mockedHandle; + }); - openSpy.mockImplementation(async (filePath, flags) => { - const handle = await originalOpen(filePath, flags); - const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath)); - if (candidatePath !== canonicalAttachmentPath) { - return handle; - } - const mockedHandle = handle as typeof handle & { - readFile: typeof handle.readFile; - }; - mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile; - return mockedHandle; - }); - - await expect( - cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), - ).rejects.toThrow(/exceeds maxBytes 1024/i); - }); + await expect( + cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), + ).rejects.toThrow(/exceeds maxBytes 1024/i); + }, + ); }); it("opens local attachments with nofollow on posix", async () => { if (process.platform === "win32") { return; } - await withTempDir({ prefix: "openclaw-media-cache-flags-" }, async (base) => { - const allowedRoot = path.join(base, "allowed"); - const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); - await fs.mkdir(allowedRoot, { recursive: true }); - await fs.writeFile(attachmentPath, "ok"); - const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath); + await withLocalAttachmentCache( + "openclaw-media-cache-flags-", + async ({ cache, canonicalAttachmentPath }) => { + const openSpy = vi.spyOn(fs, "open"); - const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], { - localPathRoots: [allowedRoot], - }); - const openSpy = vi.spyOn(fs, "open"); + await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }); - await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }); - - expect(openSpy).toHaveBeenCalled(); - const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? []; - expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe( - canonicalAttachmentPath, - ); - expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW); - }); + expect(openSpy).toHaveBeenCalled(); + const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? []; + expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe( + canonicalAttachmentPath, + ); + expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW); + }, + ); }); it("rejects local attachments when canonicalization fails", async () => { diff --git a/src/media-understanding/media-understanding-url-fallback.test.ts b/src/media-understanding/media-understanding-url-fallback.test.ts index 566d5af1087..a401785bf18 100644 --- a/src/media-understanding/media-understanding-url-fallback.test.ts +++ b/src/media-understanding/media-understanding-url-fallback.test.ts @@ -14,6 +14,41 @@ vi.mock("../media/fetch.js", async () => { }; }); +async function withBlockedLocalAttachmentFallback( + prefix: string, + run: (params: { cache: MediaAttachmentCache; fallbackUrl: string }) => Promise, +) { + await withTempDir({ prefix }, async (base) => { + const allowedRoot = path.join(base, "allowed"); + const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); + const fallbackUrl = "https://example.com/fallback.jpg"; + await fs.mkdir(allowedRoot, { recursive: true }); + await fs.writeFile(attachmentPath, "ok"); + + const cache = new MediaAttachmentCache( + [{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }], + { + localPathRoots: [allowedRoot], + }, + ); + const originalRealpath = fs.realpath.bind(fs); + fetchRemoteMediaMock.mockResolvedValue({ + buffer: Buffer.from("fallback-buffer"), + contentType: "image/jpeg", + fileName: "fallback.jpg", + }); + + vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { + if (String(candidatePath) === attachmentPath) { + throw new Error("EACCES"); + } + return await originalRealpath(candidatePath); + }); + + await run({ cache, fallbackUrl }); + }); +} + describe("media understanding attachment URL fallback", () => { afterEach(() => { vi.restoreAllMocks(); @@ -21,90 +56,44 @@ describe("media understanding attachment URL fallback", () => { }); it("getPath falls back to URL fetch when local path is blocked", async () => { - await withTempDir({ prefix: "openclaw-media-cache-getpath-url-fallback-" }, async (base) => { - const allowedRoot = path.join(base, "allowed"); - const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); - const fallbackUrl = "https://example.com/fallback.jpg"; - await fs.mkdir(allowedRoot, { recursive: true }); - await fs.writeFile(attachmentPath, "ok"); - - const cache = new MediaAttachmentCache( - [{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }], - { - localPathRoots: [allowedRoot], - }, - ); - const originalRealpath = fs.realpath.bind(fs); - fetchRemoteMediaMock.mockResolvedValue({ - buffer: Buffer.from("fallback-buffer"), - contentType: "image/jpeg", - fileName: "fallback.jpg", - }); - - vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { - if (String(candidatePath) === attachmentPath) { - throw new Error("EACCES"); + await withBlockedLocalAttachmentFallback( + "openclaw-media-cache-getpath-url-fallback-", + async ({ cache, fallbackUrl }) => { + const result = await cache.getPath({ + attachmentIndex: 0, + maxBytes: 1024, + timeoutMs: 1000, + }); + // getPath should fall through to getBuffer URL fetch, write a temp file, + // and return a path to that temp file instead of throwing. + expect(result.path).toBeTruthy(); + expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); + expect(fetchRemoteMediaMock).toHaveBeenCalledWith( + expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), + ); + // Clean up the temp file + if (result.cleanup) { + await result.cleanup(); } - return await originalRealpath(candidatePath); - }); - - const result = await cache.getPath({ - attachmentIndex: 0, - maxBytes: 1024, - timeoutMs: 1000, - }); - // getPath should fall through to getBuffer URL fetch, write a temp file, - // and return a path to that temp file instead of throwing. - expect(result.path).toBeTruthy(); - expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); - expect(fetchRemoteMediaMock).toHaveBeenCalledWith( - expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), - ); - // Clean up the temp file - if (result.cleanup) { - await result.cleanup(); - } - }); + }, + ); }); it("falls back to URL fetch when local attachment canonicalization fails", async () => { - await withTempDir({ prefix: "openclaw-media-cache-url-fallback-" }, async (base) => { - const allowedRoot = path.join(base, "allowed"); - const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); - const fallbackUrl = "https://example.com/fallback.jpg"; - await fs.mkdir(allowedRoot, { recursive: true }); - await fs.writeFile(attachmentPath, "ok"); - - const cache = new MediaAttachmentCache( - [{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }], - { - localPathRoots: [allowedRoot], - }, - ); - const originalRealpath = fs.realpath.bind(fs); - fetchRemoteMediaMock.mockResolvedValue({ - buffer: Buffer.from("fallback-buffer"), - contentType: "image/jpeg", - fileName: "fallback.jpg", - }); - - vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { - if (String(candidatePath) === attachmentPath) { - throw new Error("EACCES"); - } - return await originalRealpath(candidatePath); - }); - - const result = await cache.getBuffer({ - attachmentIndex: 0, - maxBytes: 1024, - timeoutMs: 1000, - }); - expect(result.buffer.toString()).toBe("fallback-buffer"); - expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); - expect(fetchRemoteMediaMock).toHaveBeenCalledWith( - expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), - ); - }); + await withBlockedLocalAttachmentFallback( + "openclaw-media-cache-url-fallback-", + async ({ cache, fallbackUrl }) => { + const result = await cache.getBuffer({ + attachmentIndex: 0, + maxBytes: 1024, + timeoutMs: 1000, + }); + expect(result.buffer.toString()).toBe("fallback-buffer"); + expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); + expect(fetchRemoteMediaMock).toHaveBeenCalledWith( + expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), + ); + }, + ); }); }); diff --git a/src/media-understanding/provider-capability-registry.test.ts b/src/media-understanding/provider-capability-registry.test.ts new file mode 100644 index 00000000000..d7551ef1ad5 --- /dev/null +++ b/src/media-understanding/provider-capability-registry.test.ts @@ -0,0 +1,49 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js"; +import { buildMediaUnderstandingCapabilityRegistry } from "./provider-capability-registry.js"; + +vi.mock("../plugins/capability-provider-runtime.js", () => ({ + resolvePluginCapabilityProviders: vi.fn(() => []), +})); + +const resolveProviders = vi.mocked(resolvePluginCapabilityProviders); + +describe("media-understanding capability registry", () => { + beforeEach(() => { + resolveProviders.mockReturnValue([]); + }); + + it("auto-registers config providers with image-capable models", () => { + const registry = buildMediaUnderstandingCapabilityRegistry({ + models: { + providers: { + glm: { + models: [{ id: "glm-4.6v", input: ["text", "image"] }], + }, + textOnly: { + models: [{ id: "text-model", input: ["text"] }], + }, + }, + }, + } as never); + + expect(registry.get("glm")?.capabilities).toEqual(["image"]); + expect(registry.get("textOnly")).toBeUndefined(); + }); + + it("keeps plugin-owned capabilities ahead of config auto-registration", () => { + resolveProviders.mockReturnValue([{ id: "google", capabilities: ["audio"] } as never]); + + const registry = buildMediaUnderstandingCapabilityRegistry({ + models: { + providers: { + google: { + models: [{ id: "custom-gemini", input: ["text", "image"] }], + }, + }, + }, + } as never); + + expect(registry.get("google")?.capabilities).toEqual(["audio"]); + }); +}); diff --git a/src/media-understanding/provider-capability-registry.ts b/src/media-understanding/provider-capability-registry.ts index 2206d112998..1cd07601d82 100644 --- a/src/media-understanding/provider-capability-registry.ts +++ b/src/media-understanding/provider-capability-registry.ts @@ -1,14 +1,9 @@ import type { OpenClawConfig } from "../config/types.js"; import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js"; +import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js"; import { normalizeMediaProviderId } from "./provider-id.js"; import type { MediaUnderstandingCapabilityRegistry, MediaUnderstandingProvider } from "./types.js"; -type ConfigProvider = NonNullable< - NonNullable["providers"]>[string] ->; - -type ConfigProviderModel = NonNullable[number]; - function mergeProviderCapabilities( registry: MediaUnderstandingCapabilityRegistry, provider: Pick, @@ -32,24 +27,8 @@ export function buildMediaUnderstandingCapabilityRegistry( mergeProviderCapabilities(registry, provider); } - const configProviders = cfg?.models?.providers; - if (configProviders && typeof configProviders === "object") { - for (const [providerKey, providerCfg] of Object.entries(configProviders)) { - if (!providerKey?.trim()) { - continue; - } - const normalizedKey = normalizeMediaProviderId(providerKey); - if (registry.has(normalizedKey)) { - continue; - } - const models = providerCfg.models ?? []; - const hasImageModel = models.some( - (model: ConfigProviderModel) => - Array.isArray(model?.input) && model.input.includes("image"), - ); - if (!hasImageModel) { - continue; - } + for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) { + if (!registry.has(normalizedKey)) { mergeProviderCapabilities(registry, { id: normalizedKey, capabilities: ["image"], diff --git a/src/media-understanding/provider-registry.ts b/src/media-understanding/provider-registry.ts index 3f550b063ca..025f1ea02ae 100644 --- a/src/media-understanding/provider-registry.ts +++ b/src/media-understanding/provider-registry.ts @@ -1,20 +1,16 @@ import type { OpenClawConfig } from "../config/types.js"; import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js"; +import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js"; import { describeImageWithModel, describeImagesWithModel } from "./image-runtime.js"; import { normalizeMediaProviderId } from "./provider-id.js"; import type { MediaUnderstandingProvider } from "./types.js"; -type ConfigProvider = NonNullable< - NonNullable["providers"]>[string] ->; - -type ConfigProviderModel = NonNullable[number]; - function mergeProviderIntoRegistry( registry: Map, provider: MediaUnderstandingProvider, + registryKey = provider.id, ) { - const normalizedKey = normalizeMediaProviderId(provider.id); + const normalizedKey = normalizeMediaProviderId(registryKey); const existing = registry.get(normalizedKey); const merged = existing ? { @@ -43,46 +39,19 @@ export function buildMediaUnderstandingRegistry( mergeProviderIntoRegistry(registry, provider); } // Auto-register media-understanding for config providers with image-capable models (#51392) - const configProviders = cfg?.models?.providers; - if (configProviders && typeof configProviders === "object") { - for (const [providerKey, providerCfg] of Object.entries(configProviders)) { - if (!providerKey?.trim()) { - continue; - } - const normalizedKey = normalizeMediaProviderId(providerKey); - if (registry.has(normalizedKey)) { - continue; - } - const models = providerCfg.models ?? []; - const hasImageModel = models.some( - (m: ConfigProviderModel) => Array.isArray(m?.input) && m.input.includes("image"), - ); - if (hasImageModel) { - const autoProvider: MediaUnderstandingProvider = { - id: normalizedKey, - capabilities: ["image"], - describeImage: describeImageWithModel, - describeImages: describeImagesWithModel, - }; - mergeProviderIntoRegistry(registry, autoProvider); - } + for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) { + if (!registry.has(normalizedKey)) { + mergeProviderIntoRegistry(registry, { + id: normalizedKey, + capabilities: ["image"], + describeImage: describeImageWithModel, + describeImages: describeImagesWithModel, + }); } } if (overrides) { for (const [key, provider] of Object.entries(overrides)) { - const normalizedKey = normalizeMediaProviderId(key); - const existing = registry.get(normalizedKey); - const merged = existing - ? { - ...existing, - ...provider, - capabilities: provider.capabilities ?? existing.capabilities, - defaultModels: provider.defaultModels ?? existing.defaultModels, - autoPriority: provider.autoPriority ?? existing.autoPriority, - nativeDocumentInputs: provider.nativeDocumentInputs ?? existing.nativeDocumentInputs, - } - : provider; - registry.set(normalizedKey, merged); + mergeProviderIntoRegistry(registry, provider, key); } } return registry; diff --git a/src/media-understanding/runner.auto-audio.test.ts b/src/media-understanding/runner.auto-audio.test.ts index f20d25422c2..3bed355e0a3 100644 --- a/src/media-understanding/runner.auto-audio.test.ts +++ b/src/media-understanding/runner.auto-audio.test.ts @@ -8,25 +8,15 @@ import { runCapability } from "./runner.js"; import { withAudioFixture } from "./runner.test-utils.js"; import type { AudioTranscriptionRequest, MediaUnderstandingProvider } from "./types.js"; -const modelAuthMocks = vi.hoisted(() => ({ - hasAvailableAuthForProvider: vi.fn(() => true), - resolveApiKeyForProvider: vi.fn(async () => ({ - apiKey: "test-key", - source: "test", - mode: "api-key", - })), - requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), -})); +vi.mock("../agents/model-auth.js", async () => { + const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js"); + return createAvailableModelAuthMockModule(); +}); -vi.mock("../agents/model-auth.js", () => ({ - hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider, - resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider, - requireApiKey: modelAuthMocks.requireApiKey, -})); - -vi.mock("../plugins/capability-provider-runtime.js", () => ({ - resolvePluginCapabilityProviders: () => [], -})); +vi.mock("../plugins/capability-provider-runtime.js", async () => { + const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js"); + return createEmptyCapabilityProviderMockModule(); +}); function createProviderRegistry( providers: Record, diff --git a/src/media-understanding/runner.deepgram.test.ts b/src/media-understanding/runner.deepgram.test.ts index ae56284d9b1..8e4c161e1e1 100644 --- a/src/media-understanding/runner.deepgram.test.ts +++ b/src/media-understanding/runner.deepgram.test.ts @@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js"; import { buildProviderRegistry, runCapability } from "./runner.js"; import { withAudioFixture } from "./runner.test-utils.js"; -const modelAuthMocks = vi.hoisted(() => ({ - hasAvailableAuthForProvider: vi.fn(() => true), - resolveApiKeyForProvider: vi.fn(async () => ({ - apiKey: "test-key", - source: "test", - mode: "api-key", - })), - requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), -})); +vi.mock("../agents/model-auth.js", async () => { + const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js"); + return createAvailableModelAuthMockModule(); +}); -vi.mock("../agents/model-auth.js", () => ({ - hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider, - resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider, - requireApiKey: modelAuthMocks.requireApiKey, -})); - -vi.mock("../plugins/capability-provider-runtime.js", () => ({ - resolvePluginCapabilityProviders: () => [], -})); +vi.mock("../plugins/capability-provider-runtime.js", async () => { + const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js"); + return createEmptyCapabilityProviderMockModule(); +}); describe("runCapability deepgram provider options", () => { it("merges provider options, headers, and baseUrl overrides", async () => { diff --git a/src/media-understanding/runner.proxy.test.ts b/src/media-understanding/runner.proxy.test.ts index febe654951b..a924eb2775f 100644 --- a/src/media-understanding/runner.proxy.test.ts +++ b/src/media-understanding/runner.proxy.test.ts @@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js"; import { withAudioFixture, withVideoFixture } from "./runner.test-utils.js"; import type { AudioTranscriptionRequest, VideoDescriptionRequest } from "./types.js"; -const modelAuthMocks = vi.hoisted(() => ({ - hasAvailableAuthForProvider: vi.fn(() => true), - resolveApiKeyForProvider: vi.fn(async () => ({ - apiKey: "test-key", - source: "test", - mode: "api-key", - })), - requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), -})); +vi.mock("../agents/model-auth.js", async () => { + const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js"); + return createAvailableModelAuthMockModule(); +}); -vi.mock("../agents/model-auth.js", () => ({ - hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider, - resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider, - requireApiKey: modelAuthMocks.requireApiKey, -})); - -vi.mock("../plugins/capability-provider-runtime.js", () => ({ - resolvePluginCapabilityProviders: () => [], -})); +vi.mock("../plugins/capability-provider-runtime.js", async () => { + const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js"); + return createEmptyCapabilityProviderMockModule(); +}); const proxyFetchMocks = vi.hoisted(() => { const proxyFetch = vi.fn() as unknown as typeof fetch; @@ -45,6 +35,28 @@ let buildProviderRegistry: typeof import("./runner.js").buildProviderRegistry; let clearMediaUnderstandingBinaryCacheForTests: typeof import("./runner.js").clearMediaUnderstandingBinaryCacheForTests; let runCapability: typeof import("./runner.js").runCapability; +function createOpenAiAudioCfg(providerOverrides: Record = {}): OpenClawConfig { + return { + models: { + providers: { + openai: { + apiKey: "test-key", // pragma: allowlist secret + ...providerOverrides, + models: [], + }, + }, + }, + tools: { + media: { + audio: { + enabled: true, + models: [{ provider: "openai", model: "whisper-1" }], + }, + }, + }, + } as unknown as OpenClawConfig; +} + async function runAudioCapabilityWithFetchCapture(params: { fixturePrefix: string; outputText: string; @@ -62,28 +74,9 @@ async function runAudioCapabilityWithFetchCapture(params: { }, }); - const cfg = { - models: { - providers: { - openai: { - apiKey: "test-key", // pragma: allowlist secret - models: [], - }, - }, - }, - tools: { - media: { - audio: { - enabled: true, - models: [{ provider: "openai", model: "whisper-1" }], - }, - }, - }, - } as unknown as OpenClawConfig; - const result = await runCapability({ capability: "audio", - cfg, + cfg: createOpenAiAudioCfg(), ctx, attachments: cache, media, @@ -194,31 +187,13 @@ describe("runCapability proxy fetch passthrough", () => { }, }); - const cfg = { - models: { - providers: { - openai: { - apiKey: "test-key", // pragma: allowlist secret - request: { - allowPrivateNetwork: true, - }, - models: [], - }, - }, - }, - tools: { - media: { - audio: { - enabled: true, - models: [{ provider: "openai", model: "whisper-1" }], - }, - }, - }, - } as unknown as OpenClawConfig; - const result = await runCapability({ capability: "audio", - cfg, + cfg: createOpenAiAudioCfg({ + request: { + allowPrivateNetwork: true, + }, + }), ctx, attachments: cache, media, diff --git a/src/media-understanding/runner.skip-tiny-audio.test.ts b/src/media-understanding/runner.skip-tiny-audio.test.ts index e9cf0b53e1a..fc0386c7a83 100644 --- a/src/media-understanding/runner.skip-tiny-audio.test.ts +++ b/src/media-understanding/runner.skip-tiny-audio.test.ts @@ -1,33 +1,24 @@ -import fs from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; import { describe, expect, it, vi } from "vitest"; import type { MsgContext } from "../auto-reply/templating.js"; import type { OpenClawConfig } from "../config/types.js"; import { MIN_AUDIO_FILE_BYTES } from "./defaults.js"; -import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js"; +import type { + createMediaAttachmentCache, + normalizeMediaAttachments, +} from "./runner.attachments.js"; import { buildProviderRegistry, runCapability } from "./runner.js"; +import { withMediaFixture } from "./runner.test-utils.js"; import type { AudioTranscriptionRequest } from "./types.js"; -const modelAuthMocks = vi.hoisted(() => ({ - hasAvailableAuthForProvider: vi.fn(() => true), - resolveApiKeyForProvider: vi.fn(async () => ({ - apiKey: "test-key", - source: "test", - mode: "api-key", - })), - requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), -})); +vi.mock("../agents/model-auth.js", async () => { + const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js"); + return createAvailableModelAuthMockModule(); +}); -vi.mock("../agents/model-auth.js", () => ({ - hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider, - resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider, - requireApiKey: modelAuthMocks.requireApiKey, -})); - -vi.mock("../plugins/capability-provider-runtime.js", () => ({ - resolvePluginCapabilityProviders: () => [], -})); +vi.mock("../plugins/capability-provider-runtime.js", async () => { + const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js"); + return createEmptyCapabilityProviderMockModule(); +}); async function withAudioFixture(params: { filePrefix: string; @@ -40,29 +31,15 @@ async function withAudioFixture(params: { cache: ReturnType; }) => Promise; }) { - const originalPath = process.env.PATH; - process.env.PATH = "/usr/bin:/bin"; - - const tmpPath = path.join( - os.tmpdir(), - `${params.filePrefix}-${Date.now().toString()}.${params.extension}`, + await withMediaFixture( + { + filePrefix: params.filePrefix, + extension: params.extension, + mediaType: params.mediaType, + fileContents: params.fileContents, + }, + params.run, ); - await fs.writeFile(tmpPath, params.fileContents); - - const ctx: MsgContext = { MediaPath: tmpPath, MediaType: params.mediaType }; - const media = normalizeMediaAttachments(ctx); - const cache = createMediaAttachmentCache(media, { - localPathRoots: [path.dirname(tmpPath)], - includeDefaultLocalPathRoots: false, - }); - - try { - await params.run({ ctx, media, cache }); - } finally { - process.env.PATH = originalPath; - await cache.cleanup(); - await fs.unlink(tmpPath).catch(() => {}); - } } const AUDIO_CAPABILITY_CFG = { diff --git a/src/media-understanding/runner.test-mocks.ts b/src/media-understanding/runner.test-mocks.ts new file mode 100644 index 00000000000..0cfa565208b --- /dev/null +++ b/src/media-understanding/runner.test-mocks.ts @@ -0,0 +1,19 @@ +import { vi } from "vitest"; + +export function createAvailableModelAuthMockModule() { + return { + hasAvailableAuthForProvider: vi.fn(() => true), + resolveApiKeyForProvider: vi.fn(async () => ({ + apiKey: "test-key", + source: "test", + mode: "api-key", + })), + requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), + }; +} + +export function createEmptyCapabilityProviderMockModule() { + return { + resolvePluginCapabilityProviders: () => [], + }; +} diff --git a/src/media-understanding/runner.vision-skip.test.ts b/src/media-understanding/runner.vision-skip.test.ts index 86f5b78d223..0ca988bf791 100644 --- a/src/media-understanding/runner.vision-skip.test.ts +++ b/src/media-understanding/runner.vision-skip.test.ts @@ -24,21 +24,11 @@ const baseCatalog = [ let catalog = [...baseCatalog]; const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog)); -const modelAuthMocks = vi.hoisted(() => ({ - hasAvailableAuthForProvider: vi.fn(() => true), - resolveApiKeyForProvider: vi.fn(async () => ({ - apiKey: "test-key", - source: "test", - mode: "api-key", - })), - requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"), -})); -vi.mock("../agents/model-auth.js", () => ({ - hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider, - resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider, - requireApiKey: modelAuthMocks.requireApiKey, -})); +vi.mock("../agents/model-auth.js", async () => { + const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js"); + return createAvailableModelAuthMockModule(); +}); vi.mock("../plugins/capability-provider-runtime.js", async () => { const runtime =