refactor: dedupe media understanding provider helpers

This commit is contained in:
Peter Steinberger
2026-04-18 21:08:42 +01:00
parent 045010bb78
commit 1687c672a7
12 changed files with 320 additions and 360 deletions

View File

@@ -0,0 +1,31 @@
import type { OpenClawConfig } from "../config/types.js";
import { normalizeMediaProviderId } from "./provider-id.js";
type ConfigProvider = NonNullable<
NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>[string]
>;
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
/**
 * Reports whether a configured provider lists at least one model that accepts image input.
 *
 * A model counts as image-capable when its `input` is an array that contains `"image"`.
 * A missing provider entry, or a missing / non-array `models` value, is treated as
 * "no image-capable model" rather than raising a TypeError.
 *
 * @param providerCfg - One entry of `models.providers` from the config.
 * @returns `true` when any listed model includes `"image"` in its `input` array.
 */
function hasImageCapableModel(providerCfg: ConfigProvider): boolean {
  // The config is user-authored, so tolerate a null entry or a malformed `models` value.
  const models: unknown = providerCfg?.models;
  if (!Array.isArray(models)) {
    return false;
  }
  return models.some(
    (model: ConfigProviderModel) => Array.isArray(model?.input) && model.input.includes("image"),
  );
}
/**
 * Lists the normalized ids of config providers that declare an image-capable model.
 *
 * Provider keys that are empty or whitespace-only are ignored. Ids are returned in
 * the iteration order of `cfg.models.providers` and are not de-duplicated.
 *
 * @param cfg - Optional config; a missing or non-object `models.providers` yields `[]`.
 * @returns The result of `normalizeMediaProviderId` for every qualifying provider key.
 */
export function resolveImageCapableConfigProviderIds(cfg?: OpenClawConfig): string[] {
  const providersByKey = cfg?.models?.providers;
  if (!providersByKey || typeof providersByKey !== "object") {
    return [];
  }
  return Object.entries(providersByKey)
    .filter(([key, entry]) => Boolean(key?.trim()) && hasImageCapableModel(entry))
    .map(([key]) => normalizeMediaProviderId(key));
}

View File

@@ -25,6 +25,29 @@ describe("media understanding scope", () => {
const originalFetch = globalThis.fetch;
/**
 * Test helper: runs `run` against a MediaAttachmentCache backed by a single local file.
 *
 * Inside a temp directory created with `prefix`, it writes `allowed/voice-note.m4a`
 * containing "ok" and builds a cache whose `localPathRoots` is that `allowed` directory.
 * The callback receives the cache, the attachment path as written, and its realpath
 * (the raw path is used when `fs.realpath` rejects).
 */
async function withLocalAttachmentCache(
  prefix: string,
  run: (params: {
    cache: MediaAttachmentCache;
    attachmentPath: string;
    canonicalAttachmentPath: string;
  }) => Promise<void>,
) {
  await withTempDir({ prefix }, async (tempRoot) => {
    const rootDir = path.join(tempRoot, "allowed");
    const attachmentPath = path.join(rootDir, "voice-note.m4a");
    await fs.mkdir(rootDir, { recursive: true });
    await fs.writeFile(attachmentPath, "ok");

    // Resolve the canonical path up front; fall back to the raw path on failure.
    let canonicalAttachmentPath: string;
    try {
      canonicalAttachmentPath = await fs.realpath(attachmentPath);
    } catch {
      canonicalAttachmentPath = attachmentPath;
    }

    const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
      localPathRoots: [rootDir],
    });
    await run({ cache, attachmentPath, canonicalAttachmentPath });
  });
}
describe("media understanding attachments SSRF", () => {
afterEach(() => {
globalThis.fetch = originalFetch;
@@ -45,16 +68,7 @@ describe("media understanding attachments SSRF", () => {
});
it("reads local attachments inside configured roots", async () => {
await withTempDir({ prefix: "openclaw-media-cache-allowed-" }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
localPathRoots: [allowedRoot],
});
await withLocalAttachmentCache("openclaw-media-cache-allowed-", async ({ cache }) => {
const result = await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
expect(result.buffer.toString()).toBe("ok");
});
@@ -111,63 +125,51 @@ describe("media understanding attachments SSRF", () => {
});
it("enforces maxBytes after reading local attachments", async () => {
await withTempDir({ prefix: "openclaw-media-cache-max-bytes-" }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath);
await withLocalAttachmentCache(
"openclaw-media-cache-max-bytes-",
async ({ cache, canonicalAttachmentPath }) => {
const originalOpen = fs.open.bind(fs);
const openSpy = vi.spyOn(fs, "open");
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
localPathRoots: [allowedRoot],
});
const originalOpen = fs.open.bind(fs);
const openSpy = vi.spyOn(fs, "open");
openSpy.mockImplementation(async (filePath, flags) => {
const handle = await originalOpen(filePath, flags);
const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath));
if (candidatePath !== canonicalAttachmentPath) {
return handle;
}
const mockedHandle = handle as typeof handle & {
readFile: typeof handle.readFile;
};
mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile;
return mockedHandle;
});
openSpy.mockImplementation(async (filePath, flags) => {
const handle = await originalOpen(filePath, flags);
const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath));
if (candidatePath !== canonicalAttachmentPath) {
return handle;
}
const mockedHandle = handle as typeof handle & {
readFile: typeof handle.readFile;
};
mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile;
return mockedHandle;
});
await expect(
cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }),
).rejects.toThrow(/exceeds maxBytes 1024/i);
});
await expect(
cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }),
).rejects.toThrow(/exceeds maxBytes 1024/i);
},
);
});
it("opens local attachments with nofollow on posix", async () => {
if (process.platform === "win32") {
return;
}
await withTempDir({ prefix: "openclaw-media-cache-flags-" }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath);
await withLocalAttachmentCache(
"openclaw-media-cache-flags-",
async ({ cache, canonicalAttachmentPath }) => {
const openSpy = vi.spyOn(fs, "open");
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
localPathRoots: [allowedRoot],
});
const openSpy = vi.spyOn(fs, "open");
await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
expect(openSpy).toHaveBeenCalled();
const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? [];
expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe(
canonicalAttachmentPath,
);
expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW);
});
expect(openSpy).toHaveBeenCalled();
const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? [];
expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe(
canonicalAttachmentPath,
);
expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW);
},
);
});
it("rejects local attachments when canonicalization fails", async () => {

View File

@@ -14,6 +14,41 @@ vi.mock("../media/fetch.js", async () => {
};
});
/**
 * Test helper: runs `run` with a cache whose only attachment has both a local path
 * and a fallback URL, while making the local path fail canonicalization.
 *
 * Setup performed before `run` is invoked:
 * - writes `allowed/voice-note.m4a` ("ok") inside a temp dir created with `prefix`;
 * - stubs `fetchRemoteMediaMock` to resolve with a "fallback-buffer" JPEG payload;
 * - spies on `fs.realpath` so it throws `EACCES` for the attachment path only.
 *
 * The spy is not undone here; the enclosing suite's `afterEach` calls `vi.restoreAllMocks()`.
 */
async function withBlockedLocalAttachmentFallback(
prefix: string,
run: (params: { cache: MediaAttachmentCache; fallbackUrl: string }) => Promise<void>,
) {
await withTempDir({ prefix }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
const fallbackUrl = "https://example.com/fallback.jpg";
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
// The attachment carries both a local path and a URL so the cache has something to fall back to.
const cache = new MediaAttachmentCache(
[{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }],
{
localPathRoots: [allowedRoot],
},
);
// Capture the real implementation before installing the spy so other paths still resolve.
const originalRealpath = fs.realpath.bind(fs);
fetchRemoteMediaMock.mockResolvedValue({
buffer: Buffer.from("fallback-buffer"),
contentType: "image/jpeg",
fileName: "fallback.jpg",
});
// Fail canonicalization only for the attachment itself; delegate everything else.
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
if (String(candidatePath) === attachmentPath) {
throw new Error("EACCES");
}
return await originalRealpath(candidatePath);
});
await run({ cache, fallbackUrl });
});
}
describe("media understanding attachment URL fallback", () => {
afterEach(() => {
vi.restoreAllMocks();
@@ -21,90 +56,44 @@ describe("media understanding attachment URL fallback", () => {
});
it("getPath falls back to URL fetch when local path is blocked", async () => {
await withTempDir({ prefix: "openclaw-media-cache-getpath-url-fallback-" }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
const fallbackUrl = "https://example.com/fallback.jpg";
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
const cache = new MediaAttachmentCache(
[{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }],
{
localPathRoots: [allowedRoot],
},
);
const originalRealpath = fs.realpath.bind(fs);
fetchRemoteMediaMock.mockResolvedValue({
buffer: Buffer.from("fallback-buffer"),
contentType: "image/jpeg",
fileName: "fallback.jpg",
});
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
if (String(candidatePath) === attachmentPath) {
throw new Error("EACCES");
await withBlockedLocalAttachmentFallback(
"openclaw-media-cache-getpath-url-fallback-",
async ({ cache, fallbackUrl }) => {
const result = await cache.getPath({
attachmentIndex: 0,
maxBytes: 1024,
timeoutMs: 1000,
});
// getPath should fall through to getBuffer URL fetch, write a temp file,
// and return a path to that temp file instead of throwing.
expect(result.path).toBeTruthy();
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
);
// Clean up the temp file
if (result.cleanup) {
await result.cleanup();
}
return await originalRealpath(candidatePath);
});
const result = await cache.getPath({
attachmentIndex: 0,
maxBytes: 1024,
timeoutMs: 1000,
});
// getPath should fall through to getBuffer URL fetch, write a temp file,
// and return a path to that temp file instead of throwing.
expect(result.path).toBeTruthy();
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
);
// Clean up the temp file
if (result.cleanup) {
await result.cleanup();
}
});
},
);
});
it("falls back to URL fetch when local attachment canonicalization fails", async () => {
await withTempDir({ prefix: "openclaw-media-cache-url-fallback-" }, async (base) => {
const allowedRoot = path.join(base, "allowed");
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
const fallbackUrl = "https://example.com/fallback.jpg";
await fs.mkdir(allowedRoot, { recursive: true });
await fs.writeFile(attachmentPath, "ok");
const cache = new MediaAttachmentCache(
[{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }],
{
localPathRoots: [allowedRoot],
},
);
const originalRealpath = fs.realpath.bind(fs);
fetchRemoteMediaMock.mockResolvedValue({
buffer: Buffer.from("fallback-buffer"),
contentType: "image/jpeg",
fileName: "fallback.jpg",
});
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
if (String(candidatePath) === attachmentPath) {
throw new Error("EACCES");
}
return await originalRealpath(candidatePath);
});
const result = await cache.getBuffer({
attachmentIndex: 0,
maxBytes: 1024,
timeoutMs: 1000,
});
expect(result.buffer.toString()).toBe("fallback-buffer");
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
);
});
await withBlockedLocalAttachmentFallback(
"openclaw-media-cache-url-fallback-",
async ({ cache, fallbackUrl }) => {
const result = await cache.getBuffer({
attachmentIndex: 0,
maxBytes: 1024,
timeoutMs: 1000,
});
expect(result.buffer.toString()).toBe("fallback-buffer");
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
);
},
);
});
});

View File

@@ -0,0 +1,49 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import { buildMediaUnderstandingCapabilityRegistry } from "./provider-capability-registry.js";
// Replace the plugin capability resolver with a mock that returns no providers by default.
vi.mock("../plugins/capability-provider-runtime.js", () => ({
resolvePluginCapabilityProviders: vi.fn(() => []),
}));
// Typed handle on the mocked resolver so individual tests can change what it returns.
const resolveProviders = vi.mocked(resolvePluginCapabilityProviders);
describe("media-understanding capability registry", () => {
beforeEach(() => {
// Start every test with no plugin-provided capabilities.
resolveProviders.mockReturnValue([]);
});
// A config provider with an image-input model gets an "image" capability entry;
// a provider whose models only accept text gets no entry.
it("auto-registers config providers with image-capable models", () => {
const registry = buildMediaUnderstandingCapabilityRegistry({
models: {
providers: {
glm: {
models: [{ id: "glm-4.6v", input: ["text", "image"] }],
},
textOnly: {
models: [{ id: "text-model", input: ["text"] }],
},
},
},
} as never);
expect(registry.get("glm")?.capabilities).toEqual(["image"]);
expect(registry.get("textOnly")).toBeUndefined();
});
// When a plugin already declares capabilities for a provider id, the config-derived
// "image" capability must not replace or extend them.
it("keeps plugin-owned capabilities ahead of config auto-registration", () => {
resolveProviders.mockReturnValue([{ id: "google", capabilities: ["audio"] } as never]);
const registry = buildMediaUnderstandingCapabilityRegistry({
models: {
providers: {
google: {
models: [{ id: "custom-gemini", input: ["text", "image"] }],
},
},
},
} as never);
expect(registry.get("google")?.capabilities).toEqual(["audio"]);
});
});

View File

@@ -1,14 +1,9 @@
import type { OpenClawConfig } from "../config/types.js";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js";
import { normalizeMediaProviderId } from "./provider-id.js";
import type { MediaUnderstandingCapabilityRegistry, MediaUnderstandingProvider } from "./types.js";
type ConfigProvider = NonNullable<
NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>[string]
>;
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
function mergeProviderCapabilities(
registry: MediaUnderstandingCapabilityRegistry,
provider: Pick<MediaUnderstandingProvider, "id" | "capabilities">,
@@ -32,24 +27,8 @@ export function buildMediaUnderstandingCapabilityRegistry(
mergeProviderCapabilities(registry, provider);
}
const configProviders = cfg?.models?.providers;
if (configProviders && typeof configProviders === "object") {
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
if (!providerKey?.trim()) {
continue;
}
const normalizedKey = normalizeMediaProviderId(providerKey);
if (registry.has(normalizedKey)) {
continue;
}
const models = providerCfg.models ?? [];
const hasImageModel = models.some(
(model: ConfigProviderModel) =>
Array.isArray(model?.input) && model.input.includes("image"),
);
if (!hasImageModel) {
continue;
}
for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) {
if (!registry.has(normalizedKey)) {
mergeProviderCapabilities(registry, {
id: normalizedKey,
capabilities: ["image"],

View File

@@ -1,20 +1,16 @@
import type { OpenClawConfig } from "../config/types.js";
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js";
import { describeImageWithModel, describeImagesWithModel } from "./image-runtime.js";
import { normalizeMediaProviderId } from "./provider-id.js";
import type { MediaUnderstandingProvider } from "./types.js";
type ConfigProvider = NonNullable<
NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>[string]
>;
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
function mergeProviderIntoRegistry(
registry: Map<string, MediaUnderstandingProvider>,
provider: MediaUnderstandingProvider,
registryKey = provider.id,
) {
const normalizedKey = normalizeMediaProviderId(provider.id);
const normalizedKey = normalizeMediaProviderId(registryKey);
const existing = registry.get(normalizedKey);
const merged = existing
? {
@@ -43,46 +39,19 @@ export function buildMediaUnderstandingRegistry(
mergeProviderIntoRegistry(registry, provider);
}
// Auto-register media-understanding for config providers with image-capable models (#51392)
const configProviders = cfg?.models?.providers;
if (configProviders && typeof configProviders === "object") {
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
if (!providerKey?.trim()) {
continue;
}
const normalizedKey = normalizeMediaProviderId(providerKey);
if (registry.has(normalizedKey)) {
continue;
}
const models = providerCfg.models ?? [];
const hasImageModel = models.some(
(m: ConfigProviderModel) => Array.isArray(m?.input) && m.input.includes("image"),
);
if (hasImageModel) {
const autoProvider: MediaUnderstandingProvider = {
id: normalizedKey,
capabilities: ["image"],
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
};
mergeProviderIntoRegistry(registry, autoProvider);
}
for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) {
if (!registry.has(normalizedKey)) {
mergeProviderIntoRegistry(registry, {
id: normalizedKey,
capabilities: ["image"],
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
});
}
}
if (overrides) {
for (const [key, provider] of Object.entries(overrides)) {
const normalizedKey = normalizeMediaProviderId(key);
const existing = registry.get(normalizedKey);
const merged = existing
? {
...existing,
...provider,
capabilities: provider.capabilities ?? existing.capabilities,
defaultModels: provider.defaultModels ?? existing.defaultModels,
autoPriority: provider.autoPriority ?? existing.autoPriority,
nativeDocumentInputs: provider.nativeDocumentInputs ?? existing.nativeDocumentInputs,
}
: provider;
registry.set(normalizedKey, merged);
mergeProviderIntoRegistry(registry, provider, key);
}
}
return registry;

View File

@@ -8,25 +8,15 @@ import { runCapability } from "./runner.js";
import { withAudioFixture } from "./runner.test-utils.js";
import type { AudioTranscriptionRequest, MediaUnderstandingProvider } from "./types.js";
const modelAuthMocks = vi.hoisted(() => ({
hasAvailableAuthForProvider: vi.fn(() => true),
resolveApiKeyForProvider: vi.fn(async () => ({
apiKey: "test-key",
source: "test",
mode: "api-key",
})),
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
}));
vi.mock("../agents/model-auth.js", async () => {
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
return createAvailableModelAuthMockModule();
});
vi.mock("../agents/model-auth.js", () => ({
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
requireApiKey: modelAuthMocks.requireApiKey,
}));
vi.mock("../plugins/capability-provider-runtime.js", () => ({
resolvePluginCapabilityProviders: () => [],
}));
vi.mock("../plugins/capability-provider-runtime.js", async () => {
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
return createEmptyCapabilityProviderMockModule();
});
function createProviderRegistry(
providers: Record<string, MediaUnderstandingProvider>,

View File

@@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js";
import { buildProviderRegistry, runCapability } from "./runner.js";
import { withAudioFixture } from "./runner.test-utils.js";
const modelAuthMocks = vi.hoisted(() => ({
hasAvailableAuthForProvider: vi.fn(() => true),
resolveApiKeyForProvider: vi.fn(async () => ({
apiKey: "test-key",
source: "test",
mode: "api-key",
})),
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
}));
vi.mock("../agents/model-auth.js", async () => {
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
return createAvailableModelAuthMockModule();
});
vi.mock("../agents/model-auth.js", () => ({
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
requireApiKey: modelAuthMocks.requireApiKey,
}));
vi.mock("../plugins/capability-provider-runtime.js", () => ({
resolvePluginCapabilityProviders: () => [],
}));
vi.mock("../plugins/capability-provider-runtime.js", async () => {
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
return createEmptyCapabilityProviderMockModule();
});
describe("runCapability deepgram provider options", () => {
it("merges provider options, headers, and baseUrl overrides", async () => {

View File

@@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js";
import { withAudioFixture, withVideoFixture } from "./runner.test-utils.js";
import type { AudioTranscriptionRequest, VideoDescriptionRequest } from "./types.js";
const modelAuthMocks = vi.hoisted(() => ({
hasAvailableAuthForProvider: vi.fn(() => true),
resolveApiKeyForProvider: vi.fn(async () => ({
apiKey: "test-key",
source: "test",
mode: "api-key",
})),
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
}));
vi.mock("../agents/model-auth.js", async () => {
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
return createAvailableModelAuthMockModule();
});
vi.mock("../agents/model-auth.js", () => ({
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
requireApiKey: modelAuthMocks.requireApiKey,
}));
vi.mock("../plugins/capability-provider-runtime.js", () => ({
resolvePluginCapabilityProviders: () => [],
}));
vi.mock("../plugins/capability-provider-runtime.js", async () => {
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
return createEmptyCapabilityProviderMockModule();
});
const proxyFetchMocks = vi.hoisted(() => {
const proxyFetch = vi.fn() as unknown as typeof fetch;
@@ -45,6 +35,28 @@ let buildProviderRegistry: typeof import("./runner.js").buildProviderRegistry;
let clearMediaUnderstandingBinaryCacheForTests: typeof import("./runner.js").clearMediaUnderstandingBinaryCacheForTests;
let runCapability: typeof import("./runner.js").runCapability;
/**
 * Builds a minimal config with an `openai` provider and audio media understanding
 * enabled for `openai` / `whisper-1`.
 *
 * @param providerOverrides - Extra fields spread into the `openai` provider entry.
 *   They may replace `apiKey`, but `models` is always reset to an empty list afterwards.
 * @returns The config object, cast to `OpenClawConfig`.
 */
function createOpenAiAudioCfg(providerOverrides: Record<string, unknown> = {}): OpenClawConfig {
  const openaiProvider = {
    apiKey: "test-key", // pragma: allowlist secret
    ...providerOverrides,
    models: [],
  };
  const audioTool = {
    enabled: true,
    models: [{ provider: "openai", model: "whisper-1" }],
  };
  return {
    models: { providers: { openai: openaiProvider } },
    tools: { media: { audio: audioTool } },
  } as unknown as OpenClawConfig;
}
async function runAudioCapabilityWithFetchCapture(params: {
fixturePrefix: string;
outputText: string;
@@ -62,28 +74,9 @@ async function runAudioCapabilityWithFetchCapture(params: {
},
});
const cfg = {
models: {
providers: {
openai: {
apiKey: "test-key", // pragma: allowlist secret
models: [],
},
},
},
tools: {
media: {
audio: {
enabled: true,
models: [{ provider: "openai", model: "whisper-1" }],
},
},
},
} as unknown as OpenClawConfig;
const result = await runCapability({
capability: "audio",
cfg,
cfg: createOpenAiAudioCfg(),
ctx,
attachments: cache,
media,
@@ -194,31 +187,13 @@ describe("runCapability proxy fetch passthrough", () => {
},
});
const cfg = {
models: {
providers: {
openai: {
apiKey: "test-key", // pragma: allowlist secret
request: {
allowPrivateNetwork: true,
},
models: [],
},
},
},
tools: {
media: {
audio: {
enabled: true,
models: [{ provider: "openai", model: "whisper-1" }],
},
},
},
} as unknown as OpenClawConfig;
const result = await runCapability({
capability: "audio",
cfg,
cfg: createOpenAiAudioCfg({
request: {
allowPrivateNetwork: true,
},
}),
ctx,
attachments: cache,
media,

View File

@@ -1,33 +1,24 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/types.js";
import { MIN_AUDIO_FILE_BYTES } from "./defaults.js";
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
import type {
createMediaAttachmentCache,
normalizeMediaAttachments,
} from "./runner.attachments.js";
import { buildProviderRegistry, runCapability } from "./runner.js";
import { withMediaFixture } from "./runner.test-utils.js";
import type { AudioTranscriptionRequest } from "./types.js";
const modelAuthMocks = vi.hoisted(() => ({
hasAvailableAuthForProvider: vi.fn(() => true),
resolveApiKeyForProvider: vi.fn(async () => ({
apiKey: "test-key",
source: "test",
mode: "api-key",
})),
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
}));
vi.mock("../agents/model-auth.js", async () => {
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
return createAvailableModelAuthMockModule();
});
vi.mock("../agents/model-auth.js", () => ({
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
requireApiKey: modelAuthMocks.requireApiKey,
}));
vi.mock("../plugins/capability-provider-runtime.js", () => ({
resolvePluginCapabilityProviders: () => [],
}));
vi.mock("../plugins/capability-provider-runtime.js", async () => {
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
return createEmptyCapabilityProviderMockModule();
});
async function withAudioFixture(params: {
filePrefix: string;
@@ -40,29 +31,15 @@ async function withAudioFixture(params: {
cache: ReturnType<typeof createMediaAttachmentCache>;
}) => Promise<void>;
}) {
const originalPath = process.env.PATH;
process.env.PATH = "/usr/bin:/bin";
const tmpPath = path.join(
os.tmpdir(),
`${params.filePrefix}-${Date.now().toString()}.${params.extension}`,
await withMediaFixture(
{
filePrefix: params.filePrefix,
extension: params.extension,
mediaType: params.mediaType,
fileContents: params.fileContents,
},
params.run,
);
await fs.writeFile(tmpPath, params.fileContents);
const ctx: MsgContext = { MediaPath: tmpPath, MediaType: params.mediaType };
const media = normalizeMediaAttachments(ctx);
const cache = createMediaAttachmentCache(media, {
localPathRoots: [path.dirname(tmpPath)],
includeDefaultLocalPathRoots: false,
});
try {
await params.run({ ctx, media, cache });
} finally {
process.env.PATH = originalPath;
await cache.cleanup();
await fs.unlink(tmpPath).catch(() => {});
}
}
const AUDIO_CAPABILITY_CFG = {

View File

@@ -0,0 +1,19 @@
import { vi } from "vitest";
/**
 * Creates a mock replacement for the model-auth module in which auth always succeeds.
 *
 * Every call builds fresh `vi.fn` mocks:
 * - `hasAvailableAuthForProvider` returns `true`;
 * - `resolveApiKeyForProvider` resolves to a fixed `test-key` api-key credential;
 * - `requireApiKey` returns the given `apiKey`, or `"test-key"` when it is absent.
 */
export function createAvailableModelAuthMockModule() {
  const hasAvailableAuthForProvider = vi.fn(() => true);
  const resolveApiKeyForProvider = vi.fn(async () => ({
    apiKey: "test-key",
    source: "test",
    mode: "api-key",
  }));
  const requireApiKey = vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key");
  return { hasAvailableAuthForProvider, resolveApiKeyForProvider, requireApiKey };
}
/**
 * Creates a mock replacement for the capability-provider runtime module whose
 * `resolvePluginCapabilityProviders` always returns a new empty array.
 */
export function createEmptyCapabilityProviderMockModule() {
  const resolvePluginCapabilityProviders = () => [];
  return { resolvePluginCapabilityProviders };
}

View File

@@ -24,21 +24,11 @@ const baseCatalog = [
let catalog = [...baseCatalog];
const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog));
const modelAuthMocks = vi.hoisted(() => ({
hasAvailableAuthForProvider: vi.fn(() => true),
resolveApiKeyForProvider: vi.fn(async () => ({
apiKey: "test-key",
source: "test",
mode: "api-key",
})),
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
}));
vi.mock("../agents/model-auth.js", () => ({
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
requireApiKey: modelAuthMocks.requireApiKey,
}));
vi.mock("../agents/model-auth.js", async () => {
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
return createAvailableModelAuthMockModule();
});
vi.mock("../plugins/capability-provider-runtime.js", async () => {
const runtime =