mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:40:44 +00:00
refactor: dedupe media understanding provider helpers
This commit is contained in:
31
src/media-understanding/config-provider-models.ts
Normal file
31
src/media-understanding/config-provider-models.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
|
||||
/** String-keyed collection of provider entries found at `models.providers` in the config. */
type ConfigProviderMap = NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>;

/** A single non-nullish provider entry from `models.providers`. */
type ConfigProvider = NonNullable<ConfigProviderMap[string]>;

/** One element of a provider entry's `models` list. */
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
|
||||
|
||||
/**
 * Reports whether a configured provider declares at least one model that accepts image input.
 *
 * A model counts as image-capable when its `input` field is an array containing `"image"`.
 * A nullish provider entry, a missing or non-array `models` value, and nullish model
 * entries are all treated as "not image-capable" instead of throwing.
 *
 * @param providerCfg - Provider entry taken from `models.providers` in the config.
 * @returns `true` if any listed model includes `"image"` in its `input` array.
 */
function hasImageCapableModel(providerCfg: ConfigProvider): boolean {
  // Runtime guard: callers can hand in values that bypass the static type, so do not
  // assume the entry exists or that `models` is really an array.
  const models: unknown = providerCfg?.models;
  if (!Array.isArray(models)) {
    return false;
  }
  return models.some(
    (model: ConfigProviderModel) => Array.isArray(model?.input) && model.input.includes("image"),
  );
}
|
||||
|
||||
/**
 * Lists the normalized ids of config providers that declare an image-capable model.
 *
 * Entries with a blank key, or without any image-capable model, are skipped. The keys
 * that remain are passed through `normalizeMediaProviderId`, in config iteration order.
 *
 * @param cfg - Optional config; anything without an object at `models.providers` yields `[]`.
 * @returns One normalized provider id per qualifying config entry.
 */
export function resolveImageCapableConfigProviderIds(cfg?: OpenClawConfig): string[] {
  const providersByKey = cfg?.models?.providers;
  if (typeof providersByKey !== "object" || !providersByKey) {
    return [];
  }

  return Object.entries(providersByKey)
    .filter(([key, entry]) => Boolean(key?.trim()) && hasImageCapableModel(entry))
    .map(([key]) => normalizeMediaProviderId(key));
}
|
||||
@@ -25,6 +25,29 @@ describe("media understanding scope", () => {
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
/**
 * Test helper: inside a temp directory, writes `allowed/voice-note.m4a` (contents "ok"),
 * builds a `MediaAttachmentCache` whose `localPathRoots` is that `allowed` directory,
 * and passes the pieces to `run`.
 *
 * @param prefix - Forwarded to `withTempDir` as the `prefix` option.
 * @param run - Callback receiving the cache, the attachment path as written, and the
 *   result of `fs.realpath` on it (the written path is used if `realpath` rejects).
 */
async function withLocalAttachmentCache(
  prefix: string,
  run: (params: {
    cache: MediaAttachmentCache;
    attachmentPath: string;
    canonicalAttachmentPath: string;
  }) => Promise<void>,
) {
  await withTempDir({ prefix }, async (tempBase) => {
    const rootDir = path.join(tempBase, "allowed");
    const attachmentPath = path.join(rootDir, "voice-note.m4a");

    // Create the fixture file before anything tries to resolve or read it.
    await fs.mkdir(rootDir, { recursive: true });
    await fs.writeFile(attachmentPath, "ok");

    const useWrittenPath = () => attachmentPath;
    const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(useWrittenPath);

    const attachments = [{ index: 0, path: attachmentPath }];
    const cache = new MediaAttachmentCache(attachments, { localPathRoots: [rootDir] });

    await run({ cache, attachmentPath, canonicalAttachmentPath });
  });
}
|
||||
|
||||
describe("media understanding attachments SSRF", () => {
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
@@ -45,16 +68,7 @@ describe("media understanding attachments SSRF", () => {
|
||||
});
|
||||
|
||||
it("reads local attachments inside configured roots", async () => {
|
||||
await withTempDir({ prefix: "openclaw-media-cache-allowed-" }, async (base) => {
|
||||
const allowedRoot = path.join(base, "allowed");
|
||||
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
|
||||
await fs.mkdir(allowedRoot, { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
|
||||
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
|
||||
localPathRoots: [allowedRoot],
|
||||
});
|
||||
|
||||
await withLocalAttachmentCache("openclaw-media-cache-allowed-", async ({ cache }) => {
|
||||
const result = await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
|
||||
expect(result.buffer.toString()).toBe("ok");
|
||||
});
|
||||
@@ -111,63 +125,51 @@ describe("media understanding attachments SSRF", () => {
|
||||
});
|
||||
|
||||
it("enforces maxBytes after reading local attachments", async () => {
|
||||
await withTempDir({ prefix: "openclaw-media-cache-max-bytes-" }, async (base) => {
|
||||
const allowedRoot = path.join(base, "allowed");
|
||||
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
|
||||
await fs.mkdir(allowedRoot, { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath);
|
||||
await withLocalAttachmentCache(
|
||||
"openclaw-media-cache-max-bytes-",
|
||||
async ({ cache, canonicalAttachmentPath }) => {
|
||||
const originalOpen = fs.open.bind(fs);
|
||||
const openSpy = vi.spyOn(fs, "open");
|
||||
|
||||
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
|
||||
localPathRoots: [allowedRoot],
|
||||
});
|
||||
const originalOpen = fs.open.bind(fs);
|
||||
const openSpy = vi.spyOn(fs, "open");
|
||||
openSpy.mockImplementation(async (filePath, flags) => {
|
||||
const handle = await originalOpen(filePath, flags);
|
||||
const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath));
|
||||
if (candidatePath !== canonicalAttachmentPath) {
|
||||
return handle;
|
||||
}
|
||||
const mockedHandle = handle as typeof handle & {
|
||||
readFile: typeof handle.readFile;
|
||||
};
|
||||
mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile;
|
||||
return mockedHandle;
|
||||
});
|
||||
|
||||
openSpy.mockImplementation(async (filePath, flags) => {
|
||||
const handle = await originalOpen(filePath, flags);
|
||||
const candidatePath = await fs.realpath(String(filePath)).catch(() => String(filePath));
|
||||
if (candidatePath !== canonicalAttachmentPath) {
|
||||
return handle;
|
||||
}
|
||||
const mockedHandle = handle as typeof handle & {
|
||||
readFile: typeof handle.readFile;
|
||||
};
|
||||
mockedHandle.readFile = (async () => Buffer.alloc(2048, 1)) as typeof handle.readFile;
|
||||
return mockedHandle;
|
||||
});
|
||||
|
||||
await expect(
|
||||
cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }),
|
||||
).rejects.toThrow(/exceeds maxBytes 1024/i);
|
||||
});
|
||||
await expect(
|
||||
cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }),
|
||||
).rejects.toThrow(/exceeds maxBytes 1024/i);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("opens local attachments with nofollow on posix", async () => {
|
||||
if (process.platform === "win32") {
|
||||
return;
|
||||
}
|
||||
await withTempDir({ prefix: "openclaw-media-cache-flags-" }, async (base) => {
|
||||
const allowedRoot = path.join(base, "allowed");
|
||||
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
|
||||
await fs.mkdir(allowedRoot, { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
const canonicalAttachmentPath = await fs.realpath(attachmentPath).catch(() => attachmentPath);
|
||||
await withLocalAttachmentCache(
|
||||
"openclaw-media-cache-flags-",
|
||||
async ({ cache, canonicalAttachmentPath }) => {
|
||||
const openSpy = vi.spyOn(fs, "open");
|
||||
|
||||
const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], {
|
||||
localPathRoots: [allowedRoot],
|
||||
});
|
||||
const openSpy = vi.spyOn(fs, "open");
|
||||
await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
|
||||
|
||||
await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 });
|
||||
|
||||
expect(openSpy).toHaveBeenCalled();
|
||||
const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? [];
|
||||
expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe(
|
||||
canonicalAttachmentPath,
|
||||
);
|
||||
expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW);
|
||||
});
|
||||
expect(openSpy).toHaveBeenCalled();
|
||||
const [openedPath, openedFlags] = openSpy.mock.calls[0] ?? [];
|
||||
expect(await fs.realpath(String(openedPath)).catch(() => String(openedPath))).toBe(
|
||||
canonicalAttachmentPath,
|
||||
);
|
||||
expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects local attachments when canonicalization fails", async () => {
|
||||
|
||||
@@ -14,6 +14,41 @@ vi.mock("../media/fetch.js", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
/**
 * Test helper: sets up an attachment that has both a local path and a fallback URL, then
 * makes `fs.realpath` throw `EACCES` for that local path before invoking `run`.
 *
 * The remote fetch mock is primed to resolve with a "fallback-buffer" JPEG payload.
 * `fs.realpath` calls for any other path are delegated to the real implementation.
 *
 * @param prefix - Forwarded to `withTempDir` as the `prefix` option.
 * @param run - Callback receiving the prepared cache and the fallback URL.
 */
async function withBlockedLocalAttachmentFallback(
  prefix: string,
  run: (params: { cache: MediaAttachmentCache; fallbackUrl: string }) => Promise<void>,
) {
  await withTempDir({ prefix }, async (tempBase) => {
    const rootDir = path.join(tempBase, "allowed");
    const attachmentPath = path.join(rootDir, "voice-note.m4a");
    const fallbackUrl = "https://example.com/fallback.jpg";

    await fs.mkdir(rootDir, { recursive: true });
    await fs.writeFile(attachmentPath, "ok");

    const attachment = { index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" };
    const cache = new MediaAttachmentCache([attachment], { localPathRoots: [rootDir] });

    // Capture the real implementation before the spy replaces it.
    const realRealpath = fs.realpath.bind(fs);

    const remotePayload = {
      buffer: Buffer.from("fallback-buffer"),
      contentType: "image/jpeg",
      fileName: "fallback.jpg",
    };
    fetchRemoteMediaMock.mockResolvedValue(remotePayload);

    vi.spyOn(fs, "realpath").mockImplementation(async (requestedPath) => {
      const isFixture = String(requestedPath) === attachmentPath;
      if (!isFixture) {
        return await realRealpath(requestedPath);
      }
      throw new Error("EACCES");
    });

    await run({ cache, fallbackUrl });
  });
}
|
||||
|
||||
describe("media understanding attachment URL fallback", () => {
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
@@ -21,90 +56,44 @@ describe("media understanding attachment URL fallback", () => {
|
||||
});
|
||||
|
||||
it("getPath falls back to URL fetch when local path is blocked", async () => {
|
||||
await withTempDir({ prefix: "openclaw-media-cache-getpath-url-fallback-" }, async (base) => {
|
||||
const allowedRoot = path.join(base, "allowed");
|
||||
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
|
||||
const fallbackUrl = "https://example.com/fallback.jpg";
|
||||
await fs.mkdir(allowedRoot, { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
|
||||
const cache = new MediaAttachmentCache(
|
||||
[{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }],
|
||||
{
|
||||
localPathRoots: [allowedRoot],
|
||||
},
|
||||
);
|
||||
const originalRealpath = fs.realpath.bind(fs);
|
||||
fetchRemoteMediaMock.mockResolvedValue({
|
||||
buffer: Buffer.from("fallback-buffer"),
|
||||
contentType: "image/jpeg",
|
||||
fileName: "fallback.jpg",
|
||||
});
|
||||
|
||||
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
|
||||
if (String(candidatePath) === attachmentPath) {
|
||||
throw new Error("EACCES");
|
||||
await withBlockedLocalAttachmentFallback(
|
||||
"openclaw-media-cache-getpath-url-fallback-",
|
||||
async ({ cache, fallbackUrl }) => {
|
||||
const result = await cache.getPath({
|
||||
attachmentIndex: 0,
|
||||
maxBytes: 1024,
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
// getPath should fall through to getBuffer URL fetch, write a temp file,
|
||||
// and return a path to that temp file instead of throwing.
|
||||
expect(result.path).toBeTruthy();
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
// Clean up the temp file
|
||||
if (result.cleanup) {
|
||||
await result.cleanup();
|
||||
}
|
||||
return await originalRealpath(candidatePath);
|
||||
});
|
||||
|
||||
const result = await cache.getPath({
|
||||
attachmentIndex: 0,
|
||||
maxBytes: 1024,
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
// getPath should fall through to getBuffer URL fetch, write a temp file,
|
||||
// and return a path to that temp file instead of throwing.
|
||||
expect(result.path).toBeTruthy();
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
// Clean up the temp file
|
||||
if (result.cleanup) {
|
||||
await result.cleanup();
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("falls back to URL fetch when local attachment canonicalization fails", async () => {
|
||||
await withTempDir({ prefix: "openclaw-media-cache-url-fallback-" }, async (base) => {
|
||||
const allowedRoot = path.join(base, "allowed");
|
||||
const attachmentPath = path.join(allowedRoot, "voice-note.m4a");
|
||||
const fallbackUrl = "https://example.com/fallback.jpg";
|
||||
await fs.mkdir(allowedRoot, { recursive: true });
|
||||
await fs.writeFile(attachmentPath, "ok");
|
||||
|
||||
const cache = new MediaAttachmentCache(
|
||||
[{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }],
|
||||
{
|
||||
localPathRoots: [allowedRoot],
|
||||
},
|
||||
);
|
||||
const originalRealpath = fs.realpath.bind(fs);
|
||||
fetchRemoteMediaMock.mockResolvedValue({
|
||||
buffer: Buffer.from("fallback-buffer"),
|
||||
contentType: "image/jpeg",
|
||||
fileName: "fallback.jpg",
|
||||
});
|
||||
|
||||
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
|
||||
if (String(candidatePath) === attachmentPath) {
|
||||
throw new Error("EACCES");
|
||||
}
|
||||
return await originalRealpath(candidatePath);
|
||||
});
|
||||
|
||||
const result = await cache.getBuffer({
|
||||
attachmentIndex: 0,
|
||||
maxBytes: 1024,
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
expect(result.buffer.toString()).toBe("fallback-buffer");
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
});
|
||||
await withBlockedLocalAttachmentFallback(
|
||||
"openclaw-media-cache-url-fallback-",
|
||||
async ({ cache, fallbackUrl }) => {
|
||||
const result = await cache.getBuffer({
|
||||
attachmentIndex: 0,
|
||||
maxBytes: 1024,
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
expect(result.buffer.toString()).toBe("fallback-buffer");
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
49
src/media-understanding/provider-capability-registry.test.ts
Normal file
49
src/media-understanding/provider-capability-registry.test.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
|
||||
import { buildMediaUnderstandingCapabilityRegistry } from "./provider-capability-registry.js";
|
||||
|
||||
// Replace the plugin capability-provider runtime with a stub whose resolver returns an
// empty list by default.
vi.mock("../plugins/capability-provider-runtime.js", () => {
  const emptyResolver = vi.fn(() => []);
  return { resolvePluginCapabilityProviders: emptyResolver };
});

// Mock-typed handle on the stubbed export, used by the tests to change its return value.
const resolveProviders = vi.mocked(resolvePluginCapabilityProviders);
|
||||
|
||||
describe("media-understanding capability registry", () => {
  // Builds a registry from a config that contains only the given `models.providers` map.
  const registryForProviders = (providers: Record<string, unknown>) =>
    buildMediaUnderstandingCapabilityRegistry({ models: { providers } } as never);

  // Start every test with the plugin resolver returning no providers.
  beforeEach(() => {
    resolveProviders.mockReturnValue([]);
  });

  it("auto-registers config providers with image-capable models", () => {
    const registry = registryForProviders({
      glm: { models: [{ id: "glm-4.6v", input: ["text", "image"] }] },
      textOnly: { models: [{ id: "text-model", input: ["text"] }] },
    });

    expect(registry.get("glm")?.capabilities).toEqual(["image"]);
    expect(registry.get("textOnly")).toBeUndefined();
  });

  it("keeps plugin-owned capabilities ahead of config auto-registration", () => {
    // The plugin reports audio only; the config entry for the same id lists an image model.
    const pluginProvider = { id: "google", capabilities: ["audio"] } as never;
    resolveProviders.mockReturnValue([pluginProvider]);

    const registry = registryForProviders({
      google: { models: [{ id: "custom-gemini", input: ["text", "image"] }] },
    });

    expect(registry.get("google")?.capabilities).toEqual(["audio"]);
  });
});
|
||||
@@ -1,14 +1,9 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
|
||||
import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js";
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
import type { MediaUnderstandingCapabilityRegistry, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
type ConfigProvider = NonNullable<
|
||||
NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>[string]
|
||||
>;
|
||||
|
||||
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
|
||||
|
||||
function mergeProviderCapabilities(
|
||||
registry: MediaUnderstandingCapabilityRegistry,
|
||||
provider: Pick<MediaUnderstandingProvider, "id" | "capabilities">,
|
||||
@@ -32,24 +27,8 @@ export function buildMediaUnderstandingCapabilityRegistry(
|
||||
mergeProviderCapabilities(registry, provider);
|
||||
}
|
||||
|
||||
const configProviders = cfg?.models?.providers;
|
||||
if (configProviders && typeof configProviders === "object") {
|
||||
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
|
||||
if (!providerKey?.trim()) {
|
||||
continue;
|
||||
}
|
||||
const normalizedKey = normalizeMediaProviderId(providerKey);
|
||||
if (registry.has(normalizedKey)) {
|
||||
continue;
|
||||
}
|
||||
const models = providerCfg.models ?? [];
|
||||
const hasImageModel = models.some(
|
||||
(model: ConfigProviderModel) =>
|
||||
Array.isArray(model?.input) && model.input.includes("image"),
|
||||
);
|
||||
if (!hasImageModel) {
|
||||
continue;
|
||||
}
|
||||
for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) {
|
||||
if (!registry.has(normalizedKey)) {
|
||||
mergeProviderCapabilities(registry, {
|
||||
id: normalizedKey,
|
||||
capabilities: ["image"],
|
||||
|
||||
@@ -1,20 +1,16 @@
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { resolvePluginCapabilityProviders } from "../plugins/capability-provider-runtime.js";
|
||||
import { resolveImageCapableConfigProviderIds } from "./config-provider-models.js";
|
||||
import { describeImageWithModel, describeImagesWithModel } from "./image-runtime.js";
|
||||
import { normalizeMediaProviderId } from "./provider-id.js";
|
||||
import type { MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
type ConfigProvider = NonNullable<
|
||||
NonNullable<NonNullable<OpenClawConfig["models"]>["providers"]>[string]
|
||||
>;
|
||||
|
||||
type ConfigProviderModel = NonNullable<ConfigProvider["models"]>[number];
|
||||
|
||||
function mergeProviderIntoRegistry(
|
||||
registry: Map<string, MediaUnderstandingProvider>,
|
||||
provider: MediaUnderstandingProvider,
|
||||
registryKey = provider.id,
|
||||
) {
|
||||
const normalizedKey = normalizeMediaProviderId(provider.id);
|
||||
const normalizedKey = normalizeMediaProviderId(registryKey);
|
||||
const existing = registry.get(normalizedKey);
|
||||
const merged = existing
|
||||
? {
|
||||
@@ -43,46 +39,19 @@ export function buildMediaUnderstandingRegistry(
|
||||
mergeProviderIntoRegistry(registry, provider);
|
||||
}
|
||||
// Auto-register media-understanding for config providers with image-capable models (#51392)
|
||||
const configProviders = cfg?.models?.providers;
|
||||
if (configProviders && typeof configProviders === "object") {
|
||||
for (const [providerKey, providerCfg] of Object.entries(configProviders)) {
|
||||
if (!providerKey?.trim()) {
|
||||
continue;
|
||||
}
|
||||
const normalizedKey = normalizeMediaProviderId(providerKey);
|
||||
if (registry.has(normalizedKey)) {
|
||||
continue;
|
||||
}
|
||||
const models = providerCfg.models ?? [];
|
||||
const hasImageModel = models.some(
|
||||
(m: ConfigProviderModel) => Array.isArray(m?.input) && m.input.includes("image"),
|
||||
);
|
||||
if (hasImageModel) {
|
||||
const autoProvider: MediaUnderstandingProvider = {
|
||||
id: normalizedKey,
|
||||
capabilities: ["image"],
|
||||
describeImage: describeImageWithModel,
|
||||
describeImages: describeImagesWithModel,
|
||||
};
|
||||
mergeProviderIntoRegistry(registry, autoProvider);
|
||||
}
|
||||
for (const normalizedKey of resolveImageCapableConfigProviderIds(cfg)) {
|
||||
if (!registry.has(normalizedKey)) {
|
||||
mergeProviderIntoRegistry(registry, {
|
||||
id: normalizedKey,
|
||||
capabilities: ["image"],
|
||||
describeImage: describeImageWithModel,
|
||||
describeImages: describeImagesWithModel,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (overrides) {
|
||||
for (const [key, provider] of Object.entries(overrides)) {
|
||||
const normalizedKey = normalizeMediaProviderId(key);
|
||||
const existing = registry.get(normalizedKey);
|
||||
const merged = existing
|
||||
? {
|
||||
...existing,
|
||||
...provider,
|
||||
capabilities: provider.capabilities ?? existing.capabilities,
|
||||
defaultModels: provider.defaultModels ?? existing.defaultModels,
|
||||
autoPriority: provider.autoPriority ?? existing.autoPriority,
|
||||
nativeDocumentInputs: provider.nativeDocumentInputs ?? existing.nativeDocumentInputs,
|
||||
}
|
||||
: provider;
|
||||
registry.set(normalizedKey, merged);
|
||||
mergeProviderIntoRegistry(registry, provider, key);
|
||||
}
|
||||
}
|
||||
return registry;
|
||||
|
||||
@@ -8,25 +8,15 @@ import { runCapability } from "./runner.js";
|
||||
import { withAudioFixture } from "./runner.test-utils.js";
|
||||
import type { AudioTranscriptionRequest, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
const modelAuthMocks = vi.hoisted(() => ({
|
||||
hasAvailableAuthForProvider: vi.fn(() => true),
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
apiKey: "test-key",
|
||||
source: "test",
|
||||
mode: "api-key",
|
||||
})),
|
||||
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
|
||||
}));
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
||||
return createAvailableModelAuthMockModule();
|
||||
});
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
|
||||
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
|
||||
requireApiKey: modelAuthMocks.requireApiKey,
|
||||
}));
|
||||
|
||||
vi.mock("../plugins/capability-provider-runtime.js", () => ({
|
||||
resolvePluginCapabilityProviders: () => [],
|
||||
}));
|
||||
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
||||
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
|
||||
return createEmptyCapabilityProviderMockModule();
|
||||
});
|
||||
|
||||
function createProviderRegistry(
|
||||
providers: Record<string, MediaUnderstandingProvider>,
|
||||
|
||||
@@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js";
|
||||
import { buildProviderRegistry, runCapability } from "./runner.js";
|
||||
import { withAudioFixture } from "./runner.test-utils.js";
|
||||
|
||||
const modelAuthMocks = vi.hoisted(() => ({
|
||||
hasAvailableAuthForProvider: vi.fn(() => true),
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
apiKey: "test-key",
|
||||
source: "test",
|
||||
mode: "api-key",
|
||||
})),
|
||||
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
|
||||
}));
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
||||
return createAvailableModelAuthMockModule();
|
||||
});
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
|
||||
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
|
||||
requireApiKey: modelAuthMocks.requireApiKey,
|
||||
}));
|
||||
|
||||
vi.mock("../plugins/capability-provider-runtime.js", () => ({
|
||||
resolvePluginCapabilityProviders: () => [],
|
||||
}));
|
||||
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
||||
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
|
||||
return createEmptyCapabilityProviderMockModule();
|
||||
});
|
||||
|
||||
describe("runCapability deepgram provider options", () => {
|
||||
it("merges provider options, headers, and baseUrl overrides", async () => {
|
||||
|
||||
@@ -3,25 +3,15 @@ import type { OpenClawConfig } from "../config/types.js";
|
||||
import { withAudioFixture, withVideoFixture } from "./runner.test-utils.js";
|
||||
import type { AudioTranscriptionRequest, VideoDescriptionRequest } from "./types.js";
|
||||
|
||||
const modelAuthMocks = vi.hoisted(() => ({
|
||||
hasAvailableAuthForProvider: vi.fn(() => true),
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
apiKey: "test-key",
|
||||
source: "test",
|
||||
mode: "api-key",
|
||||
})),
|
||||
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
|
||||
}));
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
||||
return createAvailableModelAuthMockModule();
|
||||
});
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
|
||||
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
|
||||
requireApiKey: modelAuthMocks.requireApiKey,
|
||||
}));
|
||||
|
||||
vi.mock("../plugins/capability-provider-runtime.js", () => ({
|
||||
resolvePluginCapabilityProviders: () => [],
|
||||
}));
|
||||
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
||||
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
|
||||
return createEmptyCapabilityProviderMockModule();
|
||||
});
|
||||
|
||||
const proxyFetchMocks = vi.hoisted(() => {
|
||||
const proxyFetch = vi.fn() as unknown as typeof fetch;
|
||||
@@ -45,6 +35,28 @@ let buildProviderRegistry: typeof import("./runner.js").buildProviderRegistry;
|
||||
let clearMediaUnderstandingBinaryCacheForTests: typeof import("./runner.js").clearMediaUnderstandingBinaryCacheForTests;
|
||||
let runCapability: typeof import("./runner.js").runCapability;
|
||||
|
||||
/**
 * Builds a test config with a single `openai` provider and audio media understanding
 * enabled for the `whisper-1` model.
 *
 * @param providerOverrides - Extra fields spread into the `openai` provider entry. They
 *   may replace `apiKey`, but `models` is always reset to an empty list afterwards.
 * @returns The assembled object, cast to `OpenClawConfig`.
 */
function createOpenAiAudioCfg(providerOverrides: Record<string, unknown> = {}): OpenClawConfig {
  const openai = {
    apiKey: "test-key", // pragma: allowlist secret
    ...providerOverrides,
    models: [],
  };
  const audio = {
    enabled: true,
    models: [{ provider: "openai", model: "whisper-1" }],
  };
  const cfg = {
    models: { providers: { openai } },
    tools: { media: { audio } },
  };
  return cfg as unknown as OpenClawConfig;
}
|
||||
|
||||
async function runAudioCapabilityWithFetchCapture(params: {
|
||||
fixturePrefix: string;
|
||||
outputText: string;
|
||||
@@ -62,28 +74,9 @@ async function runAudioCapabilityWithFetchCapture(params: {
|
||||
},
|
||||
});
|
||||
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
openai: {
|
||||
apiKey: "test-key", // pragma: allowlist secret
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
tools: {
|
||||
media: {
|
||||
audio: {
|
||||
enabled: true,
|
||||
models: [{ provider: "openai", model: "whisper-1" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const result = await runCapability({
|
||||
capability: "audio",
|
||||
cfg,
|
||||
cfg: createOpenAiAudioCfg(),
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media,
|
||||
@@ -194,31 +187,13 @@ describe("runCapability proxy fetch passthrough", () => {
|
||||
},
|
||||
});
|
||||
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
openai: {
|
||||
apiKey: "test-key", // pragma: allowlist secret
|
||||
request: {
|
||||
allowPrivateNetwork: true,
|
||||
},
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
tools: {
|
||||
media: {
|
||||
audio: {
|
||||
enabled: true,
|
||||
models: [{ provider: "openai", model: "whisper-1" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
const result = await runCapability({
|
||||
capability: "audio",
|
||||
cfg,
|
||||
cfg: createOpenAiAudioCfg({
|
||||
request: {
|
||||
allowPrivateNetwork: true,
|
||||
},
|
||||
}),
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media,
|
||||
|
||||
@@ -1,33 +1,24 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { MIN_AUDIO_FILE_BYTES } from "./defaults.js";
|
||||
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.attachments.js";
|
||||
import type {
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
} from "./runner.attachments.js";
|
||||
import { buildProviderRegistry, runCapability } from "./runner.js";
|
||||
import { withMediaFixture } from "./runner.test-utils.js";
|
||||
import type { AudioTranscriptionRequest } from "./types.js";
|
||||
|
||||
const modelAuthMocks = vi.hoisted(() => ({
|
||||
hasAvailableAuthForProvider: vi.fn(() => true),
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
apiKey: "test-key",
|
||||
source: "test",
|
||||
mode: "api-key",
|
||||
})),
|
||||
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
|
||||
}));
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
||||
return createAvailableModelAuthMockModule();
|
||||
});
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
|
||||
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
|
||||
requireApiKey: modelAuthMocks.requireApiKey,
|
||||
}));
|
||||
|
||||
vi.mock("../plugins/capability-provider-runtime.js", () => ({
|
||||
resolvePluginCapabilityProviders: () => [],
|
||||
}));
|
||||
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
||||
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
|
||||
return createEmptyCapabilityProviderMockModule();
|
||||
});
|
||||
|
||||
async function withAudioFixture(params: {
|
||||
filePrefix: string;
|
||||
@@ -40,29 +31,15 @@ async function withAudioFixture(params: {
|
||||
cache: ReturnType<typeof createMediaAttachmentCache>;
|
||||
}) => Promise<void>;
|
||||
}) {
|
||||
const originalPath = process.env.PATH;
|
||||
process.env.PATH = "/usr/bin:/bin";
|
||||
|
||||
const tmpPath = path.join(
|
||||
os.tmpdir(),
|
||||
`${params.filePrefix}-${Date.now().toString()}.${params.extension}`,
|
||||
await withMediaFixture(
|
||||
{
|
||||
filePrefix: params.filePrefix,
|
||||
extension: params.extension,
|
||||
mediaType: params.mediaType,
|
||||
fileContents: params.fileContents,
|
||||
},
|
||||
params.run,
|
||||
);
|
||||
await fs.writeFile(tmpPath, params.fileContents);
|
||||
|
||||
const ctx: MsgContext = { MediaPath: tmpPath, MediaType: params.mediaType };
|
||||
const media = normalizeMediaAttachments(ctx);
|
||||
const cache = createMediaAttachmentCache(media, {
|
||||
localPathRoots: [path.dirname(tmpPath)],
|
||||
includeDefaultLocalPathRoots: false,
|
||||
});
|
||||
|
||||
try {
|
||||
await params.run({ ctx, media, cache });
|
||||
} finally {
|
||||
process.env.PATH = originalPath;
|
||||
await cache.cleanup();
|
||||
await fs.unlink(tmpPath).catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
const AUDIO_CAPABILITY_CFG = {
|
||||
|
||||
19
src/media-understanding/runner.test-mocks.ts
Normal file
19
src/media-understanding/runner.test-mocks.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { vi } from "vitest";
|
||||
|
||||
/**
 * Creates a mock replacement for the model-auth module in which auth is always available.
 *
 * Each call returns fresh `vi.fn` mocks:
 * - `hasAvailableAuthForProvider` returns `true`;
 * - `resolveApiKeyForProvider` resolves to a new `{ apiKey, source, mode }` test credential;
 * - `requireApiKey` returns the given `auth.apiKey`, or `"test-key"` when it is nullish.
 */
export function createAvailableModelAuthMockModule() {
  const hasAvailableAuthForProvider = vi.fn(() => true);

  const resolveApiKeyForProvider = vi.fn(async () => {
    return { apiKey: "test-key", source: "test", mode: "api-key" };
  });

  const requireApiKey = vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key");

  return { hasAvailableAuthForProvider, resolveApiKeyForProvider, requireApiKey };
}
|
||||
|
||||
/**
 * Creates a mock replacement for the capability-provider runtime module whose
 * `resolvePluginCapabilityProviders` returns a new empty array on every call.
 */
export function createEmptyCapabilityProviderMockModule() {
  const resolvePluginCapabilityProviders = () => [];
  return { resolvePluginCapabilityProviders };
}
|
||||
@@ -24,21 +24,11 @@ const baseCatalog = [
|
||||
let catalog = [...baseCatalog];
|
||||
|
||||
const loadModelCatalog = vi.hoisted(() => vi.fn(async () => catalog));
|
||||
const modelAuthMocks = vi.hoisted(() => ({
|
||||
hasAvailableAuthForProvider: vi.fn(() => true),
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
apiKey: "test-key",
|
||||
source: "test",
|
||||
mode: "api-key",
|
||||
})),
|
||||
requireApiKey: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "test-key"),
|
||||
}));
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
hasAvailableAuthForProvider: modelAuthMocks.hasAvailableAuthForProvider,
|
||||
resolveApiKeyForProvider: modelAuthMocks.resolveApiKeyForProvider,
|
||||
requireApiKey: modelAuthMocks.requireApiKey,
|
||||
}));
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
||||
return createAvailableModelAuthMockModule();
|
||||
});
|
||||
|
||||
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
||||
const runtime =
|
||||
|
||||
Reference in New Issue
Block a user