refactor(media): move deepgram and groq providers into plugins

This commit is contained in:
Vincent Koc
2026-03-22 17:55:48 -07:00
parent 0f54ca20aa
commit 3dcc802fe5
15 changed files with 84 additions and 38 deletions

View File

@@ -165,8 +165,8 @@ describe("applyMediaUnderstanding echo transcript", () => {
}));
vi.doMock("./providers/index.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("./providers/index.js")>();
const { deepgramProvider } = await import("./providers/deepgram/index.js");
const { groqProvider } = await import("./providers/groq/index.js");
const { deepgramProvider } = await import("../../extensions/deepgram/media-understanding-provider.js");
const { groqProvider } = await import("../../extensions/groq/media-understanding-provider.js");
return {
...actual,
buildMediaUnderstandingRegistry: (

View File

@@ -248,8 +248,8 @@ describe("applyMediaUnderstanding", () => {
}));
vi.doMock("./providers/index.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("./providers/index.js")>();
const { deepgramProvider } = await import("./providers/deepgram/index.js");
const { groqProvider } = await import("./providers/groq/index.js");
const { deepgramProvider } = await import("../../extensions/deepgram/media-understanding-provider.js");
const { groqProvider } = await import("../../extensions/groq/media-understanding-provider.js");
return {
...actual,
buildMediaUnderstandingRegistry: (

View File

@@ -1,6 +1,6 @@
import { describe, expect, it } from "vitest";
import { isTruthyEnvValue } from "../../../infra/env.js";
import { transcribeDeepgramAudio } from "./audio.js";
import { transcribeDeepgramAudio } from "../../../../extensions/deepgram/media-understanding-provider.js";
// API key for the live Deepgram test, read from DEEPGRAM_API_KEY; empty string when unset.
const DEEPGRAM_KEY = process.env.DEEPGRAM_API_KEY ?? "";
// Model for the live test: DEEPGRAM_MODEL (trimmed), or "nova-3" when unset or blank.
const DEEPGRAM_MODEL = process.env.DEEPGRAM_MODEL?.trim() || "nova-3";

View File

@@ -4,7 +4,7 @@ import {
createRequestCaptureJsonFetch,
installPinnedHostnameTestHooks,
} from "../audio.test-helpers.js";
import { transcribeDeepgramAudio } from "./audio.js";
import { transcribeDeepgramAudio } from "../../../../extensions/deepgram/media-understanding-provider.js";
installPinnedHostnameTestHooks();

View File

@@ -1,79 +0,0 @@
import type { AudioTranscriptionRequest, AudioTranscriptionResult } from "../../types.js";
import {
assertOkOrThrowHttpError,
normalizeBaseUrl,
postTranscriptionRequest,
requireTranscriptionText,
} from "../shared.js";
/** Fallback base URL that `transcribeDeepgramAudio` hands to `normalizeBaseUrl`. */
export const DEFAULT_DEEPGRAM_AUDIO_BASE_URL = "https://api.deepgram.com/v1";
/** Model name used when a request does not specify one (or specifies only whitespace). */
export const DEFAULT_DEEPGRAM_AUDIO_MODEL = "nova-3";

/**
 * Chooses the model name for a transcription request.
 *
 * @param model - Optional caller-supplied model; surrounding whitespace is stripped.
 * @returns The trimmed model when non-empty, otherwise `DEFAULT_DEEPGRAM_AUDIO_MODEL`.
 */
function resolveModel(model?: string): string {
  const candidate = model?.trim();
  if (candidate) {
    return candidate;
  }
  return DEFAULT_DEEPGRAM_AUDIO_MODEL;
}
/**
 * The portion of the transcription response body that this module reads.
 * Every level is optional, so readers must use optional chaining.
 */
type DeepgramTranscriptResponse = {
  results?: {
    channels?: {
      alternatives?: { transcript?: string }[];
    }[];
  };
};
/**
 * Sends an audio buffer to the `/listen` endpoint under the resolved base URL
 * and returns the first transcript found in the response.
 *
 * @param params - Transcription request. Fields read here: `fetchFn`, `baseUrl`,
 *   `model`, `language`, `query`, `headers`, `apiKey`, `mime`, `buffer`, `timeoutMs`.
 * @returns The transcript text together with the model name that was requested.
 * @throws Whatever `assertOkOrThrowHttpError` / `requireTranscriptionText` raise
 *   (presumably on a non-OK response or a missing transcript; confirm in `../shared.js`).
 */
export async function transcribeDeepgramAudio(
  params: AudioTranscriptionRequest,
): Promise<AudioTranscriptionResult> {
  const fetchFn = params.fetchFn ?? fetch;
  const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_DEEPGRAM_AUDIO_BASE_URL);
  // Only a caller-supplied, non-blank base URL turns on the allowPrivateNetwork flag.
  const allowPrivateNetwork = Boolean(params.baseUrl?.trim());
  const model = resolveModel(params.model);

  // Build the endpoint URL: model first, then optional language, then extra query params.
  const endpoint = new URL(`${baseUrl}/listen`);
  const search = endpoint.searchParams;
  search.set("model", model);
  const language = params.language?.trim();
  if (language) {
    search.set("language", language);
  }
  if (params.query) {
    for (const [name, raw] of Object.entries(params.query)) {
      // Undefined entries are skipped rather than serialized as "undefined".
      if (raw !== undefined) {
        search.set(name, String(raw));
      }
    }
  }

  // Caller-provided headers win; defaults are filled in only where absent.
  const headers = new Headers(params.headers);
  if (!headers.has("authorization")) {
    headers.set("authorization", `Token ${params.apiKey}`);
  }
  if (!headers.has("content-type")) {
    headers.set("content-type", params.mime ?? "application/octet-stream");
  }

  const payloadBytes = new Uint8Array(params.buffer);
  const { response, release: releaseRequest } = await postTranscriptionRequest({
    url: endpoint.toString(),
    headers,
    body: payloadBytes,
    timeoutMs: params.timeoutMs,
    fetchFn,
    allowPrivateNetwork,
  });

  try {
    await assertOkOrThrowHttpError(response, "Audio transcription failed");
    const parsed = (await response.json()) as DeepgramTranscriptResponse;
    // Only the first alternative of the first channel is consulted.
    const firstTranscript = parsed.results?.channels?.[0]?.alternatives?.[0]?.transcript;
    const text = requireTranscriptionText(
      firstTranscript,
      "Audio transcription response missing transcript",
    );
    return { text, model };
  } finally {
    // Always release the request handle, whether the call succeeded or threw.
    await releaseRequest();
  }
}

View File

@@ -1,8 +0,0 @@
import type { MediaUnderstandingProvider } from "../../types.js";
import { transcribeDeepgramAudio } from "./audio.js";
/**
 * Media-understanding provider entry for Deepgram.
 * Declares the "audio" capability only and uses `transcribeDeepgramAudio`
 * directly as its `transcribeAudio` implementation.
 */
export const deepgramProvider: MediaUnderstandingProvider = {
id: "deepgram",
capabilities: ["audio"],
transcribeAudio: transcribeDeepgramAudio,
};

View File

@@ -1,17 +0,0 @@
import type { MediaUnderstandingProvider } from "../../types.js";
import { transcribeOpenAiCompatibleAudio } from "../openai-compatible-audio.js";
/** Base URL used for Groq audio requests when the caller does not supply one. */
const DEFAULT_GROQ_AUDIO_BASE_URL = "https://api.groq.com/openai/v1";
/** Model name passed to the shared transcriber as `defaultModel`. */
const DEFAULT_GROQ_AUDIO_MODEL = "whisper-large-v3-turbo";

/**
 * Media-understanding provider entry for Groq.
 * Declares the "audio" capability only and delegates transcription to
 * `transcribeOpenAiCompatibleAudio` with Groq-specific defaults applied.
 */
export const groqProvider: MediaUnderstandingProvider = {
  id: "groq",
  capabilities: ["audio"],
  transcribeAudio: (req) => {
    // The request is spread first so the Groq defaults below take precedence
    // over any same-named fields it carries; a nullish `baseUrl` is replaced
    // by the Groq default before delegating.
    const groqRequest = {
      ...req,
      baseUrl: req.baseUrl ?? DEFAULT_GROQ_AUDIO_BASE_URL,
      defaultBaseUrl: DEFAULT_GROQ_AUDIO_BASE_URL,
      defaultModel: DEFAULT_GROQ_AUDIO_MODEL,
    };
    return transcribeOpenAiCompatibleAudio(groqRequest);
  },
};

View File

@@ -19,17 +19,6 @@ describe("media-understanding provider registry", () => {
resetPluginRuntimeStateForTest();
});
// Builds a registry with no config and checks that both "groq" and "deepgram"
// resolve to providers whose only declared capability is "audio".
it("keeps core-owned fallback providers registered by default", () => {
const registry = buildMediaUnderstandingRegistry();
const groqProvider = getMediaUnderstandingProvider("groq", registry);
const deepgramProvider = getMediaUnderstandingProvider("deepgram", registry);
expect(groqProvider?.id).toBe("groq");
expect(groqProvider?.capabilities).toEqual(["audio"]);
expect(deepgramProvider?.id).toBe("deepgram");
expect(deepgramProvider?.capabilities).toEqual(["audio"]);
});
it("merges plugin-registered media providers into the active registry", async () => {
const pluginRegistry = createEmptyPluginRegistry();
pluginRegistry.mediaUnderstandingProviders.push({
@@ -75,7 +64,7 @@ describe("media-understanding provider registry", () => {
// Builds a registry with no config and no active runtime registry, then checks
// the registered keys and that the plugin loader mock was never invoked.
// NOTE(review): the two `registry.keys()` expectations below cannot both hold.
// They look like the before/after pair of this change — confirm that only the
// empty-array expectation remains in the final file.
it("does not load plugins when config is absent and no runtime registry is active", () => {
const registry = buildMediaUnderstandingRegistry();
expect([...registry.keys()]).toEqual(["groq", "deepgram"]);
expect([...registry.keys()]).toEqual([]);
expect(loadOpenClawPluginsMock).not.toHaveBeenCalled();
});
});

View File

@@ -3,10 +3,6 @@ import type { OpenClawConfig } from "../../config/config.js";
import { loadOpenClawPlugins } from "../../plugins/loader.js";
import { getActivePluginRegistry } from "../../plugins/runtime.js";
import type { MediaUnderstandingProvider } from "../types.js";
import { deepgramProvider } from "./deepgram/index.js";
import { groqProvider } from "./groq/index.js";
const PROVIDERS: MediaUnderstandingProvider[] = [groqProvider, deepgramProvider];
function mergeProviderIntoRegistry(
registry: Map<string, MediaUnderstandingProvider>,
@@ -37,9 +33,6 @@ export function buildMediaUnderstandingRegistry(
cfg?: OpenClawConfig,
): Map<string, MediaUnderstandingProvider> {
const registry = new Map<string, MediaUnderstandingProvider>();
for (const provider of PROVIDERS) {
mergeProviderIntoRegistry(registry, provider);
}
const active = getActivePluginRegistry();
const activeEntries = active?.mediaUnderstandingProviders ?? [];
for (const entry of activeEntries) {