// Mirror of https://github.com/openclaw/openclaw.git
// Synced 2026-05-01 02:30:23 +00:00
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { describe, expect, it } from "vitest";
|
|
import type { OpenClawConfig } from "../config/config.js";
|
|
import { withEnvAsync } from "../test-utils/env.js";
|
|
import { buildProviderRegistry, runCapability } from "./runner.js";
|
|
import { withAudioFixture } from "./runner.test-utils.js";
|
|
|
|
function createOpenAiAudioProvider(
|
|
transcribeAudio: (req: { model?: string }) => Promise<{ text: string; model: string }>,
|
|
) {
|
|
return buildProviderRegistry({
|
|
openai: {
|
|
id: "openai",
|
|
capabilities: ["audio"],
|
|
transcribeAudio,
|
|
},
|
|
});
|
|
}
|
|
|
|
function createOpenAiAudioCfg(extra?: Partial<OpenClawConfig>): OpenClawConfig {
|
|
return {
|
|
models: {
|
|
providers: {
|
|
openai: {
|
|
apiKey: "test-key",
|
|
models: [],
|
|
},
|
|
},
|
|
},
|
|
...extra,
|
|
} as unknown as OpenClawConfig;
|
|
}
|
|
|
|
async function runAutoAudioCase(params: {
|
|
transcribeAudio: (req: { model?: string }) => Promise<{ text: string; model: string }>;
|
|
cfgExtra?: Partial<OpenClawConfig>;
|
|
}) {
|
|
let runResult: Awaited<ReturnType<typeof runCapability>> | undefined;
|
|
await withAudioFixture("openclaw-auto-audio", async ({ ctx, media, cache }) => {
|
|
const providerRegistry = createOpenAiAudioProvider(params.transcribeAudio);
|
|
const cfg = createOpenAiAudioCfg(params.cfgExtra);
|
|
runResult = await runCapability({
|
|
capability: "audio",
|
|
cfg,
|
|
ctx,
|
|
attachments: cache,
|
|
media,
|
|
providerRegistry,
|
|
});
|
|
});
|
|
if (!runResult) {
|
|
throw new Error("Expected auto audio case result");
|
|
}
|
|
return runResult;
|
|
}
|
|
|
|
describe("runCapability auto audio entries", () => {
|
|
it("uses provider keys to auto-enable audio transcription", async () => {
|
|
let seenModel: string | undefined;
|
|
const result = await runAutoAudioCase({
|
|
transcribeAudio: async (req) => {
|
|
seenModel = req.model;
|
|
return { text: "ok", model: req.model ?? "unknown" };
|
|
},
|
|
});
|
|
expect(result.outputs[0]?.text).toBe("ok");
|
|
expect(seenModel).toBe("gpt-4o-mini-transcribe");
|
|
expect(result.decision.outcome).toBe("success");
|
|
});
|
|
|
|
it("skips auto audio when disabled", async () => {
|
|
const result = await runAutoAudioCase({
|
|
transcribeAudio: async () => ({
|
|
text: "ok",
|
|
model: "whisper-1",
|
|
}),
|
|
cfgExtra: {
|
|
tools: {
|
|
media: {
|
|
audio: {
|
|
enabled: false,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
});
|
|
expect(result.outputs).toHaveLength(0);
|
|
expect(result.decision.outcome).toBe("disabled");
|
|
});
|
|
|
|
it("prefers explicitly configured audio model entries", async () => {
|
|
let seenModel: string | undefined;
|
|
const result = await runAutoAudioCase({
|
|
transcribeAudio: async (req) => {
|
|
seenModel = req.model;
|
|
return { text: "ok", model: req.model ?? "unknown" };
|
|
},
|
|
cfgExtra: {
|
|
tools: {
|
|
media: {
|
|
audio: {
|
|
models: [{ provider: "openai", model: "whisper-1" }],
|
|
},
|
|
},
|
|
},
|
|
},
|
|
});
|
|
|
|
expect(result.outputs[0]?.text).toBe("ok");
|
|
expect(seenModel).toBe("whisper-1");
|
|
});
|
|
|
|
it("uses mistral when only mistral key is configured", async () => {
|
|
const isolatedAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-audio-agent-"));
|
|
let runResult: Awaited<ReturnType<typeof runCapability>> | undefined;
|
|
try {
|
|
await withEnvAsync(
|
|
{
|
|
OPENAI_API_KEY: undefined,
|
|
GROQ_API_KEY: undefined,
|
|
DEEPGRAM_API_KEY: undefined,
|
|
GEMINI_API_KEY: undefined,
|
|
GOOGLE_API_KEY: undefined,
|
|
MISTRAL_API_KEY: "mistral-test-key", // pragma: allowlist secret
|
|
OPENCLAW_AGENT_DIR: isolatedAgentDir,
|
|
PI_CODING_AGENT_DIR: isolatedAgentDir,
|
|
},
|
|
async () => {
|
|
await withAudioFixture("openclaw-auto-audio-mistral", async ({ ctx, media, cache }) => {
|
|
const providerRegistry = buildProviderRegistry({
|
|
openai: {
|
|
id: "openai",
|
|
capabilities: ["audio"],
|
|
transcribeAudio: async () => ({
|
|
text: "openai",
|
|
model: "gpt-4o-mini-transcribe",
|
|
}),
|
|
},
|
|
mistral: {
|
|
id: "mistral",
|
|
capabilities: ["audio"],
|
|
transcribeAudio: async (req) => ({
|
|
text: "mistral",
|
|
model: req.model ?? "unknown",
|
|
}),
|
|
},
|
|
});
|
|
const cfg = {
|
|
models: {
|
|
providers: {
|
|
mistral: {
|
|
apiKey: "mistral-test-key", // pragma: allowlist secret
|
|
models: [],
|
|
},
|
|
},
|
|
},
|
|
tools: {
|
|
media: {
|
|
audio: {
|
|
enabled: true,
|
|
},
|
|
},
|
|
},
|
|
} as unknown as OpenClawConfig;
|
|
|
|
runResult = await runCapability({
|
|
capability: "audio",
|
|
cfg,
|
|
ctx,
|
|
attachments: cache,
|
|
media,
|
|
providerRegistry,
|
|
});
|
|
});
|
|
},
|
|
);
|
|
} finally {
|
|
await fs.rm(isolatedAgentDir, { recursive: true, force: true });
|
|
}
|
|
if (!runResult) {
|
|
throw new Error("Expected auto audio mistral result");
|
|
}
|
|
expect(runResult.decision.outcome).toBe("success");
|
|
expect(runResult.outputs[0]?.provider).toBe("mistral");
|
|
expect(runResult.outputs[0]?.model).toBe("voxtral-mini-latest");
|
|
expect(runResult.outputs[0]?.text).toBe("mistral");
|
|
});
|
|
});
|