mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-26 00:55:12 +00:00
197 lines
6.3 KiB
TypeScript
197 lines
6.3 KiB
TypeScript
import { describe, expect, it, vi } from "vitest";
|
|
import type { MsgContext } from "../auto-reply/templating.js";
|
|
import type { OpenClawConfig } from "../config/types.js";
|
|
import { MIN_AUDIO_FILE_BYTES } from "./defaults.js";
|
|
import type {
|
|
createMediaAttachmentCache,
|
|
normalizeMediaAttachments,
|
|
} from "./runner.attachments.js";
|
|
import { buildProviderRegistry, runCapability } from "./runner.js";
|
|
import { withMediaFixture } from "./runner.test-utils.js";
|
|
import type { AudioTranscriptionRequest } from "./types.js";
|
|
|
|
vi.mock("../agents/model-auth.js", async () => {
|
|
const { createAvailableModelAuthMockModule } = await import("./runner.test-mocks.js");
|
|
return createAvailableModelAuthMockModule();
|
|
});
|
|
|
|
vi.mock("../plugins/capability-provider-runtime.js", async () => {
|
|
const { createEmptyCapabilityProviderMockModule } = await import("./runner.test-mocks.js");
|
|
return createEmptyCapabilityProviderMockModule();
|
|
});
|
|
|
|
async function withAudioFixture(params: {
|
|
filePrefix: string;
|
|
extension: string;
|
|
mediaType: string;
|
|
fileContents: Buffer;
|
|
run: (params: {
|
|
ctx: MsgContext;
|
|
media: ReturnType<typeof normalizeMediaAttachments>;
|
|
cache: ReturnType<typeof createMediaAttachmentCache>;
|
|
}) => Promise<void>;
|
|
}) {
|
|
await withMediaFixture(
|
|
{
|
|
filePrefix: params.filePrefix,
|
|
extension: params.extension,
|
|
mediaType: params.mediaType,
|
|
fileContents: params.fileContents,
|
|
},
|
|
params.run,
|
|
);
|
|
}
|
|
|
|
const AUDIO_CAPABILITY_CFG = {
|
|
models: {
|
|
providers: {
|
|
openai: {
|
|
apiKey: "test-key", // pragma: allowlist secret
|
|
models: [],
|
|
},
|
|
},
|
|
},
|
|
} as unknown as OpenClawConfig;
|
|
|
|
async function runAudioCapabilityWithTranscriber(params: {
|
|
ctx: MsgContext;
|
|
media: ReturnType<typeof normalizeMediaAttachments>;
|
|
cache: ReturnType<typeof createMediaAttachmentCache>;
|
|
transcribeAudio: (req: AudioTranscriptionRequest) => Promise<{ text: string; model: string }>;
|
|
}) {
|
|
const providerRegistry = buildProviderRegistry({
|
|
openai: {
|
|
id: "openai",
|
|
capabilities: ["audio"],
|
|
transcribeAudio: params.transcribeAudio,
|
|
},
|
|
});
|
|
|
|
return await runCapability({
|
|
capability: "audio",
|
|
cfg: AUDIO_CAPABILITY_CFG,
|
|
ctx: params.ctx,
|
|
attachments: params.cache,
|
|
media: params.media,
|
|
providerRegistry,
|
|
});
|
|
}
|
|
|
|
describe("runCapability skips tiny audio files", () => {
|
|
it("skips audio transcription when file is smaller than MIN_AUDIO_FILE_BYTES", async () => {
|
|
await withAudioFixture({
|
|
filePrefix: "openclaw-tiny-audio",
|
|
extension: "wav",
|
|
mediaType: "audio/wav",
|
|
fileContents: Buffer.alloc(100), // 100 bytes, way below 1024
|
|
run: async ({ ctx, media, cache }) => {
|
|
let transcribeCalled = false;
|
|
const result = await runAudioCapabilityWithTranscriber({
|
|
ctx,
|
|
media,
|
|
cache,
|
|
transcribeAudio: async (req) => {
|
|
transcribeCalled = true;
|
|
return { text: "should not happen", model: req.model ?? "whisper-1" };
|
|
},
|
|
});
|
|
|
|
// The provider should never be called
|
|
expect(transcribeCalled).toBe(false);
|
|
|
|
// The result should indicate the attachment was skipped
|
|
expect(result.outputs).toHaveLength(0);
|
|
expect(result.decision.outcome).toBe("skipped");
|
|
expect(result.decision.attachments).toHaveLength(1);
|
|
expect(result.decision.attachments[0].attempts).toHaveLength(1);
|
|
expect(result.decision.attachments[0].attempts[0].outcome).toBe("skipped");
|
|
expect(result.decision.attachments[0].attempts[0].reason).toContain("tooSmall");
|
|
},
|
|
});
|
|
});
|
|
|
|
it("skips audio transcription for empty (0-byte) files", async () => {
|
|
await withAudioFixture({
|
|
filePrefix: "openclaw-empty-audio",
|
|
extension: "ogg",
|
|
mediaType: "audio/ogg",
|
|
fileContents: Buffer.alloc(0),
|
|
run: async ({ ctx, media, cache }) => {
|
|
let transcribeCalled = false;
|
|
const result = await runAudioCapabilityWithTranscriber({
|
|
ctx,
|
|
media,
|
|
cache,
|
|
transcribeAudio: async () => {
|
|
transcribeCalled = true;
|
|
return { text: "nope", model: "whisper-1" };
|
|
},
|
|
});
|
|
|
|
expect(transcribeCalled).toBe(false);
|
|
expect(result.outputs).toHaveLength(0);
|
|
},
|
|
});
|
|
});
|
|
|
|
it("proceeds with transcription when file meets minimum size", async () => {
|
|
await withAudioFixture({
|
|
filePrefix: "openclaw-ok-audio",
|
|
extension: "wav",
|
|
mediaType: "audio/wav",
|
|
fileContents: Buffer.alloc(MIN_AUDIO_FILE_BYTES + 100),
|
|
run: async ({ ctx, media, cache }) => {
|
|
let transcribeCalled = false;
|
|
const result = await runAudioCapabilityWithTranscriber({
|
|
ctx,
|
|
media,
|
|
cache,
|
|
transcribeAudio: async (req) => {
|
|
transcribeCalled = true;
|
|
return { text: "hello world", model: req.model ?? "whisper-1" };
|
|
},
|
|
});
|
|
|
|
expect(transcribeCalled).toBe(true);
|
|
expect(result.outputs).toHaveLength(1);
|
|
expect(result.outputs[0].text).toBe("hello world");
|
|
expect(result.decision.outcome).toBe("success");
|
|
},
|
|
});
|
|
});
|
|
|
|
it("marks the decision as failed when every audio model attempt fails", async () => {
|
|
await withAudioFixture({
|
|
filePrefix: "openclaw-failed-audio",
|
|
extension: "ogg",
|
|
mediaType: "audio/ogg",
|
|
fileContents: Buffer.alloc(MIN_AUDIO_FILE_BYTES + 100),
|
|
run: async ({ ctx, media, cache }) => {
|
|
const result = await runAudioCapabilityWithTranscriber({
|
|
ctx,
|
|
media,
|
|
cache,
|
|
transcribeAudio: async () => {
|
|
throw new Error("upstream 500");
|
|
},
|
|
});
|
|
|
|
expect(result.outputs).toHaveLength(0);
|
|
expect(result.decision.outcome).toBe("failed");
|
|
expect(result.decision.attachments).toHaveLength(1);
|
|
const attachment = result.decision.attachments[0];
|
|
if (!attachment) {
|
|
throw new Error("expected failed audio decision attachment");
|
|
}
|
|
expect(attachment.attempts).toHaveLength(1);
|
|
const attempt = attachment.attempts[0];
|
|
if (!attempt) {
|
|
throw new Error("expected failed audio decision attempt");
|
|
}
|
|
expect(attempt.outcome).toBe("failed");
|
|
expect(attempt.reason).toContain("upstream 500");
|
|
},
|
|
});
|
|
});
|
|
});
|