mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Add runtime.stt.transcribeAudioFile for plugin STT access
Expose audio transcription through the PluginRuntime so external
plugins (e.g. marmot) can use openclaw's media-understanding provider
framework without importing unexported internal modules.
The new transcribeAudioFile() wraps runCapability({capability: "audio"})
and reads provider/model/apiKey from tools.media.audio in the config,
matching the pattern used by the Discord VC implementation.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
f7b0378ccb
commit
faa4ffec03
@@ -120,6 +120,9 @@ function createMockRuntime(): PluginRuntime {
|
||||
tts: {
|
||||
textToSpeechTelephony: vi.fn() as unknown as PluginRuntime["tts"]["textToSpeechTelephony"],
|
||||
},
|
||||
stt: {
|
||||
transcribeAudioFile: vi.fn() as unknown as PluginRuntime["stt"]["transcribeAudioFile"],
|
||||
},
|
||||
tools: {
|
||||
createMemoryGetTool: vi.fn() as unknown as PluginRuntime["tools"]["createMemoryGetTool"],
|
||||
createMemorySearchTool:
|
||||
|
||||
51
src/media-understanding/transcribe-audio.ts
Normal file
51
src/media-understanding/transcribe-audio.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
|
||||
/**
 * Transcribe an audio file using the configured media-understanding provider.
 *
 * Builds a minimal media context from `filePath`, runs the `"audio"`
 * capability with the `tools.media.audio` section of the openclaw config, and
 * returns the trimmed text of the first `"audio.transcription"` output.
 * Provider/model selection is delegated entirely to `runCapability`.
 *
 * This is the runtime-exposed entry point for external plugins (e.g. marmot)
 * that need STT without importing internal media-understanding modules directly.
 *
 * @param params.filePath - Path of the audio file to transcribe.
 * @param params.cfg - OpenClaw config; its `tools.media.audio` section is
 *   forwarded to the runner as the capability config.
 * @param params.agentDir - Optional agent directory forwarded to the runner.
 * @param params.mime - Optional MIME type of the file; defaults to
 *   `"audio/wav"` when omitted.
 * @returns `{ text }`, where `text` is `undefined` when no attachment could be
 *   derived from the context, when the runner produced no transcription
 *   output, or when the transcription is empty after trimming.
 */
export async function transcribeAudioFile(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
}): Promise<{ text: string | undefined }> {
  const ctx = {
    MediaPath: params.filePath,
    MediaType: params.mime ?? "audio/wav",
  };

  const attachments = normalizeMediaAttachments(ctx);
  if (attachments.length === 0) {
    return { text: undefined };
  }

  const cache = createMediaAttachmentCache(attachments);
  try {
    // Built inside the try block so that a failure while constructing the
    // registry still reaches the finally clause and releases the cache.
    const providerRegistry = buildProviderRegistry();

    const result = await runCapability({
      capability: "audio",
      cfg: params.cfg,
      ctx,
      attachments: cache,
      media: attachments,
      agentDir: params.agentDir,
      providerRegistry,
      config: params.cfg.tools?.media?.audio,
    });

    const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
    const text = output?.text?.trim();

    // `||` is intentional: an empty (whitespace-only) transcription is
    // reported as "no text" rather than as an empty string.
    return { text: text || undefined };
  } finally {
    await cache.cleanup();
  }
}
|
||||
@@ -95,6 +95,7 @@ import { buildTemplateMessageFromPayload } from "../../line/template-messages.js
|
||||
import { getChildLogger } from "../../logging.js";
|
||||
import { normalizeLogLevel } from "../../logging/levels.js";
|
||||
import { convertMarkdownTables } from "../../markdown/tables.js";
|
||||
import { transcribeAudioFile } from "../../media-understanding/transcribe-audio.js";
|
||||
import { isVoiceCompatibleAudio } from "../../media/audio.js";
|
||||
import { mediaKindFromMime } from "../../media/constants.js";
|
||||
import { fetchRemoteMedia } from "../../media/fetch.js";
|
||||
@@ -244,6 +245,7 @@ export function createPluginRuntime(): PluginRuntime {
|
||||
system: createRuntimeSystem(),
|
||||
media: createRuntimeMedia(),
|
||||
tts: { textToSpeechTelephony },
|
||||
stt: { transcribeAudioFile },
|
||||
tools: createRuntimeTools(),
|
||||
channel: createRuntimeChannel(),
|
||||
logging: createRuntimeLogging(),
|
||||
|
||||
@@ -25,6 +25,8 @@ type UpsertChannelPairingRequestForAccount = (
|
||||
type FetchRemoteMedia = typeof import("../../media/fetch.js").fetchRemoteMedia;
|
||||
type SaveMediaBuffer = typeof import("../../media/store.js").saveMediaBuffer;
|
||||
type TextToSpeechTelephony = typeof import("../../tts/tts.js").textToSpeechTelephony;
|
||||
type TranscribeAudioFile =
|
||||
typeof import("../../media-understanding/transcribe-audio.js").transcribeAudioFile;
|
||||
type BuildMentionRegexes = typeof import("../../auto-reply/reply/mentions.js").buildMentionRegexes;
|
||||
type MatchesMentionPatterns =
|
||||
typeof import("../../auto-reply/reply/mentions.js").matchesMentionPatterns;
|
||||
@@ -207,6 +209,9 @@ export type PluginRuntime = {
|
||||
tts: {
|
||||
textToSpeechTelephony: TextToSpeechTelephony;
|
||||
};
|
||||
stt: {
|
||||
transcribeAudioFile: TranscribeAudioFile;
|
||||
};
|
||||
tools: {
|
||||
createMemoryGetTool: CreateMemoryGetTool;
|
||||
createMemorySearchTool: CreateMemorySearchTool;
|
||||
|
||||
Reference in New Issue
Block a user