Move the TTS runtime facade types out of src/plugin-sdk/tts-runtime.ts into the
new src/plugin-sdk/tts-runtime.types.ts. tts-runtime.ts imports and re-exports
them, and PluginRuntimeCore.tts uses the shared TextToSpeech,
TextToSpeechTelephony and ListSpeechVoices aliases instead of
typeof import("../../tts/tts.js").

diff --git a/src/plugin-sdk/tts-runtime.ts b/src/plugin-sdk/tts-runtime.ts
index 83986520c6a..c5e75bf9a36 100644
--- a/src/plugin-sdk/tts-runtime.ts
+++ b/src/plugin-sdk/tts-runtime.ts
@@ -1,151 +1,21 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
-import type { OpenClawConfig } from "../config/types.openclaw.js";
-import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
-import type {
-  SpeechProviderConfig,
-  SpeechVoiceOption,
-  TtsDirectiveOverrides,
-  TtsDirectiveParseResult,
-} from "../tts/provider-types.js";
-import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
 import {
   createLazyFacadeObjectValue,
   loadActivatedBundledPluginPublicSurfaceModuleSync,
 } from "./facade-runtime.js";
+import type {
+  ResolvedTtsConfig,
+  ResolvedTtsModelOverrides,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+  TtsResult,
+  TtsRuntimeFacade,
+  TtsSynthesisResult,
+  TtsTelephonyResult,
+} from "./tts-runtime.types.js";
 
 // Manual facade. Keep loader boundary explicit and avoid typing this public SDK
 // seam through the bundled speech-core runtime surface.
-type TtsAttemptReasonCode =
-  | "success"
-  | "no_provider_registered"
-  | "not_configured"
-  | "unsupported_for_telephony"
-  | "timeout"
-  | "provider_error";
-
-type TtsProviderAttempt = {
-  provider: string;
-  outcome: "success" | "skipped" | "failed";
-  reasonCode: TtsAttemptReasonCode;
-  latencyMs?: number;
-  error?: string;
-};
-
-type TtsStatusEntry = {
-  timestamp: number;
-  success: boolean;
-  textLength: number;
-  summarized: boolean;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  latencyMs?: number;
-  error?: string;
-};
-
-type SummarizeResult = {
-  summary: string;
-  latencyMs: number;
-  inputLength: number;
-  outputLength: number;
-};
-
-type ResolveTtsAutoModeParams = {
-  config: ResolvedTtsConfig;
-  prefsPath: string;
-  sessionAuto?: string;
-};
-
-type ResolveExplicitTtsOverridesParams = {
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-  provider?: string;
-  modelId?: string;
-  voiceId?: string;
-};
-
-type TtsRequestParams = {
-  text: string;
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-  channel?: string;
-  overrides?: TtsDirectiveOverrides;
-  disableFallback?: boolean;
-};
-
-type TtsTelephonyRequestParams = {
-  text: string;
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-};
-
-type ListSpeechVoicesParams = {
-  provider: string;
-  cfg?: OpenClawConfig;
-  config?: ResolvedTtsConfig;
-  apiKey?: string;
-  baseUrl?: string;
-};
-
-type MaybeApplyTtsToPayloadParams = {
-  payload: ReplyPayload;
-  cfg: OpenClawConfig;
-  channel?: string;
-  kind?: "tool" | "block" | "final";
-  inboundAudio?: boolean;
-  ttsAuto?: string;
-};
-
-type TtsTestFacade = {
-  parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult;
-  resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides;
-  supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
-  summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
-  getResolvedSpeechProviderConfig: (
-    config: ResolvedTtsConfig,
-    providerId: string,
-    cfg?: OpenClawConfig,
-  ) => SpeechProviderConfig;
-  formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
-  sanitizeTtsErrorForLog: (err: unknown) => string;
-};
-
-type FacadeModule = {
-  _test: TtsTestFacade;
-  buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
-  getLastTtsAttempt: () => TtsStatusEntry | undefined;
-  getResolvedSpeechProviderConfig: (
-    config: ResolvedTtsConfig,
-    providerId: string,
-    cfg?: OpenClawConfig,
-  ) => SpeechProviderConfig;
-  getTtsMaxLength: (prefsPath: string) => number;
-  getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
-  isSummarizationEnabled: (prefsPath: string) => boolean;
-  isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
-  isTtsProviderConfigured: (
-    config: ResolvedTtsConfig,
-    provider: TtsProvider,
-    cfg?: OpenClawConfig,
-  ) => boolean;
-  listSpeechVoices: (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>;
-  maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
-  resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
-  resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
-  resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
-  resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
-  resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
-  setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
-  setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
-  setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
-  setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
-  setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
-  setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
-  synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
-  textToSpeech: (params: TtsRequestParams) => Promise<TtsResult>;
-  textToSpeechTelephony: (params: TtsTelephonyRequestParams) => Promise<TtsTelephonyResult>;
-};
+type FacadeModule = TtsRuntimeFacade;
 
 function loadFacadeModule(): FacadeModule {
   return loadActivatedBundledPluginPublicSurfaceModuleSync<FacadeModule>({
@@ -203,48 +73,15 @@ export const textToSpeech: FacadeModule["textToSpeech"] = createLazyFacadeValue(
 export const textToSpeechTelephony: FacadeModule["textToSpeechTelephony"] =
   createLazyFacadeValue("textToSpeechTelephony");
 
-export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
-export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
-
-export type TtsResult = {
-  success: boolean;
-  audioPath?: string;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  voiceCompatible?: boolean;
-};
-
-export type TtsSynthesisResult = {
-  success: boolean;
-  audioBuffer?: Buffer;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  voiceCompatible?: boolean;
-  fileExtension?: string;
-};
-
-export type TtsTelephonyResult = {
-  success: boolean;
-  audioBuffer?: Buffer;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  sampleRate?: number;
-};
+export type {
+  ResolvedTtsConfig,
+  ResolvedTtsModelOverrides,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+  TtsResult,
+  TtsSynthesisResult,
+  TtsTelephonyResult,
+} from "./tts-runtime.types.js";
 
 function createLazyFacadeValue<K extends keyof FacadeModule>(key: K): FacadeModule[K] {
   return ((...args: unknown[]) => {
diff --git a/src/plugin-sdk/tts-runtime.types.ts b/src/plugin-sdk/tts-runtime.types.ts
new file mode 100644
index 00000000000..2a8c5b61458
--- /dev/null
+++ b/src/plugin-sdk/tts-runtime.types.ts
@@ -0,0 +1,191 @@
+import type { ReplyPayload } from "../auto-reply/reply-payload.js";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
+import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
+import type {
+  SpeechProviderConfig,
+  SpeechVoiceOption,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+} from "../tts/provider-types.js";
+import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
+
+export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
+export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
+
+export type TtsAttemptReasonCode =
+  | "success"
+  | "no_provider_registered"
+  | "not_configured"
+  | "unsupported_for_telephony"
+  | "timeout"
+  | "provider_error";
+
+export type TtsProviderAttempt = {
+  provider: string;
+  outcome: "success" | "skipped" | "failed";
+  reasonCode: TtsAttemptReasonCode;
+  latencyMs?: number;
+  error?: string;
+};
+
+export type TtsStatusEntry = {
+  timestamp: number;
+  success: boolean;
+  textLength: number;
+  summarized: boolean;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  latencyMs?: number;
+  error?: string;
+};
+
+export type SummarizeResult = {
+  summary: string;
+  latencyMs: number;
+  inputLength: number;
+  outputLength: number;
+};
+
+export type ResolveTtsAutoModeParams = {
+  config: ResolvedTtsConfig;
+  prefsPath: string;
+  sessionAuto?: string;
+};
+
+export type ResolveExplicitTtsOverridesParams = {
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+  provider?: string;
+  modelId?: string;
+  voiceId?: string;
+};
+
+export type TtsRequestParams = {
+  text: string;
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+  channel?: string;
+  overrides?: TtsDirectiveOverrides;
+  disableFallback?: boolean;
+};
+
+export type TtsTelephonyRequestParams = {
+  text: string;
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+};
+
+export type ListSpeechVoicesParams = {
+  provider: string;
+  cfg?: OpenClawConfig;
+  config?: ResolvedTtsConfig;
+  apiKey?: string;
+  baseUrl?: string;
+};
+
+export type MaybeApplyTtsToPayloadParams = {
+  payload: ReplyPayload;
+  cfg: OpenClawConfig;
+  channel?: string;
+  kind?: "tool" | "block" | "final";
+  inboundAudio?: boolean;
+  ttsAuto?: string;
+};
+
+export type TtsTestFacade = {
+  parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult;
+  resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides;
+  supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
+  summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
+  getResolvedSpeechProviderConfig: (
+    config: ResolvedTtsConfig,
+    providerId: string,
+    cfg?: OpenClawConfig,
+  ) => SpeechProviderConfig;
+  formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
+  sanitizeTtsErrorForLog: (err: unknown) => string;
+};
+
+export type TtsResult = {
+  success: boolean;
+  audioPath?: string;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  voiceCompatible?: boolean;
+};
+
+export type TtsSynthesisResult = {
+  success: boolean;
+  audioBuffer?: Buffer;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  voiceCompatible?: boolean;
+  fileExtension?: string;
+};
+
+export type TtsTelephonyResult = {
+  success: boolean;
+  audioBuffer?: Buffer;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  sampleRate?: number;
+};
+
+export type TextToSpeech = (params: TtsRequestParams) => Promise<TtsResult>;
+export type TextToSpeechTelephony = (
+  params: TtsTelephonyRequestParams,
+) => Promise<TtsTelephonyResult>;
+export type ListSpeechVoices = (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>;
+
+export type TtsRuntimeFacade = {
+  _test: TtsTestFacade;
+  buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
+  getLastTtsAttempt: () => TtsStatusEntry | undefined;
+  getResolvedSpeechProviderConfig: (
+    config: ResolvedTtsConfig,
+    providerId: string,
+    cfg?: OpenClawConfig,
+  ) => SpeechProviderConfig;
+  getTtsMaxLength: (prefsPath: string) => number;
+  getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
+  isSummarizationEnabled: (prefsPath: string) => boolean;
+  isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
+  isTtsProviderConfigured: (
+    config: ResolvedTtsConfig,
+    provider: TtsProvider,
+    cfg?: OpenClawConfig,
+  ) => boolean;
+  listSpeechVoices: ListSpeechVoices;
+  maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
+  resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
+  resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
+  resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
+  resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
+  resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
+  setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
+  setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
+  setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
+  setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
+  setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
+  setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
+  synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
+  textToSpeech: TextToSpeech;
+  textToSpeechTelephony: TextToSpeechTelephony;
+};
diff --git a/src/plugins/runtime/types-core.ts b/src/plugins/runtime/types-core.ts
index f546fc8881a..83bd238d997 100644
--- a/src/plugins/runtime/types-core.ts
+++ b/src/plugins/runtime/types-core.ts
@@ -5,6 +5,11 @@ import type {
 import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
 import type { LogLevel } from "../../logging/levels.js";
 import type { MediaUnderstandingRuntime } from "../../media-understanding/runtime-types.js";
+import type {
+  ListSpeechVoices,
+  TextToSpeech,
+  TextToSpeechTelephony,
+} from "../../plugin-sdk/tts-runtime.types.js";
 
 export type { HeartbeatRunResult };
 
@@ -87,9 +92,9 @@ export type PluginRuntimeCore = {
     resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg;
   };
   tts: {
-    textToSpeech: typeof import("../../tts/tts.js").textToSpeech;
-    textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony;
-    listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
+    textToSpeech: TextToSpeech;
+    textToSpeechTelephony: TextToSpeechTelephony;
+    listVoices: ListSpeechVoices;
   };
   mediaUnderstanding: {
     runFile: MediaUnderstandingRuntime["runMediaUnderstandingFile"];