fix(plugin-sdk): split tts runtime contract types

This commit is contained in:
Vincent Koc
2026-04-12 02:11:48 +01:00
parent 323e37c862
commit 51731d906f
3 changed files with 219 additions and 186 deletions

View File

@@ -1,151 +1,21 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type {
SpeechProviderConfig,
SpeechVoiceOption,
TtsDirectiveOverrides,
TtsDirectiveParseResult,
} from "../tts/provider-types.js";
import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
import {
createLazyFacadeObjectValue,
loadActivatedBundledPluginPublicSurfaceModuleSync,
} from "./facade-runtime.js";
import type {
ResolvedTtsConfig,
ResolvedTtsModelOverrides,
TtsDirectiveOverrides,
TtsDirectiveParseResult,
TtsResult,
TtsRuntimeFacade,
TtsSynthesisResult,
TtsTelephonyResult,
} from "./tts-runtime.types.js";
// Manual facade. Keep loader boundary explicit and avoid typing this public SDK
// seam through the bundled speech-core runtime surface.
// Closed set of reason codes recorded for a single provider attempt; consumed
// as `reasonCode` on TtsProviderAttempt.
type TtsAttemptReasonCode =
| "success"
| "no_provider_registered"
| "not_configured"
| "unsupported_for_telephony"
| "timeout"
| "provider_error";
// One provider attempt within a TTS request: which provider was tried, how the
// attempt ended, and the reason code explaining that outcome.
type TtsProviderAttempt = {
provider: string;
outcome: "success" | "skipped" | "failed";
reasonCode: TtsAttemptReasonCode;
// NOTE(review): presumably elapsed milliseconds for this attempt — confirm.
latencyMs?: number;
error?: string;
};
// Status record for a TTS attempt. This is the value exchanged through
// getLastTtsAttempt / setLastTtsAttempt on the facade module type.
type TtsStatusEntry = {
timestamp: number;
success: boolean;
textLength: number;
// NOTE(review): presumably true when the text was summarized first — confirm.
summarized: boolean;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
// Per-provider attempt records, when available.
attempts?: TtsProviderAttempt[];
latencyMs?: number;
error?: string;
};
// Value resolved by `_test.summarizeText`: the summary text plus timing and
// input/output length figures.
type SummarizeResult = {
summary: string;
latencyMs: number;
inputLength: number;
outputLength: number;
};
// Argument object for `resolveTtsAutoMode`.
type ResolveTtsAutoModeParams = {
config: ResolvedTtsConfig;
prefsPath: string;
// NOTE(review): presumably a per-session auto-mode override — confirm.
sessionAuto?: string;
};
// Argument object for `resolveExplicitTtsOverrides`, which returns
// TtsDirectiveOverrides. Only `cfg` is required.
type ResolveExplicitTtsOverridesParams = {
cfg: OpenClawConfig;
prefsPath?: string;
provider?: string;
modelId?: string;
voiceId?: string;
};
// Argument object shared by `textToSpeech` and `synthesizeSpeech`.
type TtsRequestParams = {
text: string;
cfg: OpenClawConfig;
prefsPath?: string;
channel?: string;
overrides?: TtsDirectiveOverrides;
// NOTE(review): presumably restricts the request to a single provider with
// no fallback — confirm against the runtime implementation.
disableFallback?: boolean;
};
// Argument object for `textToSpeechTelephony`. Unlike TtsRequestParams it has
// no channel, overrides, or disableFallback fields.
type TtsTelephonyRequestParams = {
text: string;
cfg: OpenClawConfig;
prefsPath?: string;
};
// Argument object for `listSpeechVoices`. Only `provider` is required; the
// remaining fields are optional.
type ListSpeechVoicesParams = {
provider: string;
cfg?: OpenClawConfig;
config?: ResolvedTtsConfig;
apiKey?: string;
baseUrl?: string;
};
// Argument object for `maybeApplyTtsToPayload`, which takes a ReplyPayload and
// resolves to a ReplyPayload.
type MaybeApplyTtsToPayloadParams = {
payload: ReplyPayload;
cfg: OpenClawConfig;
channel?: string;
kind?: "tool" | "block" | "final";
inboundAudio?: boolean;
ttsAuto?: string;
};
// Shape of the `_test` member on the facade module type. Several members take
// `...args: unknown[]`, so their parameters are not type-checked through this
// type.
type TtsTestFacade = {
parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult;
resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides;
supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
getResolvedSpeechProviderConfig: (
config: ResolvedTtsConfig,
providerId: string,
cfg?: OpenClawConfig,
) => SpeechProviderConfig;
formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
sanitizeTtsErrorForLog: (err: unknown) => string;
};
// Inline declaration of the module shape that the facade loader is
// parameterized with. The facade's exported values are typed by indexing into
// this type (e.g. FacadeModule["textToSpeech"]).
type FacadeModule = {
// Helper bundle described by TtsTestFacade.
_test: TtsTestFacade;
buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
getLastTtsAttempt: () => TtsStatusEntry | undefined;
getResolvedSpeechProviderConfig: (
config: ResolvedTtsConfig,
providerId: string,
cfg?: OpenClawConfig,
) => SpeechProviderConfig;
getTtsMaxLength: (prefsPath: string) => number;
getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
isSummarizationEnabled: (prefsPath: string) => boolean;
isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
isTtsProviderConfigured: (
config: ResolvedTtsConfig,
provider: TtsProvider,
cfg?: OpenClawConfig,
) => boolean;
listSpeechVoices: (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>;
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
// Passing undefined is allowed by the signature.
setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
// Resolves to a TtsSynthesisResult.
synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
// Resolves to a TtsResult.
textToSpeech: (params: TtsRequestParams) => Promise<TtsResult>;
// Resolves to a TtsTelephonyResult.
textToSpeechTelephony: (params: TtsTelephonyRequestParams) => Promise<TtsTelephonyResult>;
};
// Local alias for the TtsRuntimeFacade contract imported from
// ./tts-runtime.types.js; the loader and facade exports index into this name.
type FacadeModule = TtsRuntimeFacade;
function loadFacadeModule(): FacadeModule {
return loadActivatedBundledPluginPublicSurfaceModuleSync<FacadeModule>({
@@ -203,48 +73,15 @@ export const textToSpeech: FacadeModule["textToSpeech"] = createLazyFacadeValue(
// Public facade export for `textToSpeechTelephony`. The annotation indexes
// FacadeModule, so the exported signature follows the facade contract.
// NOTE(review): createLazyFacadeValue presumably defers loading the backing
// module until the first call — its body is not fully visible here; confirm.
export const textToSpeechTelephony: FacadeModule["textToSpeechTelephony"] =
createLazyFacadeValue("textToSpeechTelephony");
export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
// Result resolved by `textToSpeech`. Audio is referenced through `audioPath`,
// where the synthesis and telephony results carry an `audioBuffer` instead.
export type TtsResult = {
success: boolean;
audioPath?: string;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
// NOTE(review): presumably marks output usable as a voice note — confirm.
voiceCompatible?: boolean;
};
// Result resolved by `synthesizeSpeech`. Carries audio as an in-memory
// `audioBuffer` and adds `fileExtension` compared with TtsResult.
export type TtsSynthesisResult = {
success: boolean;
audioBuffer?: Buffer;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
voiceCompatible?: boolean;
fileExtension?: string;
};
// Result resolved by `textToSpeechTelephony`. Carries an `audioBuffer` and a
// `sampleRate`; it has no `voiceCompatible` or `fileExtension` fields.
export type TtsTelephonyResult = {
success: boolean;
audioBuffer?: Buffer;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
// NOTE(review): unit is not stated here; presumably Hz — confirm.
sampleRate?: number;
};
export type {
ResolvedTtsConfig,
ResolvedTtsModelOverrides,
TtsDirectiveOverrides,
TtsDirectiveParseResult,
TtsResult,
TtsSynthesisResult,
TtsTelephonyResult,
} from "./tts-runtime.types.js";
function createLazyFacadeValue<K extends keyof FacadeModule>(key: K): FacadeModule[K] {
return ((...args: unknown[]) => {

View File

@@ -0,0 +1,191 @@
import type { ReplyPayload } from "../auto-reply/reply-payload.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
import type {
SpeechProviderConfig,
SpeechVoiceOption,
TtsDirectiveOverrides,
TtsDirectiveParseResult,
} from "../tts/provider-types.js";
import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
/**
 * Closed set of reason codes recorded for a single provider attempt.
 * Consumed as `reasonCode` on {@link TtsProviderAttempt}.
 */
export type TtsAttemptReasonCode =
| "success"
| "no_provider_registered"
| "not_configured"
| "unsupported_for_telephony"
| "timeout"
| "provider_error";
/**
 * One provider attempt within a TTS request: which provider was tried, how
 * the attempt ended, and the reason code explaining that outcome.
 */
export type TtsProviderAttempt = {
provider: string;
outcome: "success" | "skipped" | "failed";
reasonCode: TtsAttemptReasonCode;
/** NOTE(review): presumably elapsed milliseconds for this attempt — confirm. */
latencyMs?: number;
error?: string;
};
/**
 * Status record for a TTS attempt. This is the value returned by
 * `getLastTtsAttempt` and accepted by `setLastTtsAttempt` on
 * {@link TtsRuntimeFacade}.
 */
export type TtsStatusEntry = {
timestamp: number;
success: boolean;
textLength: number;
/** NOTE(review): presumably true when the text was summarized first — confirm. */
summarized: boolean;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
/** Per-provider attempt records, when available. */
attempts?: TtsProviderAttempt[];
latencyMs?: number;
error?: string;
};
/**
 * Value resolved by `summarizeText` on {@link TtsTestFacade}: the summary
 * text plus timing and input/output length figures.
 */
export type SummarizeResult = {
summary: string;
latencyMs: number;
inputLength: number;
outputLength: number;
};
/** Argument object for `resolveTtsAutoMode` on {@link TtsRuntimeFacade}. */
export type ResolveTtsAutoModeParams = {
config: ResolvedTtsConfig;
prefsPath: string;
/** NOTE(review): presumably a per-session auto-mode override — confirm. */
sessionAuto?: string;
};
/**
 * Argument object for `resolveExplicitTtsOverrides`, which returns
 * `TtsDirectiveOverrides`. Only `cfg` is required.
 */
export type ResolveExplicitTtsOverridesParams = {
cfg: OpenClawConfig;
prefsPath?: string;
provider?: string;
modelId?: string;
voiceId?: string;
};
/**
 * Argument object shared by {@link TextToSpeech} and the facade's
 * `synthesizeSpeech` member.
 */
export type TtsRequestParams = {
text: string;
cfg: OpenClawConfig;
prefsPath?: string;
channel?: string;
overrides?: TtsDirectiveOverrides;
/**
 * NOTE(review): presumably restricts the request to a single provider with
 * no fallback — confirm against the runtime implementation.
 */
disableFallback?: boolean;
};
/**
 * Argument object for {@link TextToSpeechTelephony}. Unlike
 * {@link TtsRequestParams} it has no channel, overrides, or disableFallback
 * fields.
 */
export type TtsTelephonyRequestParams = {
text: string;
cfg: OpenClawConfig;
prefsPath?: string;
};
/**
 * Argument object for {@link ListSpeechVoices}. Only `provider` is required;
 * the remaining fields are optional.
 */
export type ListSpeechVoicesParams = {
provider: string;
cfg?: OpenClawConfig;
config?: ResolvedTtsConfig;
apiKey?: string;
baseUrl?: string;
};
/**
 * Argument object for `maybeApplyTtsToPayload`, which takes a `ReplyPayload`
 * and resolves to a `ReplyPayload`.
 */
export type MaybeApplyTtsToPayloadParams = {
payload: ReplyPayload;
cfg: OpenClawConfig;
channel?: string;
kind?: "tool" | "block" | "final";
inboundAudio?: boolean;
ttsAuto?: string;
};
/**
 * Shape of the `_test` member on {@link TtsRuntimeFacade}. Several members
 * take `...args: unknown[]`, so their parameters are not type-checked through
 * this contract.
 */
export type TtsTestFacade = {
parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult;
resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides;
supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
getResolvedSpeechProviderConfig: (
config: ResolvedTtsConfig,
providerId: string,
cfg?: OpenClawConfig,
) => SpeechProviderConfig;
formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
sanitizeTtsErrorForLog: (err: unknown) => string;
};
/**
 * Result resolved by {@link TextToSpeech}. Audio is referenced through
 * `audioPath`, where the synthesis and telephony results carry an
 * `audioBuffer` instead.
 */
export type TtsResult = {
success: boolean;
audioPath?: string;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
/** NOTE(review): presumably marks output usable as a voice note — confirm. */
voiceCompatible?: boolean;
};
/**
 * Result resolved by the facade's `synthesizeSpeech` member. Carries audio as
 * an in-memory `audioBuffer` and adds `fileExtension` compared with
 * {@link TtsResult}.
 */
export type TtsSynthesisResult = {
success: boolean;
audioBuffer?: Buffer;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
voiceCompatible?: boolean;
fileExtension?: string;
};
/**
 * Result resolved by {@link TextToSpeechTelephony}. Carries an `audioBuffer`
 * and a `sampleRate`; it has no `voiceCompatible` or `fileExtension` fields.
 */
export type TtsTelephonyResult = {
success: boolean;
audioBuffer?: Buffer;
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
outputFormat?: string;
/** NOTE(review): unit is not stated here; presumably Hz — confirm. */
sampleRate?: number;
};
/**
 * Function signature of the `textToSpeech` member on
 * {@link TtsRuntimeFacade}: takes {@link TtsRequestParams} and resolves to a
 * {@link TtsResult}.
 */
export type TextToSpeech = (params: TtsRequestParams) => Promise<TtsResult>;
/**
 * Function signature of the `textToSpeechTelephony` member on
 * {@link TtsRuntimeFacade}: takes {@link TtsTelephonyRequestParams} and
 * resolves to a {@link TtsTelephonyResult}.
 */
export type TextToSpeechTelephony = (
params: TtsTelephonyRequestParams,
) => Promise<TtsTelephonyResult>;
/**
 * Function signature of the `listSpeechVoices` member on
 * {@link TtsRuntimeFacade}: takes {@link ListSpeechVoicesParams} and resolves
 * to an array of `SpeechVoiceOption`.
 */
export type ListSpeechVoices = (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>;
/**
 * Full member surface of the TTS runtime facade, declared here as a
 * standalone contract. By signature, the members cover config resolution,
 * preference getters/setters that take a `prefsPath`, last-attempt status
 * access, voice listing, payload post-processing, and the synthesis entry
 * points.
 */
export type TtsRuntimeFacade = {
/** Helper bundle described by {@link TtsTestFacade}. */
_test: TtsTestFacade;
buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
getLastTtsAttempt: () => TtsStatusEntry | undefined;
getResolvedSpeechProviderConfig: (
config: ResolvedTtsConfig,
providerId: string,
cfg?: OpenClawConfig,
) => SpeechProviderConfig;
getTtsMaxLength: (prefsPath: string) => number;
getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
isSummarizationEnabled: (prefsPath: string) => boolean;
isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
isTtsProviderConfigured: (
config: ResolvedTtsConfig,
provider: TtsProvider,
cfg?: OpenClawConfig,
) => boolean;
listSpeechVoices: ListSpeechVoices;
maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
/** Passing `undefined` is allowed by the signature. */
setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
/** Resolves to a {@link TtsSynthesisResult}. */
synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
textToSpeech: TextToSpeech;
textToSpeechTelephony: TextToSpeechTelephony;
};

View File

@@ -5,6 +5,11 @@ import type {
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
import type { LogLevel } from "../../logging/levels.js";
import type { MediaUnderstandingRuntime } from "../../media-understanding/runtime-types.js";
import type {
ListSpeechVoices,
TextToSpeech,
TextToSpeechTelephony,
} from "../../plugin-sdk/tts-runtime.types.js";
export type { HeartbeatRunResult };
@@ -87,9 +92,9 @@ export type PluginRuntimeCore = {
resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg;
};
tts: {
textToSpeech: typeof import("../../tts/tts.js").textToSpeech;
textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony;
listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
textToSpeech: TextToSpeech;
textToSpeechTelephony: TextToSpeechTelephony;
listVoices: ListSpeechVoices;
};
mediaUnderstanding: {
runFile: MediaUnderstandingRuntime["runMediaUnderstandingFile"];