fix(plugin-sdk): split tts runtime contract types

2026-05-06 09:00:42 +00:00 · 2026-04-12 02:11:48 +01:00
parent 323e37c862
commit 51731d906f
3 changed files with 219 additions and 186 deletions
--- a/src/plugin-sdk/tts-runtime.ts
+++ b/src/plugin-sdk/tts-runtime.ts
@@ -1,151 +1,21 @@
-import type { ReplyPayload } from "../auto-reply/reply-payload.js";
-import type { OpenClawConfig } from "../config/types.openclaw.js";
-import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
-import type {
-  SpeechProviderConfig,
-  SpeechVoiceOption,
-  TtsDirectiveOverrides,
-  TtsDirectiveParseResult,
-} from "../tts/provider-types.js";
-import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
 import {
  createLazyFacadeObjectValue,
  loadActivatedBundledPluginPublicSurfaceModuleSync,
 } from "./facade-runtime.js";
+import type {
+  ResolvedTtsConfig,
+  ResolvedTtsModelOverrides,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+  TtsResult,
+  TtsRuntimeFacade,
+  TtsSynthesisResult,
+  TtsTelephonyResult,
+} from "./tts-runtime.types.js";

 // Manual facade. Keep loader boundary explicit and avoid typing this public SDK
 // seam through the bundled speech-core runtime surface.
-type TtsAttemptReasonCode =
-  | "success"
-  | "no_provider_registered"
-  | "not_configured"
-  | "unsupported_for_telephony"
-  | "timeout"
-  | "provider_error";
-
-type TtsProviderAttempt = {
-  provider: string;
-  outcome: "success" | "skipped" | "failed";
-  reasonCode: TtsAttemptReasonCode;
-  latencyMs?: number;
-  error?: string;
-};
-
-type TtsStatusEntry = {
-  timestamp: number;
-  success: boolean;
-  textLength: number;
-  summarized: boolean;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  latencyMs?: number;
-  error?: string;
-};
-
-type SummarizeResult = {
-  summary: string;
-  latencyMs: number;
-  inputLength: number;
-  outputLength: number;
-};
-
-type ResolveTtsAutoModeParams = {
-  config: ResolvedTtsConfig;
-  prefsPath: string;
-  sessionAuto?: string;
-};
-
-type ResolveExplicitTtsOverridesParams = {
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-  provider?: string;
-  modelId?: string;
-  voiceId?: string;
-};
-
-type TtsRequestParams = {
-  text: string;
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-  channel?: string;
-  overrides?: TtsDirectiveOverrides;
-  disableFallback?: boolean;
-};
-
-type TtsTelephonyRequestParams = {
-  text: string;
-  cfg: OpenClawConfig;
-  prefsPath?: string;
-};
-
-type ListSpeechVoicesParams = {
-  provider: string;
-  cfg?: OpenClawConfig;
-  config?: ResolvedTtsConfig;
-  apiKey?: string;
-  baseUrl?: string;
-};
-
-type MaybeApplyTtsToPayloadParams = {
-  payload: ReplyPayload;
-  cfg: OpenClawConfig;
-  channel?: string;
-  kind?: "tool" | "block" | "final";
-  inboundAudio?: boolean;
-  ttsAuto?: string;
-};
-
-type TtsTestFacade = {
-  parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult;
-  resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides;
-  supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
-  summarizeText: (...args: unknown[]) => Promise<SummarizeResult>;
-  getResolvedSpeechProviderConfig: (
-    config: ResolvedTtsConfig,
-    providerId: string,
-    cfg?: OpenClawConfig,
-  ) => SpeechProviderConfig;
-  formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
-  sanitizeTtsErrorForLog: (err: unknown) => string;
-};
-
-type FacadeModule = {
-  _test: TtsTestFacade;
-  buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
-  getLastTtsAttempt: () => TtsStatusEntry | undefined;
-  getResolvedSpeechProviderConfig: (
-    config: ResolvedTtsConfig,
-    providerId: string,
-    cfg?: OpenClawConfig,
-  ) => SpeechProviderConfig;
-  getTtsMaxLength: (prefsPath: string) => number;
-  getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
-  isSummarizationEnabled: (prefsPath: string) => boolean;
-  isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
-  isTtsProviderConfigured: (
-    config: ResolvedTtsConfig,
-    provider: TtsProvider,
-    cfg?: OpenClawConfig,
-  ) => boolean;
-  listSpeechVoices: (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>;
-  maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
-  resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
-  resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
-  resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
-  resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
-  resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
-  setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void;
-  setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
-  setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
-  setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
-  setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
-  setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
-  synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>;
-  textToSpeech: (params: TtsRequestParams) => Promise<TtsResult>;
-  textToSpeechTelephony: (params: TtsTelephonyRequestParams) => Promise<TtsTelephonyResult>;
-};
+type FacadeModule = TtsRuntimeFacade; // Local alias: the FacadeModule[...] lookups in this file now resolve against the shared contract type.

 function loadFacadeModule(): FacadeModule {
  return loadActivatedBundledPluginPublicSurfaceModuleSync<FacadeModule>({
@@ -203,48 +73,15 @@ export const textToSpeech: FacadeModule["textToSpeech"] = createLazyFacadeValue(
 export const textToSpeechTelephony: FacadeModule["textToSpeechTelephony"] =
  createLazyFacadeValue("textToSpeechTelephony");

-export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
-export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
-
-export type TtsResult = {
-  success: boolean;
-  audioPath?: string;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  voiceCompatible?: boolean;
-};
-
-export type TtsSynthesisResult = {
-  success: boolean;
-  audioBuffer?: Buffer;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  voiceCompatible?: boolean;
-  fileExtension?: string;
-};
-
-export type TtsTelephonyResult = {
-  success: boolean;
-  audioBuffer?: Buffer;
-  error?: string;
-  latencyMs?: number;
-  provider?: string;
-  fallbackFrom?: string;
-  attemptedProviders?: string[];
-  attempts?: TtsProviderAttempt[];
-  outputFormat?: string;
-  sampleRate?: number;
-};
+export type {
+  ResolvedTtsConfig,
+  ResolvedTtsModelOverrides,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+  TtsResult,
+  TtsSynthesisResult,
+  TtsTelephonyResult,
+} from "./tts-runtime.types.js";

 function createLazyFacadeValue<K extends keyof FacadeModule>(key: K): FacadeModule[K] {
  return ((...args: unknown[]) => {
--- a/src/plugin-sdk/tts-runtime.types.ts
+++ b/src/plugin-sdk/tts-runtime.types.ts
@@ -0,0 +1,191 @@
+import type { ReplyPayload } from "../auto-reply/reply-payload.js";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
+import type { TtsAutoMode, TtsProvider } from "../config/types.tts.js";
+import type {
+  SpeechProviderConfig,
+  SpeechVoiceOption,
+  TtsDirectiveOverrides,
+  TtsDirectiveParseResult,
+} from "../tts/provider-types.js";
+import type { ResolvedTtsConfig, ResolvedTtsModelOverrides } from "../tts/tts-types.js";
+
+export type { ResolvedTtsConfig, ResolvedTtsModelOverrides };
+export type { TtsDirectiveOverrides, TtsDirectiveParseResult };
+
+export type TtsAttemptReasonCode = // Closed set of string codes allowed in TtsProviderAttempt.reasonCode.
+  | "success"
+  | "no_provider_registered"
+  | "not_configured"
+  | "unsupported_for_telephony"
+  | "timeout"
+  | "provider_error";
+
+export type TtsProviderAttempt = { // Element type of the `attempts` arrays on TtsStatusEntry and on the three result types.
+  provider: string; // plain string here, not the TtsProvider config type
+  outcome: "success" | "skipped" | "failed";
+  reasonCode: TtsAttemptReasonCode; // NOTE(review): presumably explains `outcome`; the two are not linked at the type level
+  latencyMs?: number;
+  error?: string;
+};
+
+export type TtsStatusEntry = { // Value returned by getLastTtsAttempt and accepted by setLastTtsAttempt on TtsRuntimeFacade.
+  timestamp: number; // NOTE(review): epoch unit (ms vs s) is not visible here — confirm against the writer
+  success: boolean;
+  textLength: number;
+  summarized: boolean;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  latencyMs?: number;
+  error?: string;
+};
+
+export type SummarizeResult = { // Resolved value of TtsTestFacade.summarizeText; all four fields are required.
+  summary: string;
+  latencyMs: number;
+  inputLength: number;
+  outputLength: number;
+};
+
+export type ResolveTtsAutoModeParams = { // Argument object for TtsRuntimeFacade.resolveTtsAutoMode.
+  config: ResolvedTtsConfig;
+  prefsPath: string;
+  sessionAuto?: string; // typed as a bare string although the function returns TtsAutoMode — NOTE(review): confirm accepted values
+};
+
+export type ResolveExplicitTtsOverridesParams = { // Argument object for TtsRuntimeFacade.resolveExplicitTtsOverrides; only cfg is required.
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+  provider?: string;
+  modelId?: string;
+  voiceId?: string;
+};
+
+export type TtsRequestParams = { // Argument object shared by the TextToSpeech signature and TtsRuntimeFacade.synthesizeSpeech.
+  text: string;
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+  channel?: string;
+  overrides?: TtsDirectiveOverrides;
+  disableFallback?: boolean;
+};
+
+export type TtsTelephonyRequestParams = { // Argument object for TextToSpeechTelephony: TtsRequestParams without channel, overrides and disableFallback.
+  text: string;
+  cfg: OpenClawConfig;
+  prefsPath?: string;
+};
+
+export type ListSpeechVoicesParams = { // Argument object for the ListSpeechVoices signature; only provider is required.
+  provider: string;
+  cfg?: OpenClawConfig; // raw config; distinct from the resolved `config` below
+  config?: ResolvedTtsConfig;
+  apiKey?: string;
+  baseUrl?: string;
+};
+
+export type MaybeApplyTtsToPayloadParams = { // Argument object for TtsRuntimeFacade.maybeApplyTtsToPayload, which resolves to a ReplyPayload.
+  payload: ReplyPayload;
+  cfg: OpenClawConfig;
+  channel?: string;
+  kind?: "tool" | "block" | "final";
+  inboundAudio?: boolean;
+  ttsAuto?: string;
+};
+
+export type TtsTestFacade = { // Type of the `_test` member of TtsRuntimeFacade.
+  parseTtsDirectives: (...args: unknown[]) => TtsDirectiveParseResult; // argument list is unknown[]: callers get no parameter checking
+  resolveModelOverridePolicy: (...args: unknown[]) => ResolvedTtsModelOverrides; // same: only the return type is pinned
+  supportsNativeVoiceNoteTts: (channel: string | undefined) => boolean;
+  summarizeText: (...args: unknown[]) => Promise<SummarizeResult>; // same: only the resolved type is pinned
+  getResolvedSpeechProviderConfig: ( // same signature as the member of this name on TtsRuntimeFacade
+    config: ResolvedTtsConfig,
+    providerId: string,
+    cfg?: OpenClawConfig,
+  ) => SpeechProviderConfig;
+  formatTtsProviderError: (provider: TtsProvider, err: unknown) => string;
+  sanitizeTtsErrorForLog: (err: unknown) => string;
+};
+
+export type TtsResult = { // Resolved value of the TextToSpeech signature; carries audioPath rather than an in-memory buffer.
+  success: boolean; // the only required field; everything else is optional
+  audioPath?: string;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  voiceCompatible?: boolean;
+};
+
+export type TtsSynthesisResult = { // Resolved value of TtsRuntimeFacade.synthesizeSpeech; carries audioBuffer and fileExtension instead of audioPath.
+  success: boolean; // the only required field; everything else is optional
+  audioBuffer?: Buffer;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  voiceCompatible?: boolean;
+  fileExtension?: string;
+};
+
+export type TtsTelephonyResult = { // Resolved value of the TextToSpeechTelephony signature; has sampleRate, no voiceCompatible or fileExtension.
+  success: boolean; // the only required field; everything else is optional
+  audioBuffer?: Buffer;
+  error?: string;
+  latencyMs?: number;
+  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
+  attempts?: TtsProviderAttempt[];
+  outputFormat?: string;
+  sampleRate?: number;
+};
+
+export type TextToSpeech = (params: TtsRequestParams) => Promise<TtsResult>; // Signature used by TtsRuntimeFacade.textToSpeech and PluginRuntimeCore.tts.textToSpeech.
+export type TextToSpeechTelephony = ( // Signature used by TtsRuntimeFacade.textToSpeechTelephony and PluginRuntimeCore.tts.textToSpeechTelephony.
+  params: TtsTelephonyRequestParams,
+) => Promise<TtsTelephonyResult>;
+export type ListSpeechVoices = (params: ListSpeechVoicesParams) => Promise<SpeechVoiceOption[]>; // Signature of TtsRuntimeFacade.listSpeechVoices; exposed on PluginRuntimeCore.tts as `listVoices`.
+
+export type TtsRuntimeFacade = { // Full member list of the TTS facade module; tts-runtime.ts aliases this as FacadeModule.
+  _test: TtsTestFacade;
+  buildTtsSystemPromptHint: (cfg: OpenClawConfig) => string | undefined;
+  getLastTtsAttempt: () => TtsStatusEntry | undefined; // pairs with setLastTtsAttempt below
+  getResolvedSpeechProviderConfig: (
+    config: ResolvedTtsConfig,
+    providerId: string,
+    cfg?: OpenClawConfig,
+  ) => SpeechProviderConfig;
+  getTtsMaxLength: (prefsPath: string) => number;
+  getTtsProvider: (config: ResolvedTtsConfig, prefsPath: string) => TtsProvider;
+  isSummarizationEnabled: (prefsPath: string) => boolean;
+  isTtsEnabled: (config: ResolvedTtsConfig, prefsPath: string, sessionAuto?: string) => boolean;
+  isTtsProviderConfigured: (
+    config: ResolvedTtsConfig,
+    provider: TtsProvider,
+    cfg?: OpenClawConfig,
+  ) => boolean;
+  listSpeechVoices: ListSpeechVoices; // shared alias, also referenced by PluginRuntimeCore.tts.listVoices
+  maybeApplyTtsToPayload: (params: MaybeApplyTtsToPayloadParams) => Promise<ReplyPayload>;
+  resolveExplicitTtsOverrides: (params: ResolveExplicitTtsOverridesParams) => TtsDirectiveOverrides;
+  resolveTtsAutoMode: (params: ResolveTtsAutoModeParams) => TtsAutoMode;
+  resolveTtsConfig: (cfg: OpenClawConfig) => ResolvedTtsConfig;
+  resolveTtsPrefsPath: (config: ResolvedTtsConfig) => string;
+  resolveTtsProviderOrder: (primary: TtsProvider, cfg?: OpenClawConfig) => TtsProvider[];
+  setLastTtsAttempt: (entry: TtsStatusEntry | undefined) => void; // accepts undefined as well as an entry
+  setSummarizationEnabled: (prefsPath: string, enabled: boolean) => void;
+  setTtsAutoMode: (prefsPath: string, mode: TtsAutoMode) => void;
+  setTtsEnabled: (prefsPath: string, enabled: boolean) => void;
+  setTtsMaxLength: (prefsPath: string, maxLength: number) => void;
+  setTtsProvider: (prefsPath: string, provider: TtsProvider) => void;
+  synthesizeSpeech: (params: TtsRequestParams) => Promise<TtsSynthesisResult>; // same params as textToSpeech, different result type
+  textToSpeech: TextToSpeech; // shared alias, also referenced by PluginRuntimeCore.tts
+  textToSpeechTelephony: TextToSpeechTelephony; // shared alias, also referenced by PluginRuntimeCore.tts
+};
--- a/src/plugins/runtime/types-core.ts
+++ b/src/plugins/runtime/types-core.ts
@@ -5,6 +5,11 @@ import type {
 import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
 import type { LogLevel } from "../../logging/levels.js";
 import type { MediaUnderstandingRuntime } from "../../media-understanding/runtime-types.js";
+import type {
+  ListSpeechVoices,
+  TextToSpeech,
+  TextToSpeechTelephony,
+} from "../../plugin-sdk/tts-runtime.types.js";

 export type { HeartbeatRunResult };

@@ -87,9 +92,9 @@ export type PluginRuntimeCore = {
    resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg;
  };
  tts: {
-    textToSpeech: typeof import("../../tts/tts.js").textToSpeech;
-    textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony;
-    listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
+    textToSpeech: TextToSpeech;
+    textToSpeechTelephony: TextToSpeechTelephony;
+    listVoices: ListSpeechVoices;
  };
  mediaUnderstanding: {
    runFile: MediaUnderstandingRuntime["runMediaUnderstandingFile"];