mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-20 22:40:58 +00:00
fix(review): preserve talk directive overrides
This commit is contained in:
@@ -723,6 +723,9 @@ class TalkModeManager(
|
||||
TalkModeRuntime.validatedLanguage(directive?.language)?.let {
|
||||
put("language", JsonPrimitive(it))
|
||||
}
|
||||
directive?.outputFormat?.trim()?.takeIf { it.isNotEmpty() }?.let {
|
||||
put("outputFormat", JsonPrimitive(it))
|
||||
}
|
||||
}
|
||||
val res = session.request("talk.speak", params.toString())
|
||||
val root = json.parseToJsonElement(res).asObjectOrNull() ?: error("talk.speak returned invalid JSON")
|
||||
|
||||
@@ -21,6 +21,7 @@ export const TalkSpeakParamsSchema = Type.Object(
|
||||
text: NonEmptyString,
|
||||
voiceId: Type.Optional(Type.String()),
|
||||
modelId: Type.Optional(Type.String()),
|
||||
outputFormat: Type.Optional(Type.String()),
|
||||
speed: Type.Optional(Type.Number()),
|
||||
stability: Type.Optional(Type.Number()),
|
||||
similarity: Type.Optional(Type.Number()),
|
||||
|
||||
@@ -69,7 +69,13 @@ function resolveTalkVoiceId(
|
||||
if (!aliases) {
|
||||
return requested;
|
||||
}
|
||||
return aliases[normalizeAliasKey(requested)] ?? requested;
|
||||
const normalizedRequested = normalizeAliasKey(requested);
|
||||
for (const [alias, voiceId] of Object.entries(aliases)) {
|
||||
if (normalizeAliasKey(alias) === normalizedRequested) {
|
||||
return voiceId;
|
||||
}
|
||||
}
|
||||
return requested;
|
||||
}
|
||||
|
||||
function readTalkVoiceSettings(
|
||||
@@ -189,6 +195,7 @@ function buildTalkSpeakOverrides(
|
||||
): TtsDirectiveOverrides {
|
||||
const voiceId = resolveTalkVoiceId(providerConfig, trimString(params.voiceId));
|
||||
const modelId = trimString(params.modelId);
|
||||
const outputFormat = trimString(params.outputFormat);
|
||||
const speed = finiteNumber(params.speed);
|
||||
const seed = finiteNumber(params.seed);
|
||||
const normalize = normalizeTextNormalization(params.normalize);
|
||||
@@ -212,6 +219,7 @@ function buildTalkSpeakOverrides(
|
||||
overrides.elevenlabs = {
|
||||
...(voiceId == null ? {} : { voiceId }),
|
||||
...(modelId == null ? {} : { modelId }),
|
||||
...(outputFormat == null ? {} : { outputFormat }),
|
||||
...(seed == null ? {} : { seed }),
|
||||
...(normalize == null ? {} : { applyTextNormalization: normalize }),
|
||||
...(language == null ? {} : { languageCode: language }),
|
||||
@@ -230,7 +238,10 @@ function buildTalkSpeakOverrides(
|
||||
}
|
||||
|
||||
if (provider === "microsoft") {
|
||||
overrides.microsoft = voiceId == null ? undefined : { voice: voiceId };
|
||||
overrides.microsoft = {
|
||||
...(voiceId == null ? {} : { voice: voiceId }),
|
||||
...(outputFormat == null ? {} : { outputFormat }),
|
||||
};
|
||||
}
|
||||
|
||||
return overrides;
|
||||
|
||||
@@ -301,4 +301,51 @@ describe("gateway talk.config", () => {
|
||||
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
|
||||
// Checks two things about talk.speak with the ElevenLabs provider configured:
// (1) a requested voiceId that differs only in case from a configured alias
//     key is mapped to that alias's voice id, and
// (2) the requested outputFormat is echoed in the response payload and shows
//     up in the URL handed to fetch.
it("resolves talk voice aliases case-insensitively and forwards output format", async () => {
|
||||
const { writeConfigFile } = await import("../config/config.js");
|
||||
await writeConfigFile({
|
||||
talk: {
|
||||
provider: "elevenlabs",
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
apiKey: "elevenlabs-talk-key", // pragma: allowlist secret
|
||||
voiceId: "voice-default",
|
||||
|
||||
voiceAliases: {
|
||||
// Alias key is capitalized here; the request below asks for "clawd".
Clawd: "EXAVITQu4vr4xnSDxMaL",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// Swap out global fetch so the outgoing URL can be captured; the original is
// kept so the finally block can put it back.
const originalFetch = globalThis.fetch;
|
||||
let fetchUrl: string | undefined;
|
||||
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
|
||||
// Reduce the string / URL / Request-shaped input to a plain URL string.
fetchUrl = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
|
||||
// Fixed three-byte body; the audioBase64 assertion below expects exactly these bytes.
return new Response(new Uint8Array([4, 5, 6]), { status: 200 });
|
||||
});
|
||||
globalThis.fetch = fetchMock as typeof fetch;
|
||||
|
||||
try {
|
||||
await withServer(async (ws) => {
|
||||
await connectOperator(ws, ["operator.read", "operator.write"]);
|
||||
const res = await fetchTalkSpeak(ws, {
|
||||
text: "Hello from talk mode.",
|
||||
// Lower-case spelling of the configured "Clawd" alias.
voiceId: "clawd",
|
||||
outputFormat: "pcm_44100",
|
||||
});
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.payload?.provider).toBe("elevenlabs");
|
||||
expect(res.payload?.outputFormat).toBe("pcm_44100");
|
||||
expect(res.payload?.audioBase64).toBe(Buffer.from([4, 5, 6]).toString("base64"));
|
||||
});
|
||||
|
||||
// The captured URL must carry the alias's voice id (not the literal "clawd")
// and the requested output format.
expect(fetchMock).toHaveBeenCalled();
|
||||
expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL");
|
||||
expect(fetchUrl).toContain("output_format=pcm_44100");
|
||||
} finally {
|
||||
// Restore the real fetch even when an assertion above throws.
globalThis.fetch = originalFetch;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -72,7 +72,9 @@ export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
|
||||
if (!apiKey) {
|
||||
throw new Error("ElevenLabs API key missing");
|
||||
}
|
||||
const outputFormat = req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128";
|
||||
const outputFormat =
|
||||
req.overrides?.elevenlabs?.outputFormat ??
|
||||
(req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128");
|
||||
const audioBuffer = await elevenLabsTTS({
|
||||
text: req.text,
|
||||
apiKey,
|
||||
|
||||
@@ -83,7 +83,7 @@ export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
|
||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||
mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
|
||||
const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
|
||||
let outputFormat = req.config.edge.outputFormat;
|
||||
let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;
|
||||
const fallbackOutputFormat =
|
||||
outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined;
|
||||
|
||||
|
||||
@@ -167,6 +167,7 @@ export type TtsDirectiveOverrides = {
|
||||
elevenlabs?: {
|
||||
voiceId?: string;
|
||||
modelId?: string;
|
||||
outputFormat?: string;
|
||||
seed?: number;
|
||||
applyTextNormalization?: "auto" | "on" | "off";
|
||||
languageCode?: string;
|
||||
@@ -174,6 +175,7 @@ export type TtsDirectiveOverrides = {
|
||||
};
|
||||
microsoft?: {
|
||||
voice?: string;
|
||||
outputFormat?: string;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user