fix(review): preserve talk directive overrides

This commit is contained in:
Ayaan Zaidi
2026-03-20 10:51:29 +05:30
parent 4a0341ed03
commit 47e412bd0b
7 changed files with 70 additions and 4 deletions

View File

@@ -723,6 +723,9 @@ class TalkModeManager(
TalkModeRuntime.validatedLanguage(directive?.language)?.let {
put("language", JsonPrimitive(it))
}
directive?.outputFormat?.trim()?.takeIf { it.isNotEmpty() }?.let {
put("outputFormat", JsonPrimitive(it))
}
}
val res = session.request("talk.speak", params.toString())
val root = json.parseToJsonElement(res).asObjectOrNull() ?: error("talk.speak returned invalid JSON")

View File

@@ -21,6 +21,7 @@ export const TalkSpeakParamsSchema = Type.Object(
text: NonEmptyString,
voiceId: Type.Optional(Type.String()),
modelId: Type.Optional(Type.String()),
outputFormat: Type.Optional(Type.String()),
speed: Type.Optional(Type.Number()),
stability: Type.Optional(Type.Number()),
similarity: Type.Optional(Type.Number()),

View File

@@ -69,7 +69,13 @@ function resolveTalkVoiceId(
if (!aliases) {
return requested;
}
return aliases[normalizeAliasKey(requested)] ?? requested;
const normalizedRequested = normalizeAliasKey(requested);
for (const [alias, voiceId] of Object.entries(aliases)) {
if (normalizeAliasKey(alias) === normalizedRequested) {
return voiceId;
}
}
return requested;
}
function readTalkVoiceSettings(
@@ -189,6 +195,7 @@ function buildTalkSpeakOverrides(
): TtsDirectiveOverrides {
const voiceId = resolveTalkVoiceId(providerConfig, trimString(params.voiceId));
const modelId = trimString(params.modelId);
const outputFormat = trimString(params.outputFormat);
const speed = finiteNumber(params.speed);
const seed = finiteNumber(params.seed);
const normalize = normalizeTextNormalization(params.normalize);
@@ -212,6 +219,7 @@ function buildTalkSpeakOverrides(
overrides.elevenlabs = {
...(voiceId == null ? {} : { voiceId }),
...(modelId == null ? {} : { modelId }),
...(outputFormat == null ? {} : { outputFormat }),
...(seed == null ? {} : { seed }),
...(normalize == null ? {} : { applyTextNormalization: normalize }),
...(language == null ? {} : { languageCode: language }),
@@ -230,7 +238,10 @@ function buildTalkSpeakOverrides(
}
if (provider === "microsoft") {
overrides.microsoft = voiceId == null ? undefined : { voice: voiceId };
overrides.microsoft = {
...(voiceId == null ? {} : { voice: voiceId }),
...(outputFormat == null ? {} : { outputFormat }),
};
}
return overrides;

View File

@@ -301,4 +301,51 @@ describe("gateway talk.config", () => {
globalThis.fetch = originalFetch;
}
});
// Regression test: a talk.speak request naming a voice alias in a different
// case than the configured key ("clawd" vs "Clawd") must resolve to the
// aliased voice id, and a caller-supplied outputFormat must reach both the
// outgoing provider request URL and the response payload.
it("resolves talk voice aliases case-insensitively and forwards output format", async () => {
const { writeConfigFile } = await import("../config/config.js");
// Configure ElevenLabs as the talk provider with a single mixed-case alias.
await writeConfigFile({
talk: {
provider: "elevenlabs",
providers: {
elevenlabs: {
apiKey: "elevenlabs-talk-key", // pragma: allowlist secret
voiceId: "voice-default",
voiceAliases: {
Clawd: "EXAVITQu4vr4xnSDxMaL",
},
},
},
},
});
// Swap the global fetch for a mock that records the requested URL and
// answers with a fixed three-byte body; the original is restored in `finally`.
const originalFetch = globalThis.fetch;
let fetchUrl: string | undefined;
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
// Normalise the three accepted input shapes (string, URL, Request-like) to a URL string.
fetchUrl = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
return new Response(new Uint8Array([4, 5, 6]), { status: 200 });
});
globalThis.fetch = fetchMock as typeof fetch;
try {
await withServer(async (ws) => {
// NOTE(review): the string array is presumably the operator scope list — confirm against connectOperator.
await connectOperator(ws, ["operator.read", "operator.write"]);
// The voiceId is deliberately lower-case so it differs in case from the configured alias key.
const res = await fetchTalkSpeak(ws, {
text: "Hello from talk mode.",
voiceId: "clawd",
outputFormat: "pcm_44100",
});
expect(res.ok).toBe(true);
expect(res.payload?.provider).toBe("elevenlabs");
// The requested format is echoed back in the payload.
expect(res.payload?.outputFormat).toBe("pcm_44100");
// The audio is the mocked response body, base64-encoded.
expect(res.payload?.audioBase64).toBe(Buffer.from([4, 5, 6]).toString("base64"));
});
expect(fetchMock).toHaveBeenCalled();
// The URL must carry the alias target, not the literal "clawd" or the default voice.
expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL");
// The override must be forwarded as the output_format query parameter.
expect(fetchUrl).toContain("output_format=pcm_44100");
} finally {
// Always restore the real fetch so later tests are unaffected.
globalThis.fetch = originalFetch;
}
});
});

View File

@@ -72,7 +72,9 @@ export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
if (!apiKey) {
throw new Error("ElevenLabs API key missing");
}
const outputFormat = req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128";
const outputFormat =
req.overrides?.elevenlabs?.outputFormat ??
(req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128");
const audioBuffer = await elevenLabsTTS({
text: req.text,
apiKey,

View File

@@ -83,7 +83,7 @@ export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
const tempRoot = resolvePreferredOpenClawTmpDir();
mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
let outputFormat = req.config.edge.outputFormat;
let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;
const fallbackOutputFormat =
outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined;

View File

@@ -167,6 +167,7 @@ export type TtsDirectiveOverrides = {
elevenlabs?: {
voiceId?: string;
modelId?: string;
outputFormat?: string;
seed?: number;
applyTextNormalization?: "auto" | "on" | "off";
languageCode?: string;
@@ -174,6 +175,7 @@ export type TtsDirectiveOverrides = {
};
microsoft?: {
voice?: string;
outputFormat?: string;
};
};