mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
fix(tts): surface voice status and harden providers
This commit is contained in:
@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
|
||||
- ACP: send subagent and async-task completion wakes to external ACP harnesses as
|
||||
plain prompts instead of OpenClaw internal runtime-context envelopes, while
|
||||
keeping those envelopes out of ACP transcripts.
|
||||
- TTS/status: show configured TTS model, voice, and sanitized custom endpoint in `/status`, preserve OpenAI-compatible TTS instructions on custom endpoints, and retry empty Microsoft/Edge TTS output once. Addresses #46602, #47232, and #43936. Thanks @leekuangtao, @Huntterxx, and @rex993.
|
||||
- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
|
||||
retry once, repair replay, and allow configured model fallback instead of
|
||||
preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.
|
||||
|
||||
@@ -846,6 +846,8 @@ Notes:
|
||||
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
|
||||
- failure: `Error: ...` plus `Attempts: ...`
|
||||
- detailed diagnostics: `Attempt details: provider:outcome(reasonCode) latency`
|
||||
- `/status` shows the active TTS mode plus configured provider, model, voice,
|
||||
and sanitized custom endpoint metadata when TTS is enabled.
|
||||
- OpenAI and ElevenLabs API failures now include parsed provider error detail and request id (when returned by the provider), which are surfaced in TTS errors/logs.
|
||||
|
||||
## Agent tool
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeAll, describe, expect, it } from "vitest";
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
|
||||
let edgeTTS: typeof import("./tts.js").edgeTTS;
|
||||
|
||||
function createEdgeTTSDeps(ttsPromise: (text: string, filePath: string) => Promise<void>) {
|
||||
function createEdgeTTSDeps(
|
||||
ttsPromise: (text: string, filePath: string) => Promise<void>,
|
||||
onConstruct?: () => void,
|
||||
) {
|
||||
return {
|
||||
EdgeTTS: class {
|
||||
constructor() {
|
||||
onConstruct?.();
|
||||
}
|
||||
|
||||
ttsPromise(text: string, filePath: string) {
|
||||
return ttsPromise(text, filePath);
|
||||
}
|
||||
@@ -36,11 +43,35 @@ describe("edgeTTS empty audio validation", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("throws when the output file is 0 bytes", async () => {
|
||||
it("rejects blank text before constructing Edge TTS", async () => {
|
||||
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
|
||||
const outputPath = path.join(tempDir, "voice.mp3");
|
||||
|
||||
const onConstruct = vi.fn();
|
||||
const deps = createEdgeTTSDeps(async (_text: string, filePath: string) => {
|
||||
writeFileSync(filePath, Buffer.from([0xff]));
|
||||
}, onConstruct);
|
||||
|
||||
await expect(
|
||||
edgeTTS(
|
||||
{
|
||||
text: " \n\t ",
|
||||
outputPath,
|
||||
config: baseEdgeConfig,
|
||||
timeoutMs: 10000,
|
||||
},
|
||||
deps,
|
||||
),
|
||||
).rejects.toThrow("Microsoft TTS text cannot be empty");
|
||||
expect(onConstruct).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("throws after one retry when the output file stays empty", async () => {
|
||||
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
|
||||
const outputPath = path.join(tempDir, "voice.mp3");
|
||||
const calls: string[] = [];
|
||||
|
||||
const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
|
||||
calls.push(text);
|
||||
writeFileSync(filePath, "");
|
||||
});
|
||||
|
||||
@@ -54,7 +85,8 @@ describe("edgeTTS empty audio validation", () => {
|
||||
},
|
||||
deps,
|
||||
),
|
||||
).rejects.toThrow("Edge TTS produced empty audio file");
|
||||
).rejects.toThrow("Edge TTS produced empty audio file after retry");
|
||||
expect(calls).toEqual(["Hello", "Hello"]);
|
||||
});
|
||||
|
||||
it("succeeds when the output file has content", async () => {
|
||||
@@ -77,4 +109,78 @@ describe("edgeTTS empty audio validation", () => {
|
||||
),
|
||||
).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
it("retries once when the first output file is empty", async () => {
|
||||
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
|
||||
const outputPath = path.join(tempDir, "voice.mp3");
|
||||
const calls: string[] = [];
|
||||
|
||||
const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
|
||||
calls.push(text);
|
||||
writeFileSync(filePath, calls.length === 1 ? "" : Buffer.from([0xff, 0xfb, 0x90, 0x00]));
|
||||
});
|
||||
|
||||
await expect(
|
||||
edgeTTS(
|
||||
{
|
||||
text: "Hello",
|
||||
outputPath,
|
||||
config: baseEdgeConfig,
|
||||
timeoutMs: 10000,
|
||||
},
|
||||
deps,
|
||||
),
|
||||
).resolves.toBeUndefined();
|
||||
expect(calls).toEqual(["Hello", "Hello"]);
|
||||
});
|
||||
|
||||
it("retries once when Edge TTS resolves without creating an output file", async () => {
|
||||
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
|
||||
const outputPath = path.join(tempDir, "voice.mp3");
|
||||
const calls: string[] = [];
|
||||
|
||||
const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
|
||||
calls.push(text);
|
||||
if (calls.length === 2) {
|
||||
writeFileSync(filePath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
|
||||
}
|
||||
});
|
||||
|
||||
await expect(
|
||||
edgeTTS(
|
||||
{
|
||||
text: "Hello",
|
||||
outputPath,
|
||||
config: baseEdgeConfig,
|
||||
timeoutMs: 10000,
|
||||
},
|
||||
deps,
|
||||
),
|
||||
).resolves.toBeUndefined();
|
||||
expect(calls).toEqual(["Hello", "Hello"]);
|
||||
});
|
||||
|
||||
it("does not retry provider errors", async () => {
|
||||
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
|
||||
const outputPath = path.join(tempDir, "voice.mp3");
|
||||
const calls: string[] = [];
|
||||
|
||||
const deps = createEdgeTTSDeps(async (text: string) => {
|
||||
calls.push(text);
|
||||
throw new Error("upstream timeout");
|
||||
});
|
||||
|
||||
await expect(
|
||||
edgeTTS(
|
||||
{
|
||||
text: "Hello",
|
||||
outputPath,
|
||||
config: baseEdgeConfig,
|
||||
timeoutMs: 10000,
|
||||
},
|
||||
deps,
|
||||
),
|
||||
).rejects.toThrow("upstream timeout");
|
||||
expect(calls).toEqual(["Hello"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -24,6 +24,26 @@ async function loadDefaultEdgeTTSDeps(): Promise<EdgeTTSDeps> {
|
||||
return { EdgeTTS };
|
||||
}
|
||||
|
||||
/**
 * Reports whether a caught value looks like a "file not found" filesystem error.
 *
 * @param error - Whatever was thrown; it is not assumed to be an `Error`.
 * @returns `true` only when `error` is a non-null object whose `code` property
 *   is exactly the string `"ENOENT"`; `false` for every other value.
 */
function isMissingOutputFileError(error: unknown): boolean {
  return (
    typeof error === "object" &&
    error !== null &&
    "code" in error &&
    (error as { code?: unknown }).code === "ENOENT"
  );
}
|
||||
|
||||
/**
 * Returns the size in bytes of the file at `outputPath`, treating a missing
 * file the same as an empty one.
 *
 * @param outputPath - Path of the file to inspect.
 * @returns The file size reported by `statSync`, or `0` when the stat call
 *   fails with a missing-file (`ENOENT`) error.
 * @throws Re-throws any stat failure that is not a missing-file error.
 */
function readOutputSize(outputPath: string): number {
  try {
    return statSync(outputPath).size;
  } catch (error) {
    // A file that was never created is reported as size 0 rather than as a failure.
    if (isMissingOutputFileError(error)) {
      return 0;
    }
    throw error;
  }
}
|
||||
|
||||
export function inferEdgeExtension(outputFormat: string): string {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(outputFormat);
|
||||
if (normalized.includes("webm")) {
|
||||
@@ -61,6 +81,10 @@ export async function edgeTTS(
|
||||
deps?: EdgeTTSDeps,
|
||||
): Promise<void> {
|
||||
const { text, outputPath, config, timeoutMs } = params;
|
||||
if (text.trim().length === 0) {
|
||||
throw new Error("Microsoft TTS text cannot be empty");
|
||||
}
|
||||
|
||||
const resolvedDeps = deps ?? (await loadDefaultEdgeTTSDeps());
|
||||
const tts = new resolvedDeps.EdgeTTS({
|
||||
voice: config.voice,
|
||||
@@ -73,10 +97,12 @@ export async function edgeTTS(
|
||||
volume: config.volume,
|
||||
timeout: config.timeoutMs ?? timeoutMs,
|
||||
});
|
||||
await tts.ttsPromise(text, outputPath);
|
||||
|
||||
const { size } = statSync(outputPath);
|
||||
if (size === 0) {
|
||||
throw new Error("Edge TTS produced empty audio file");
|
||||
for (let attempt = 0; attempt < 2; attempt += 1) {
|
||||
await tts.ttsPromise(text, outputPath);
|
||||
if (readOutputSize(outputPath) > 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw new Error("Edge TTS produced empty audio file after retry");
|
||||
}
|
||||
|
||||
@@ -91,9 +91,75 @@ describe("openai tts", () => {
|
||||
expect(resolveOpenAITtsInstructions("tts-1-hd", "Speak warmly")).toBeUndefined();
|
||||
expect(resolveOpenAITtsInstructions("gpt-4o-mini-tts", " ")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("preserves instructions for custom OpenAI-compatible TTS endpoints", () => {
|
||||
expect(
|
||||
resolveOpenAITtsInstructions("tts-1", " Speak warmly ", "https://tts.example.com/v1"),
|
||||
).toBe("Speak warmly");
|
||||
expect(
|
||||
resolveOpenAITtsInstructions("tts-1", " Speak warmly ", "https://api.openai.com/v1/"),
|
||||
).toBeUndefined();
|
||||
expect(
|
||||
resolveOpenAITtsInstructions("tts-1", " ", "https://tts.example.com/v1"),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("openaiTTS diagnostics", () => {
|
||||
it("sends instructions to custom OpenAI-compatible endpoints", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async (_url: string | URL, _init?: RequestInit) =>
|
||||
new Response(Buffer.from("audio-bytes"), { status: 200 }),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await openaiTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://tts.example.com/v1",
|
||||
model: "tts-1",
|
||||
voice: "custom-voice",
|
||||
instructions: " Speak warmly ",
|
||||
responseFormat: "mp3",
|
||||
timeoutMs: 5_000,
|
||||
});
|
||||
|
||||
const [, init] = fetchMock.mock.calls[0] ?? [];
|
||||
if (typeof init?.body !== "string") {
|
||||
throw new Error("expected JSON request body");
|
||||
}
|
||||
const body = JSON.parse(init.body) as Record<string, unknown>;
|
||||
expect(body.instructions).toBe("Speak warmly");
|
||||
expect(body.model).toBe("tts-1");
|
||||
expect(body.voice).toBe("custom-voice");
|
||||
});
|
||||
|
||||
it("omits instructions for unsupported models on the official OpenAI endpoint", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async (_url: string | URL, _init?: RequestInit) =>
|
||||
new Response(Buffer.from("audio-bytes"), { status: 200 }),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await openaiTTS({
|
||||
text: "hello",
|
||||
apiKey: "test-key",
|
||||
baseUrl: "https://api.openai.com/v1/",
|
||||
model: "tts-1",
|
||||
voice: "alloy",
|
||||
instructions: "Speak warmly",
|
||||
responseFormat: "mp3",
|
||||
timeoutMs: 5_000,
|
||||
});
|
||||
|
||||
const [, init] = fetchMock.mock.calls[0] ?? [];
|
||||
if (typeof init?.body !== "string") {
|
||||
throw new Error("expected JSON request body");
|
||||
}
|
||||
const body = JSON.parse(init.body) as Record<string, unknown>;
|
||||
expect(body.instructions).toBeUndefined();
|
||||
});
|
||||
|
||||
it("includes parsed provider detail and request id for JSON API errors", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
|
||||
@@ -63,9 +63,16 @@ export function isValidOpenAIVoice(voice: string, baseUrl?: string): voice is Op
|
||||
/**
 * Decides which `instructions` value, if any, should be sent with an OpenAI
 * (or OpenAI-compatible) text-to-speech request.
 *
 * @param model - Requested TTS model id.
 * @param instructions - Optional free-form speaking instructions; surrounding
 *   whitespace is trimmed and a blank value counts as absent.
 * @param baseUrl - Optional API base URL. When it is given and
 *   `isCustomOpenAIEndpoint` reports it as custom, the trimmed instructions are
 *   passed through regardless of the model name.
 * @returns The trimmed instructions, or `undefined` when they are blank or
 *   when the endpoint is not custom and `model` does not contain
 *   `"gpt-4o-mini-tts"`.
 */
export function resolveOpenAITtsInstructions(
  model: string,
  instructions?: string,
  baseUrl?: string,
): string | undefined {
  const next = instructions?.trim();
  if (!next) {
    return undefined;
  }
  // Custom endpoints get the instructions untouched; no model-name gate is applied.
  if (baseUrl !== undefined && isCustomOpenAIEndpoint(baseUrl)) {
    return next;
  }
  return model.includes("gpt-4o-mini-tts") ? next : undefined;
}
|
||||
|
||||
export async function openaiTTS(params: {
|
||||
@@ -81,7 +88,7 @@ export async function openaiTTS(params: {
|
||||
}): Promise<Buffer> {
|
||||
const { text, apiKey, baseUrl, model, voice, speed, instructions, responseFormat, timeoutMs } =
|
||||
params;
|
||||
const effectiveInstructions = resolveOpenAITtsInstructions(model, instructions);
|
||||
const effectiveInstructions = resolveOpenAITtsInstructions(model, instructions, baseUrl);
|
||||
|
||||
if (!isValidOpenAIModel(model, baseUrl)) {
|
||||
throw new Error(`Invalid model: ${model}`);
|
||||
|
||||
@@ -103,6 +103,39 @@ describe("buildStatusMessage", () => {
|
||||
expect(normalized).toContain("Queue: collect");
|
||||
});
|
||||
|
||||
it("shows sanitized TTS provider details in the voice status line", async () => {
|
||||
await withTempHome(async () => {
|
||||
const text = buildStatusMessage({
|
||||
config: {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openai",
|
||||
providers: {
|
||||
openai: {
|
||||
displayName: "NeuTTS local",
|
||||
baseUrl: "http://user:secret@127.0.0.1:18801/v1?token=hidden#fragment",
|
||||
model: "neutts-nano",
|
||||
voice: "clara",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig,
|
||||
agent: {},
|
||||
now: 0,
|
||||
});
|
||||
const normalized = normalizeTestText(text);
|
||||
|
||||
expect(normalized).toContain(
|
||||
"Voice: always · provider=openai · name=NeuTTS local · model=neutts-nano · voice=clara · endpoint=custom(http://127.0.0.1:18801/v1)",
|
||||
);
|
||||
expect(normalized).not.toContain("secret");
|
||||
expect(normalized).not.toContain("token=hidden");
|
||||
expect(normalized).not.toContain("fragment");
|
||||
});
|
||||
});
|
||||
|
||||
it("shows the model runtime for CLI-backed providers", () => {
|
||||
const text = buildStatusMessage({
|
||||
config: {
|
||||
|
||||
@@ -464,7 +464,25 @@ const formatVoiceModeLine = (
|
||||
if (!snapshot) {
|
||||
return null;
|
||||
}
|
||||
return `🔊 Voice: ${snapshot.autoMode} · provider=${snapshot.provider} · limit=${snapshot.maxLength} · summary=${snapshot.summarize ? "on" : "off"}`;
|
||||
const parts = [`🔊 Voice: ${snapshot.autoMode}`, `provider=${snapshot.provider}`];
|
||||
if (snapshot.displayName) {
|
||||
parts.push(`name=${snapshot.displayName}`);
|
||||
}
|
||||
if (snapshot.model) {
|
||||
parts.push(`model=${snapshot.model}`);
|
||||
}
|
||||
if (snapshot.voice) {
|
||||
parts.push(`voice=${snapshot.voice}`);
|
||||
}
|
||||
if (snapshot.baseUrl) {
|
||||
parts.push(
|
||||
snapshot.customBaseUrl
|
||||
? `endpoint=custom(${snapshot.baseUrl})`
|
||||
: `endpoint=${snapshot.baseUrl}`,
|
||||
);
|
||||
}
|
||||
parts.push(`limit=${snapshot.maxLength}`, `summary=${snapshot.summarize ? "on" : "off"}`);
|
||||
return parts.join(" · ");
|
||||
};
|
||||
|
||||
export function buildStatusMessage(args: StatusArgs): string {
|
||||
|
||||
@@ -138,6 +138,162 @@ describe("resolveStatusTtsSnapshot", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("reports configured OpenAI TTS model, voice, and sanitized custom endpoint", async () => {
|
||||
await withStatusTempHome(async () => {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openai",
|
||||
providers: {
|
||||
openai: {
|
||||
displayName: "NeuTTS local",
|
||||
baseUrl: "http://user:secret@127.0.0.1:18801/v1?token=hidden#fragment",
|
||||
model: "neutts-nano",
|
||||
voice: "clara",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "openai",
|
||||
displayName: "NeuTTS local",
|
||||
model: "neutts-nano",
|
||||
voice: "clara",
|
||||
baseUrl: "http://127.0.0.1:18801/v1",
|
||||
customBaseUrl: true,
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("omits default OpenAI endpoint details from status", async () => {
|
||||
await withStatusTempHome(async () => {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "openai",
|
||||
providers: {
|
||||
openai: {
|
||||
baseUrl: "https://api.openai.com/v1/",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "coral",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "coral",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("reports merged per-agent provider metadata", async () => {
|
||||
await withStatusTempHome(async () => {
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {
|
||||
auto: "off",
|
||||
provider: "openai",
|
||||
providers: {
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "coral",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
list: [
|
||||
{
|
||||
id: "reader",
|
||||
tts: {
|
||||
auto: "always",
|
||||
providers: {
|
||||
openai: {
|
||||
voice: "nova",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
agentId: "reader",
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "nova",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("uses provider metadata for local provider prefs overrides", async () => {
|
||||
await withStatusTempHome(async (home) => {
|
||||
const prefsPath = path.join(home, ".openclaw", "settings", "tts.json");
|
||||
fs.mkdirSync(path.dirname(prefsPath), { recursive: true });
|
||||
fs.writeFileSync(
|
||||
prefsPath,
|
||||
JSON.stringify({
|
||||
tts: {
|
||||
auto: "always",
|
||||
provider: "edge",
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
expect(
|
||||
resolveStatusTtsSnapshot({
|
||||
cfg: {
|
||||
messages: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
prefsPath,
|
||||
providers: {
|
||||
microsoft: {
|
||||
voice: "en-US-AvaMultilingualNeural",
|
||||
},
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "coral",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
}),
|
||||
).toEqual({
|
||||
autoMode: "always",
|
||||
provider: "microsoft",
|
||||
voice: "en-US-AvaMultilingualNeural",
|
||||
maxLength: 1500,
|
||||
summarize: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("derives the default prefs path from OPENCLAW_CONFIG_PATH when set", async () => {
|
||||
await withStatusTempHome(async (home) => {
|
||||
const stateDir = path.join(home, ".openclaw-dev");
|
||||
|
||||
@@ -12,6 +12,8 @@ import { resolveEffectiveTtsConfig } from "./tts-config.js";
|
||||
|
||||
const DEFAULT_TTS_MAX_LENGTH = 1500;
|
||||
const DEFAULT_TTS_SUMMARIZE = true;
|
||||
const DEFAULT_OPENAI_TTS_BASE_URL = "https://api.openai.com/v1";
|
||||
const MAX_STATUS_DETAIL_LENGTH = 96;
|
||||
|
||||
type TtsUserPrefs = {
|
||||
tts?: {
|
||||
@@ -26,6 +28,11 @@ type TtsUserPrefs = {
|
||||
/**
 * TTS summary consumed by the `/status` voice line.
 *
 * The optional fields are only present when the configuration for the selected
 * provider supplies a usable value for them.
 */
type TtsStatusSnapshot = {
  /** Effective TTS auto mode, shown first on the voice line. */
  autoMode: TtsAutoMode;
  /** Selected speech provider id. */
  provider: TtsProvider;
  /** Optional human-readable provider name taken from the provider config. */
  displayName?: string;
  /** Configured model identifier, when one is set. */
  model?: string;
  /** Configured voice identifier, when one is set. */
  voice?: string;
  /** Provider base URL after credentials, query string and fragment were removed. */
  baseUrl?: string;
  /** Whether `baseUrl` is a custom (non-default) OpenAI endpoint. */
  customBaseUrl?: boolean;
  /** Maximum text length setting reported for TTS. */
  maxLength: number;
  /** Whether the summarize setting is on. */
  summarize: boolean;
};
|
||||
@@ -78,6 +85,116 @@ function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefin
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Type guard for plain keyed objects.
 *
 * @param value - Any value.
 * @returns `true` when `value` has `typeof "object"`, is not `null`, and is
 *   not an array; arrays and primitives are rejected.
 */
function isObjectRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
||||
|
||||
/**
 * Turns an arbitrary config value into a compact single-line string that is
 * safe to embed in a status line.
 *
 * Non-string values are rejected. Strings are trimmed, every run of whitespace
 * is collapsed to a single space, and results longer than `maxLength` are cut
 * and suffixed with `"..."` so the total length never exceeds `maxLength`.
 *
 * @param value - Candidate value, typically read from untyped configuration.
 * @param maxLength - Maximum length of the returned string, ellipsis included.
 * @returns The normalized text, or `undefined` when `value` is not a string or
 *   nothing remains after normalization/truncation.
 */
function normalizeStatusDetail(
  value: unknown,
  maxLength = MAX_STATUS_DETAIL_LENGTH,
): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const normalized = value.trim().replace(/\s+/g, " ");
  if (!normalized) {
    return undefined;
  }
  if (!(normalized.length > maxLength)) {
    return normalized;
  }
  // When there is no room for the ellipsis, hard-truncate instead of letting a
  // negative slice index produce output longer than the limit.
  const ellipsis = maxLength >= 3 ? "..." : "";
  let keep = Math.max(0, maxLength - ellipsis.length);
  if (keep > 0) {
    // Do not leave a dangling high surrogate when the cut lands inside a
    // surrogate pair (e.g. an emoji in a display name).
    const lastKept = normalized.charCodeAt(keep - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      keep -= 1;
    }
  }
  const truncated = `${normalized.slice(0, keep)}${ellipsis}`;
  return truncated || undefined;
}
|
||||
|
||||
/**
 * Produces a display-safe form of a configured base URL for status output.
 *
 * The value is first normalized to a single-line string (capped at 180
 * characters), then parsed as a URL. Username, password, query string and
 * fragment are removed, trailing slashes are stripped, and the result is
 * capped at 120 characters.
 *
 * @param value - Raw base URL value from configuration; may be any type.
 * @returns The sanitized URL, `undefined` when `value` is not a non-blank
 *   string, or the literal `"[invalid-url]"` when the text cannot be parsed
 *   as a URL.
 */
function sanitizeBaseUrlForStatus(value: unknown): string | undefined {
  const raw = normalizeStatusDetail(value, 180);
  if (!raw) {
    return undefined;
  }
  try {
    const parsed = new URL(raw);
    // Drop everything that can carry credentials or tokens.
    parsed.username = "";
    parsed.password = "";
    parsed.search = "";
    parsed.hash = "";
    const sanitized = parsed.toString().replace(/\/+$/, "");
    return normalizeStatusDetail(sanitized, 120);
  } catch {
    // Never echo unparseable input back; it could still contain secrets.
    return "[invalid-url]";
  }
}
|
||||
|
||||
/**
 * Tells whether a base URL differs from the default OpenAI TTS endpoint.
 *
 * @param baseUrl - Base URL to test; trailing slashes are ignored.
 * @returns `false` when `baseUrl` is missing/empty or equals
 *   `DEFAULT_OPENAI_TTS_BASE_URL` once trailing slashes are stripped;
 *   `true` for any other value.
 */
function isCustomOpenAiTtsBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) {
    return false;
  }
  return baseUrl.replace(/\/+$/, "") !== DEFAULT_OPENAI_TTS_BASE_URL;
}
|
||||
|
||||
/**
 * Picks the first usable status string from a config record.
 *
 * Keys are tried in the order given; each candidate goes through
 * `normalizeStatusDetail`, and the first one that yields a non-empty string
 * is returned.
 *
 * @param record - Provider config record, or `undefined` when none is available.
 * @param keys - Property names to try, in priority order.
 * @returns The first normalized value found, or `undefined` when `record` is
 *   missing or none of the keys holds a usable string.
 */
function firstStatusDetail(
  record: Record<string, unknown> | undefined,
  keys: string[],
): string | undefined {
  if (!record) {
    return undefined;
  }
  for (const key of keys) {
    const value = normalizeStatusDetail(record[key]);
    if (value) {
      return value;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Locates the config record that holds settings for one TTS provider.
 *
 * For `"microsoft"` the result is a merge of four optional sources, later ones
 * overriding earlier ones: top-level `edge`, top-level `microsoft`,
 * `providers.edge`, `providers.microsoft`.
 *
 * For any other provider the first object found wins, in this order:
 * `providers[provider]`, then the top-level key named after the provider,
 * then the TTS config record itself.
 *
 * @param raw - TTS configuration to read from.
 * @param provider - Provider id whose settings are wanted.
 * @returns The record to read provider details from. When `raw` is not an
 *   object the lookups run against an empty record instead of throwing.
 */
function resolveProviderConfigRecord(
  raw: TtsConfig,
  provider: TtsProvider,
): Record<string, unknown> | undefined {
  const rawRecord: Record<string, unknown> = isObjectRecord(raw)
    ? (raw as Record<string, unknown>)
    : {};
  // Read `providers` from the guarded record so a non-object `raw` cannot throw here.
  const providers: Record<string, unknown> = isObjectRecord(rawRecord.providers)
    ? rawRecord.providers
    : {};
  if (provider === "microsoft") {
    return {
      ...(isObjectRecord(rawRecord.edge) ? rawRecord.edge : {}),
      ...(isObjectRecord(rawRecord.microsoft) ? rawRecord.microsoft : {}),
      ...(isObjectRecord(providers.edge) ? providers.edge : {}),
      ...(isObjectRecord(providers.microsoft) ? providers.microsoft : {}),
    };
  }
  const direct = rawRecord[provider];
  const providerScoped = providers[provider];
  if (isObjectRecord(providerScoped)) {
    return providerScoped;
  }
  if (isObjectRecord(direct)) {
    return direct;
  }
  return rawRecord;
}
|
||||
|
||||
/**
 * Collects the optional provider details (name, model, voice, endpoint) that
 * are added to the TTS status snapshot.
 *
 * @param raw - TTS configuration to read provider settings from.
 * @param provider - Selected provider id; `"auto"` yields no details.
 * @returns An object holding only the details that are configured.
 *   `baseUrl`/`customBaseUrl` are included when a sanitized base URL exists
 *   and either the provider is not `"openai"` or the URL is a custom OpenAI
 *   endpoint, so the default OpenAI endpoint is omitted.
 */
function resolveStatusProviderDetails(raw: TtsConfig, provider: TtsProvider) {
  if (provider === "auto") {
    return {};
  }
  const record = resolveProviderConfigRecord(raw, provider);
  const sanitizedBaseUrl = sanitizeBaseUrlForStatus(record?.baseUrl);
  const customBaseUrl = provider === "openai" && isCustomOpenAiTtsBaseUrl(sanitizedBaseUrl);
  const details: Partial<TtsStatusSnapshot> = {};

  const displayName = firstStatusDetail(record, ["displayName"]);
  if (displayName) {
    details.displayName = displayName;
  }
  // Several key spellings are accepted for model and voice; the first one set wins.
  const model = firstStatusDetail(record, ["model", "modelId"]);
  if (model) {
    details.model = model;
  }
  const voice = firstStatusDetail(record, ["voice", "voiceId", "voiceName"]);
  if (voice) {
    details.voice = voice;
  }
  if (sanitizedBaseUrl && (provider !== "openai" || customBaseUrl)) {
    details.baseUrl = sanitizedBaseUrl;
    details.customBaseUrl = customBaseUrl;
  }
  return details;
}
|
||||
|
||||
export function resolveStatusTtsSnapshot(params: {
|
||||
cfg: OpenClawConfig;
|
||||
sessionAuto?: string;
|
||||
@@ -95,12 +212,15 @@ export function resolveStatusTtsSnapshot(params: {
|
||||
return null;
|
||||
}
|
||||
|
||||
const provider =
|
||||
normalizeConfiguredSpeechProviderId(prefs.tts?.provider) ??
|
||||
normalizeConfiguredSpeechProviderId(raw.provider) ??
|
||||
"auto";
|
||||
|
||||
return {
|
||||
autoMode,
|
||||
provider:
|
||||
normalizeConfiguredSpeechProviderId(prefs.tts?.provider) ??
|
||||
normalizeConfiguredSpeechProviderId(raw.provider) ??
|
||||
"auto",
|
||||
provider,
|
||||
...resolveStatusProviderDetails(raw, provider),
|
||||
maxLength: prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH,
|
||||
summarize: prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user