talk: add configurable silence timeout

This commit is contained in:
dano does design
2026-03-08 17:58:15 +11:00
committed by Peter Steinberger
parent 097c588a6b
commit 6ff7e8f42e
18 changed files with 162 additions and 9 deletions

View File

@@ -305,6 +305,7 @@ const TARGET_KEYS = [
"talk.modelId",
"talk.outputFormat",
"talk.interruptOnSpeech",
"talk.silenceTimeoutMs",
"meta",
"env",
"env.shellEnv",

View File

@@ -163,6 +163,8 @@ export const FIELD_HELP: Record<string, string> = {
"Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
"talk.interruptOnSpeech":
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
"talk.silenceTimeoutMs":
"Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (700 ms on macOS and Android, 900 ms on iOS).",
acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.",
"acp.enabled":
"Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.",

View File

@@ -651,6 +651,7 @@ export const FIELD_LABELS: Record<string, string> = {
"talk.modelId": "Talk Model ID",
"talk.outputFormat": "Talk Output Format",
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
messages: "Messages",
"messages.messagePrefix": "Inbound Message Prefix",
"messages.responsePrefix": "Outbound Response Prefix",

View File

@@ -32,6 +32,7 @@ describe("talk normalization", () => {
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
});
expect(normalized).toEqual({
@@ -51,6 +52,7 @@ describe("talk normalization", () => {
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
});
});

View File

@@ -47,6 +47,13 @@ function normalizeTalkSecretInput(value: unknown): TalkProviderConfig["apiKey"]
return coerceSecretRef(value) ?? undefined;
}
/**
 * Validates a raw `silenceTimeoutMs` value.
 *
 * @param value - Untrusted input read from the Talk config.
 * @returns The value unchanged when it is a number that is an integer and
 *   strictly greater than zero; otherwise `undefined` (strings, NaN,
 *   Infinity, fractions, zero, and negatives are all rejected).
 */
function normalizeSilenceTimeoutMs(value: unknown): number | undefined {
  // Number.isInteger already rejects NaN and +/-Infinity, so only the sign check remains.
  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
}
function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined {
if (!isPlainObject(value)) {
return undefined;
@@ -125,6 +132,10 @@ function normalizedLegacyTalkFields(source: Record<string, unknown>): Partial<Ta
if (apiKey !== undefined) {
legacy.apiKey = apiKey;
}
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
if (silenceTimeoutMs !== undefined) {
legacy.silenceTimeoutMs = silenceTimeoutMs;
}
return legacy;
}
@@ -267,6 +278,9 @@ export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined
if (typeof normalized.interruptOnSpeech === "boolean") {
payload.interruptOnSpeech = normalized.interruptOnSpeech;
}
if (typeof normalized.silenceTimeoutMs === "number") {
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
}
if (normalized.providers && Object.keys(normalized.providers).length > 0) {
payload.providers = normalized.providers;
}

View File

@@ -70,6 +70,8 @@ export type TalkConfig = {
providers?: Record<string, TalkProviderConfig>;
/** Stop speaking when user starts talking (default: true). */
interruptOnSpeech?: boolean;
/** Milliseconds of user silence before Talk mode finalizes and sends the transcript (positive integer; leave unset to keep the platform default). */
silenceTimeoutMs?: number;
/**
* Legacy ElevenLabs compatibility fields.

View File

@@ -595,6 +595,7 @@ export const OpenClawSchema = z
outputFormat: z.string().optional(),
apiKey: SecretInputSchema.optional().register(sensitive),
interruptOnSpeech: z.boolean().optional(),
silenceTimeoutMs: z.number().int().positive().optional(),
})
.strict()
.optional(),

View File

@@ -42,6 +42,7 @@ export const TalkConfigResultSchema = Type.Object(
outputFormat: Type.Optional(Type.String()),
apiKey: Type.Optional(Type.String()),
interruptOnSpeech: Type.Optional(Type.Boolean()),
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
},
{ additionalProperties: false },
),

View File

@@ -56,7 +56,11 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) {
});
}
async function writeTalkConfig(config: { apiKey?: string; voiceId?: string }) {
/**
 * Test helper: lazily loads the config module and passes
 * `{ talk: config }` to its `writeConfigFile` export.
 *
 * @param config - Talk settings to store under the `talk` key.
 */
async function writeTalkConfig(config: {
  apiKey?: string;
  voiceId?: string;
  silenceTimeoutMs?: number;
}) {
  const configModule = await import("../config/config.js");
  // Pull the function out first so it is invoked unbound, as a destructured import would be.
  const persistConfig = configModule.writeConfigFile;
  await persistConfig({ talk: config });
}
@@ -68,6 +72,7 @@ describe("gateway talk.config", () => {
talk: {
voiceId: "voice-123",
apiKey: "secret-key-abc", // pragma: allowlist secret
silenceTimeoutMs: 1500,
},
session: {
mainKey: "main-test",
@@ -88,6 +93,7 @@ describe("gateway talk.config", () => {
};
apiKey?: string;
voiceId?: string;
silenceTimeoutMs?: number;
};
};
}>(ws, "talk.config", {});
@@ -99,6 +105,7 @@ describe("gateway talk.config", () => {
);
expect(res.payload?.config?.talk?.voiceId).toBe("voice-123");
expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__");
expect(res.payload?.config?.talk?.silenceTimeoutMs).toBe(1500);
});
});