fix: tighten Discord voice wake matching (#86595)

* fix: tighten Discord voice wake matching

* test: avoid wildcard model runtime normalization
This commit is contained in:
Peter Steinberger
2026-05-25 19:51:32 +01:00
committed by GitHub
parent baab4cf045
commit f00a912c25
12 changed files with 535 additions and 118 deletions

View File

@@ -40,7 +40,7 @@ Docs: https://docs.openclaw.ai
- Agents/memory: return optional not-found context for missing date-only daily memory reads instead of logging benign first-run `ENOENT` failures. Fixes #82928. Thanks @galiniliev.
- Discord: merge streamed text captions into following media block replies so captions and attachments send as one message. (#86487) Thanks @neeravmakwana.
- Gateway: avoid sending duplicate tool-event frames to Control UI connections that are subscribed by both run and session.
- Discord/OpenAI voice: accept broader leading fuzzy wake-name transcripts while keeping ambient speech gated.
- Discord/OpenAI voice: accept broader fuzzy wake-name transcripts at the start or end of an utterance while keeping ambient speech gated.
- Discord/OpenAI voice: accept longer leading wake-name mistranscripts such as "Open Club" for OpenClaw.
- Agents/OpenAI-compatible: stop ModelStudio-compatible chat requests before sending system/tool-only payloads that have no usable user or assistant turn. (#86177) Thanks @TurboTheTurtle.
- Gateway/plugins: reuse plugin package realpath checks while building installed plugin indexes so startup avoids repeated filesystem resolution work.

View File

@@ -1234,7 +1234,7 @@ Notes:
- In `stt-tts` mode, STT uses `tools.media.audio`; `voice.model` does not affect transcription.
- In realtime modes, `voice.realtime.provider`, `voice.realtime.model`, and `voice.realtime.voice` configure the realtime audio session. For OpenAI Realtime 2 plus the Codex brain, use `voice.realtime.model: "gpt-realtime-2"` and `voice.model: "openai-codex/gpt-5.5"`.
- Realtime voice modes include small `IDENTITY.md`, `USER.md`, and `SOUL.md` profile files in the realtime provider instructions by default so fast direct turns keep the same identity, user grounding, and persona as the routed OpenClaw agent. Set `voice.realtime.bootstrapContextFiles` to a subset to customize this, or `[]` to disable it. The supported realtime bootstrap files are limited to those profile files; `AGENTS.md` stays in the normal agent context. The injected profile context does not replace `openclaw_agent_consult` for workspace work, current facts, memory lookup, or tool-backed actions.
- In OpenAI `agent-proxy` realtime mode, set `voice.realtime.requireWakeName: true` to keep Discord realtime voice silent until a transcript contains a wake name. If `voice.realtime.wakeNames` is unset, OpenClaw uses the routed agent `name` plus `OpenClaw`, falling back to the agent id plus `OpenClaw`. Wake-name gating disables realtime provider auto-response and routes accepted turns through the OpenClaw agent consult path.
- In OpenAI `agent-proxy` realtime mode, set `voice.realtime.requireWakeName: true` to keep Discord realtime voice silent until a transcript starts or ends with a wake name. Configured wake names must be one or two words. If `voice.realtime.wakeNames` is unset, OpenClaw uses the routed agent `name` plus `OpenClaw`, falling back to the agent id plus `OpenClaw`. Wake-name gating disables realtime provider auto-response and routes accepted turns through the OpenClaw agent consult path.
- The OpenAI realtime provider accepts current Realtime 2 event names and legacy Codex-compatible aliases for output audio and transcript events, so compatible provider snapshots can drift without dropping assistant audio.
- `voice.realtime.bargeIn` controls whether Discord speaker-start events interrupt active realtime playback. If unset, it follows the realtime provider's input-audio interruption setting.
- `voice.realtime.minBargeInAudioEndMs` controls the minimum assistant playback duration before an OpenAI realtime barge-in truncates audio. Default: `250`. Set `0` for immediate interruption in low-echo rooms, or raise it for echo-heavy speaker setups.

View File

@@ -244,7 +244,9 @@ describe("discord config schema", () => {
{ mode: "bidi", realtime: { toolPolicy: "dangerous" } },
{ mode: "agent-proxy", realtime: { consultPolicy: "substantive" } },
{ mode: "bidi", realtime: { bootstrapContextFiles: ["AGENTS.md"] } },
{ mode: "agent-proxy", realtime: { wakeNames: [] } },
{ mode: "agent-proxy", realtime: { wakeNames: [""] } },
{ mode: "agent-proxy", realtime: { wakeNames: ["Claw Bot Helper"] } },
{ mode: "agent-proxy", realtime: { debounceMs: 10_001 } },
{ mode: "agent-proxy", realtime: { minBargeInAudioEndMs: -1 } },
{ mode: "agent-proxy", realtime: { minBargeInAudioEndMs: 10_001 } },

View File

@@ -239,7 +239,7 @@ export const discordChannelConfigUiHints = {
},
"voice.realtime.wakeNames": {
label: "Discord Realtime Wake Names",
help: "Names that allow OpenAI agent-proxy Discord realtime voice to respond when requireWakeName is enabled.",
help: "One- or two-word activation names that allow OpenAI agent-proxy Discord realtime voice to respond when requireWakeName is enabled.",
},
"voice.realtime.bootstrapContextFiles": {
label: "Discord Realtime Bootstrap Context Files",

View File

@@ -7,6 +7,7 @@ import { asObjectRecord, normalizeLegacyChannelAliases } from "openclaw/plugin-s
import { resolveDiscordPreviewStreamMode } from "./preview-streaming.js";
const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const;
const DISCORD_REALTIME_WAKE_NAME_MAX_WORDS = 2;
type AgentBindingConfig = NonNullable<OpenClawConfig["bindings"]>[number];
function hasLegacyTtsProviderKeys(value: unknown): boolean {
@@ -77,6 +78,51 @@ function hasLegacyDiscordAccountGuildChannelAgentId(value: unknown): boolean {
return Object.values(accounts).some((account) => hasLegacyDiscordGuildChannelAgentId(account));
}
/**
 * Counts the words in a wake name.
 *
 * A word is a maximal run of ASCII letters or digits; every other character
 * (whitespace, punctuation, non-ASCII text) acts as a separator.
 */
function realtimeWakeNameWordCount(value: string): number {
  const words = value.match(/[a-z0-9]+/gi);
  return words === null ? 0 : words.length;
}
/**
 * Rewrites a wake name as its leading ASCII alphanumeric words joined by
 * single spaces, keeping at most DISCORD_REALTIME_WAKE_NAME_MAX_WORDS words.
 *
 * @returns the shortened name, or `undefined` when the value has no
 *   alphanumeric word at all.
 */
function normalizeRealtimeWakeName(value: string): string | undefined {
  const tokens = value.match(/[a-z0-9]+/gi) ?? [];
  return tokens.length > 0
    ? tokens.slice(0, DISCORD_REALTIME_WAKE_NAME_MAX_WORDS).join(" ")
    : undefined;
}
/**
 * Reports whether a wake name has between one and
 * DISCORD_REALTIME_WAKE_NAME_MAX_WORDS words (inclusive).
 */
function isSupportedRealtimeWakeName(value: string): boolean {
  const words = realtimeWakeNameWordCount(value);
  if (words < 1) {
    return false;
  }
  return words <= DISCORD_REALTIME_WAKE_NAME_MAX_WORDS;
}
/**
 * Checks a `voice` config value for a `realtime.wakeNames` list that needs repair.
 *
 * A list needs repair when it is an empty array or when any string entry is not
 * a supported wake name. Non-array values and non-string entries are ignored.
 */
function hasUnsupportedRealtimeWakeNamesInVoice(value: unknown): boolean {
  const voiceRecord = asObjectRecord(value);
  const realtimeRecord = asObjectRecord(voiceRecord?.realtime);
  const names = realtimeRecord?.wakeNames;
  if (!Array.isArray(names)) {
    return false;
  }
  if (names.length === 0) {
    return true;
  }
  for (const candidate of names) {
    if (typeof candidate === "string" && !isSupportedRealtimeWakeName(candidate)) {
      return true;
    }
  }
  return false;
}
/**
 * Legacy-rule matcher for a Discord config entry: true when the entry's
 * `voice` section carries an empty or unsupported realtime wake-name list.
 */
function hasUnsupportedDiscordRealtimeWakeNames(value: unknown): boolean {
  const record = asObjectRecord(value);
  return record ? hasUnsupportedRealtimeWakeNamesInVoice(record.voice) : false;
}
/**
 * Legacy-rule matcher for the Discord `accounts` map: true when at least one
 * account entry has an empty or unsupported realtime wake-name list.
 */
function hasUnsupportedDiscordAccountRealtimeWakeNames(value: unknown): boolean {
  const accountsById = asObjectRecord(value);
  if (accountsById) {
    for (const account of Object.values(accountsById)) {
      if (hasUnsupportedDiscordRealtimeWakeNames(account)) {
        return true;
      }
    }
  }
  return false;
}
function mergeMissing(target: Record<string, unknown>, source: Record<string, unknown>) {
for (const [key, value] of Object.entries(source)) {
if (value === undefined) {
@@ -152,6 +198,83 @@ function migrateLegacyTtsConfig(
return changed;
}
/**
 * Repairs `voice.realtime.wakeNames` on a Discord config entry.
 *
 * - An empty list is removed.
 * - String entries that are not supported wake names are shortened via
 *   `normalizeRealtimeWakeName`; entries with no usable words are dropped.
 * - Non-string entries and already-supported names are kept untouched.
 * - When anything changed, the resulting list is de-duplicated, and the key is
 *   deleted if nothing remains.
 *
 * @param entry      Config entry to inspect; never mutated.
 * @param pathPrefix Config path of the entry, used in the change messages.
 * @param changes    Collector that receives one human-readable line per kind of fix.
 * @returns the original entry with `changed: false` when nothing needed fixing,
 *   otherwise a shallow-copied entry with `changed: true`.
 */
function normalizeUnsupportedRealtimeWakeNames(
  entry: Record<string, unknown>,
  pathPrefix: string,
  changes: string[],
): { entry: Record<string, unknown>; changed: boolean } {
  const voice = asObjectRecord(entry.voice);
  const realtime = asObjectRecord(voice?.realtime);
  const wakeNames = realtime?.wakeNames;
  if (!voice || !realtime || !Array.isArray(wakeNames)) {
    return { entry, changed: false };
  }

  const kept: unknown[] = [];
  let shortened = 0;
  let dropped = 0;
  for (const wakeName of wakeNames) {
    if (typeof wakeName !== "string" || isSupportedRealtimeWakeName(wakeName)) {
      kept.push(wakeName);
      continue;
    }
    const replacement = normalizeRealtimeWakeName(wakeName);
    if (replacement) {
      shortened += 1;
      kept.push(replacement);
    } else {
      dropped += 1;
    }
  }

  if (wakeNames.length === 0) {
    changes.push(
      `Removed empty ${pathPrefix}.voice.realtime.wakeNames; unset wake names use the default agent/OpenClaw fallback.`,
    );
  } else {
    if (shortened === 0 && dropped === 0) {
      return { entry, changed: false };
    }
    if (shortened > 0) {
      changes.push(
        `Shortened ${shortened} unsupported ${pathPrefix}.voice.realtime.wakeNames entries to one or two words.`,
      );
    }
    if (dropped > 0) {
      changes.push(
        `Removed ${dropped} unsupported ${pathPrefix}.voice.realtime.wakeNames entries with no usable words.`,
      );
    }
  }

  // Shortening can collapse two entries onto the same name, so de-duplicate
  // before writing the list back; an empty result means the key is unset.
  const uniqueWakeNames = Array.from(new Set(kept));
  const nextRealtime = { ...realtime };
  if (uniqueWakeNames.length > 0) {
    nextRealtime.wakeNames = uniqueWakeNames;
  } else {
    delete nextRealtime.wakeNames;
  }
  return {
    entry: { ...entry, voice: { ...voice, realtime: nextRealtime } },
    changed: true,
  };
}
function normalizeDiscordGuildChannelAllowAliases(params: {
entry: Record<string, unknown>;
pathPrefix: string;
@@ -343,6 +466,18 @@ export const legacyConfigRules: ChannelDoctorLegacyConfigRule[] = [
'channels.discord.accounts.<id>.guilds.<id>.channels.<id>.agentId is legacy; use top-level bindings[] with match.accountId for per-channel Discord agent routing. Run "openclaw doctor --fix".',
match: hasLegacyDiscordAccountGuildChannelAgentId,
},
{
path: ["channels", "discord"],
message:
'channels.discord.voice.realtime.wakeNames entries longer than two words are unsupported; use one- or two-word activation names. Run "openclaw doctor --fix".',
match: hasUnsupportedDiscordRealtimeWakeNames,
},
{
path: ["channels", "discord", "accounts"],
message:
'channels.discord.accounts.<id>.voice.realtime.wakeNames entries longer than two words are unsupported; use one- or two-word activation names. Run "openclaw doctor --fix".',
match: hasUnsupportedDiscordAccountRealtimeWakeNames,
},
];
export function normalizeCompatibilityConfig({
@@ -438,6 +573,13 @@ export function normalizeCompatibilityConfig({
});
nextAccount = normalizedAgentIds.entry;
accountChanged = accountChanged || normalizedAgentIds.changed;
const normalizedWakeNames = normalizeUnsupportedRealtimeWakeNames(
nextAccount,
`channels.discord.accounts.${accountId}`,
changes,
);
nextAccount = normalizedWakeNames.entry;
accountChanged = accountChanged || normalizedWakeNames.changed;
if (!accountChanged) {
continue;
}
@@ -458,6 +600,13 @@ export function normalizeCompatibilityConfig({
updated = { ...updated, voice };
changed = true;
}
const normalizedWakeNames = normalizeUnsupportedRealtimeWakeNames(
updated,
"channels.discord",
changes,
);
updated = normalizedWakeNames.entry;
changed = changed || normalizedWakeNames.changed;
if (!changed) {
return { config: cfg, changes: [] };

View File

@@ -118,6 +118,68 @@ describe("discord doctor", () => {
expect(mainTts?.edge).toBeUndefined();
});
// Covers the doctor compatibility normalizer for voice.realtime.wakeNames:
// names with more than two words are shortened to their first two words,
// duplicates produced by shortening are collapsed, and an empty list is unset.
it("shortens unsupported Discord realtime wake names and removes empty lists", () => {
  const normalize = getDiscordCompatibilityNormalizer();
  const result = normalize({
    cfg: {
      channels: {
        discord: {
          voice: {
            realtime: {
              // Only the middle entry has more than two words.
              wakeNames: ["Claw", "Claw Bot Helper", "Open Claw"],
            },
          },
          accounts: {
            work: {
              voice: {
                realtime: {
                  // Shortening the first entry yields a duplicate of the second.
                  wakeNames: ["Work Bot Helper", "Work Bot"],
                },
              },
            },
            invalid: {
              voice: {
                realtime: {
                  wakeNames: ["Only Three Words"],
                },
              },
            },
            empty: {
              voice: {
                realtime: {
                  wakeNames: [],
                },
              },
            },
          },
        },
      },
    } as never,
  });

  // Account-level changes are reported before the channel-level change.
  expect(result.changes).toEqual([
    "Shortened 1 unsupported channels.discord.accounts.work.voice.realtime.wakeNames entries to one or two words.",
    "Shortened 1 unsupported channels.discord.accounts.invalid.voice.realtime.wakeNames entries to one or two words.",
    "Removed empty channels.discord.accounts.empty.voice.realtime.wakeNames; unset wake names use the default agent/OpenClaw fallback.",
    "Shortened 1 unsupported channels.discord.voice.realtime.wakeNames entries to one or two words.",
  ]);
  expect(result.config.channels?.discord?.voice?.realtime?.wakeNames).toEqual([
    "Claw",
    "Claw Bot",
    "Open Claw",
  ]);
  expect(result.config.channels?.discord?.accounts?.work?.voice?.realtime?.wakeNames).toEqual([
    "Work Bot",
  ]);
  expect(result.config.channels?.discord?.accounts?.invalid?.voice?.realtime?.wakeNames).toEqual([
    "Only Three",
  ]);
  expect(result.config.channels?.discord?.accounts?.empty?.voice?.realtime?.wakeNames).toBe(
    undefined,
  );
});
it("moves legacy guild channel allow toggles into enabled", () => {
const normalize = getDiscordCompatibilityNormalizer();

View File

@@ -3018,6 +3018,63 @@ describe("DiscordVoiceManager", () => {
expectUserMessageIncludes("openclaw wake answer");
});
// With requireWakeName enabled and no wakeNames configured, a three-word agent
// identity name must not act as a wake name, while "OpenClaw" must still wake.
it("ignores default agent wake names longer than two words", async () => {
agentCommandMock.mockResolvedValueOnce({ payloads: [{ text: "fallback wake answer" }] });
const manager = createManager(
{
groupPolicy: "open",
voice: {
enabled: true,
mode: "agent-proxy",
realtime: { provider: "openai", consultPolicy: "auto", requireWakeName: true },
},
},
undefined,
{
agents: {
// Three-word identity name: expected to be excluded from the default wake names.
list: [{ id: "agent-1", identity: { name: "Claw Bot Helper" } }],
},
},
);
await manager.join({ guildId: "g1", channelId: "1001" });
const entry = getSessionEntry(manager) as {
realtime?: {
beginSpeakerTurn: (
context: { extraSystemPrompt?: string; senderIsOwner: boolean; speakerLabel: string },
userId: string,
) => { close: () => void; sendInputAudio: (audio: Buffer) => void };
};
};
const bridgeParams = lastRealtimeBridgeParams() as
| {
onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
}
| undefined;
// First turn: transcript leads with the three-word agent name.
const longNameTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
longNameTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "Claw Bot Helper, should not wake", true);
// NOTE(review): the 260 ms wait presumably outlasts the realtime consult delay — confirm.
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandMock).not.toHaveBeenCalled();
// Second turn: the product wake name is accepted and stripped from the forwarded message.
const fallbackTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
fallbackTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "OpenClaw, fallback still wakes", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(lastAgentCommandArgs().message).toContain("fallback still wakes");
expect(lastAgentCommandArgs().message).not.toContain("OpenClaw");
expectUserMessageIncludes("fallback wake answer");
});
it("accepts leading fuzzy wake names before realtime agent-proxy consults", async () => {
const manager = createManager(
{
@@ -3084,6 +3141,17 @@ describe("DiscordVoiceManager", () => {
expect(agentCommandArgsAt(2).message).toContain("step through the maintainer queue.");
expect(agentCommandArgsAt(2).message).not.toContain("Multi");
const martyTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
martyTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "Marty, can you hear me?", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandArgsAt(3).message).toContain("can you hear me?");
expect(agentCommandArgsAt(3).message).not.toContain("Marty");
const openClawTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
@@ -3092,8 +3160,8 @@ describe("DiscordVoiceManager", () => {
bridgeParams?.onTranscript?.("user", "Open claw can you still hear me?", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandArgsAt(3).message).toContain("can you still hear me?");
expect(agentCommandArgsAt(3).message).not.toContain("Open claw");
expect(agentCommandArgsAt(4).message).toContain("can you still hear me?");
expect(agentCommandArgsAt(4).message).not.toContain("Open claw");
const openClubTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
@@ -3103,8 +3171,8 @@ describe("DiscordVoiceManager", () => {
bridgeParams?.onTranscript?.("user", "Open Club, can you hear me now?", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandArgsAt(4).message).toContain("can you hear me now?");
expect(agentCommandArgsAt(4).message).not.toContain("Open Club");
expect(agentCommandArgsAt(5).message).toContain("can you hear me now?");
expect(agentCommandArgsAt(5).message).not.toContain("Open Club");
const openCloudTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
@@ -3114,8 +3182,19 @@ describe("DiscordVoiceManager", () => {
bridgeParams?.onTranscript?.("user", "Open Cloud, can you hear me too?", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandArgsAt(5).message).toContain("can you hear me too?");
expect(agentCommandArgsAt(5).message).not.toContain("Open Cloud");
expect(agentCommandArgsAt(6).message).toContain("can you hear me too?");
expect(agentCommandArgsAt(6).message).not.toContain("Open Cloud");
const trailingMultiTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
trailingMultiTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "Can you still hear trailing, Multi.", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandArgsAt(7).message).toContain("Can you still hear trailing");
expect(agentCommandArgsAt(7).message).not.toContain("Multi");
const openChatTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
@@ -3125,7 +3204,7 @@ describe("DiscordVoiceManager", () => {
bridgeParams?.onTranscript?.("user", "Open chat, can you hear me now?", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(agentCommandMock).toHaveBeenCalledTimes(6);
expect(agentCommandMock).toHaveBeenCalledTimes(8);
});
it("rejects non-wake fuzzy leading phrases before realtime agent-proxy consults", async () => {
@@ -3169,6 +3248,14 @@ describe("DiscordVoiceManager", () => {
bridgeParams?.onTranscript?.("user", "This is a multi-step maintainer problem.", true);
await new Promise((resolve) => setTimeout(resolve, 260));
const middleWakeWordTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
middleWakeWordTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "I asked multi about this already.", true);
await new Promise((resolve) => setTimeout(resolve, 260));
const openLawTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
@@ -3217,7 +3304,7 @@ describe("DiscordVoiceManager", () => {
provider: "openai",
consultPolicy: "auto",
requireWakeName: true,
wakeNames: ["Claw", "Claw Bot"],
wakeNames: ["Claw", "Claw Bot", "Okay Google"],
},
},
});
@@ -3249,6 +3336,68 @@ describe("DiscordVoiceManager", () => {
expect(lastAgentCommandArgs().message).not.toContain("Claw");
expect(lastAgentCommandArgs().message).not.toContain("Bot");
expectUserMessageIncludes("configured wake answer");
const openerTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
openerTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "Okay Google, try the opener name", true);
await new Promise((resolve) => setTimeout(resolve, 260));
expect(lastAgentCommandArgs().message).toContain("try the opener name");
expect(lastAgentCommandArgs().message).not.toContain("Okay");
expect(lastAgentCommandArgs().message).not.toContain("Google");
expect(agentCommandMock).toHaveBeenCalledTimes(2);
});
// When wakeNames is configured but its only entry has three words, neither that
// name nor the "OpenClaw" product name triggers an agent consult.
it("does not accept configured realtime wake names longer than two words", async () => {
const manager = createManager({
groupPolicy: "open",
voice: {
enabled: true,
mode: "agent-proxy",
realtime: {
provider: "openai",
consultPolicy: "auto",
requireWakeName: true,
// Only configured name, and it has three words.
wakeNames: ["Claw Bot Helper"],
},
},
});
await manager.join({ guildId: "g1", channelId: "1001" });
const entry = getSessionEntry(manager) as {
realtime?: {
beginSpeakerTurn: (
context: { extraSystemPrompt?: string; senderIsOwner: boolean; speakerLabel: string },
userId: string,
) => { close: () => void; sendInputAudio: (audio: Buffer) => void };
};
};
const turn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
turn?.sendInputAudio(Buffer.alloc(8));
const bridgeParams = lastRealtimeBridgeParams() as
| {
onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
}
| undefined;
// Transcript leading with the configured three-word name.
bridgeParams?.onTranscript?.("user", "Claw Bot Helper, ship it", true);
// NOTE(review): the 260 ms wait presumably outlasts the realtime consult delay — confirm.
await new Promise((resolve) => setTimeout(resolve, 260));
// Transcript leading with the product name, which is not in the configured list.
const fallbackTurn = entry.realtime?.beginSpeakerTurn(
{ extraSystemPrompt: undefined, senderIsOwner: true, speakerLabel: "Owner" },
"u-owner",
);
fallbackTurn?.sendInputAudio(Buffer.alloc(8));
bridgeParams?.onTranscript?.("user", "OpenClaw, ship it", true);
await new Promise((resolve) => setTimeout(resolve, 260));
// A single assertion covers both turns: no consult was issued for either transcript.
expect(agentCommandMock).not.toHaveBeenCalled();
});
it("lets status questions fall back to normal realtime handling when no run is active", async () => {

View File

@@ -65,7 +65,7 @@ const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 60_000;
const DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS = 5_000;
const DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS = 1_500;
const DISCORD_REALTIME_WAKE_NAME_FUZZY_PREFIX_WORDS = 3;
const DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS = 2;
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
const DISCORD_RAW_PCM_FRAME_BYTES = 3_840;
const DISCORD_REALTIME_OUTPUT_PREROLL_FRAMES = 25;
@@ -360,6 +360,17 @@ function normalizeWakeName(value: string): string | undefined {
return normalized || undefined;
}
/**
 * Normalizes a wake name and accepts it only when the normalized form has
 * between one and DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS alphanumeric words.
 *
 * @returns the normalized name, or `undefined` for non-string input, names that
 *   normalize to nothing, and names with too many words.
 */
function normalizeSupportedWakeName(value: string | undefined): string | undefined {
  if (typeof value === "string") {
    const normalized = normalizeWakeName(value);
    const words = normalized ? (normalized.match(/[a-z0-9]+/gi)?.length ?? 0) : 0;
    if (words >= 1 && words <= DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS) {
      return normalized;
    }
  }
  return undefined;
}
function normalizeWakeNameCandidate(value: string): string | undefined {
const normalized = value
.toLowerCase()
@@ -373,39 +384,10 @@ function compactWakeName(value: string): string {
return value.replace(/[^a-z0-9]+/g, "");
}
function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
function includesWakeName(text: string, wakeName: string): boolean {
const normalizedText = normalizeRealtimeConsultMatchText(text);
const normalizedName = normalizeWakeName(wakeName);
if (!normalizedName) {
return false;
}
const pattern = new RegExp(`(^|[^a-z0-9])${escapeRegExp(normalizedName)}([^a-z0-9]|$)`);
return pattern.test(normalizedText);
}
function stripLeadingWakeName(text: string, wakeName: string): string {
const normalizedName = normalizeWakeName(wakeName);
if (!normalizedName) {
return text.trim();
}
const wakePattern = normalizedName.split(" ").map(escapeRegExp).join("\\s+");
return text
.replace(
new RegExp(
`^\\s*(?:(?:hey|ok|okay)(?:\\s*[-,:;]+\\s*|\\s+))?${wakePattern}(?:\\s*[-,:;]+\\s*|\\s+)`,
"i",
),
"",
)
.trim();
}
type LeadingWakeNameCandidate = {
type EdgeWakeNameCandidate = {
edge: "leading" | "trailing";
heardName: string;
startIndex: number;
endIndex: number;
strongBoundary: boolean;
};
@@ -415,39 +397,87 @@ type WakeNameTranscriptResult =
| { allowed: false; text: string };
type AllowedWakeNameTranscriptResult = Extract<WakeNameTranscriptResult, { allowed: true }>;
function leadingWakeNameCandidates(text: string): LeadingWakeNameCandidate[] {
function leadingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
const opener = /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text);
const nameStart = opener?.[0].length ?? 0;
const candidates: LeadingWakeNameCandidate[] = [];
const tokenPattern = /[a-z0-9]+/gi;
tokenPattern.lastIndex = nameStart;
const candidates: EdgeWakeNameCandidate[] = [];
const candidateStarts = nameStart > 0 ? [0, nameStart] : [0];
for (
let wordCount = 0;
wordCount < DISCORD_REALTIME_WAKE_NAME_FUZZY_PREFIX_WORDS;
wordCount += 1
) {
const token = tokenPattern.exec(text);
if (!token) {
for (const startIndex of candidateStarts) {
const tokenPattern = /[a-z0-9]+/gi;
tokenPattern.lastIndex = startIndex;
const startCandidates: EdgeWakeNameCandidate[] = [];
for (let wordCount = 0; wordCount < maxWords; wordCount += 1) {
const token = tokenPattern.exec(text);
if (!token) {
break;
}
const previousEndIndex =
wordCount === 0 ? startIndex : startCandidates[wordCount - 1]?.endIndex;
const between = text.slice(previousEndIndex, token.index);
if (wordCount > 0 && !/^[\s'-]+$/.test(between)) {
break;
}
const endIndex = token.index + token[0].length;
const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
if (!heardName) {
break;
}
const boundary = text.slice(endIndex).match(/^\s*([,.:;!?-]|$)/);
startCandidates.push({
edge: "leading",
heardName,
startIndex,
endIndex,
strongBoundary: Boolean(boundary),
});
}
candidates.push(...startCandidates);
}
return candidates;
}
function trailingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
const tokens = Array.from(text.matchAll(/[a-z0-9]+/gi));
const candidates: EdgeWakeNameCandidate[] = [];
const tokenCount = Math.min(tokens.length, maxWords);
for (let wordCount = 1; wordCount <= tokenCount; wordCount += 1) {
const startToken = tokens[tokens.length - wordCount];
const endToken = tokens[tokens.length - 1];
if (!startToken || !endToken?.[0]) {
break;
}
const between = text.slice(
wordCount === 0 ? nameStart : candidates[wordCount - 1]?.endIndex,
token.index,
);
if (wordCount > 0 && !/^[\s'-]+$/.test(between)) {
const startIndex = startToken.index ?? 0;
const endIndex = (endToken.index ?? 0) + endToken[0].length;
if (!/^\s*(?:[,.:;!?-]+\s*)?$/.test(text.slice(endIndex))) {
break;
}
const endIndex = token.index + token[0].length;
const heardName = normalizeWakeNameCandidate(text.slice(nameStart, endIndex));
if (!/(^|[\s,.:;!?-])$/.test(text.slice(0, startIndex))) {
break;
}
if (wordCount > 1) {
const previousToken = tokens[tokens.length - wordCount + 1];
const between = previousToken
? text.slice(startIndex + startToken[0].length, previousToken.index)
: "";
if (!/^[\s'-]+$/.test(between)) {
break;
}
}
const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
if (!heardName) {
break;
}
const boundary = text.slice(endIndex).match(/^\s*([,.:;!?-]|$)/);
candidates.push({
edge: "trailing",
heardName,
startIndex,
endIndex,
strongBoundary: Boolean(boundary),
strongBoundary: true,
});
}
@@ -481,11 +511,12 @@ function levenshteinDistance(left: string, right: string): number {
return previous[right.length] ?? Math.max(left.length, right.length);
}
function hasOnlyVowelLikeSubstitutions(left: string, right: string): boolean {
function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
if (left.length !== right.length) {
return false;
}
const vowels = new Set(["a", "e", "i", "o", "u", "y"]);
const liquids = new Set(["l", "r"]);
let substitutions = 0;
for (let index = 0; index < left.length; index += 1) {
const leftChar = left[index];
@@ -493,7 +524,9 @@ function hasOnlyVowelLikeSubstitutions(left: string, right: string): boolean {
if (leftChar === rightChar) {
continue;
}
if (!vowels.has(leftChar ?? "") || !vowels.has(rightChar ?? "")) {
const vowelLike = vowels.has(leftChar ?? "") && vowels.has(rightChar ?? "");
const liquidLike = liquids.has(leftChar ?? "") && liquids.has(rightChar ?? "");
if (!vowelLike && !liquidLike) {
return false;
}
substitutions += 1;
@@ -511,7 +544,7 @@ function commonPrefixLength(left: string, right: string): number {
return limit;
}
function isFuzzyWakeNameMatch(candidate: LeadingWakeNameCandidate, wakeName: string): boolean {
function isFuzzyWakeNameMatch(candidate: EdgeWakeNameCandidate, wakeName: string): boolean {
const normalizedWakeName = normalizeWakeNameCandidate(wakeName);
if (!normalizedWakeName) {
return false;
@@ -536,7 +569,7 @@ function isFuzzyWakeNameMatch(candidate: LeadingWakeNameCandidate, wakeName: str
heardCompact.length >= 4 &&
wakeCompact.length >= 5 &&
(heardCompact.length !== wakeCompact.length ||
hasOnlyVowelLikeSubstitutions(heardCompact, wakeCompact) ||
hasOnlyPhoneticSubstitutions(heardCompact, wakeCompact) ||
commonPrefixLength(heardCompact, wakeCompact) >= 6)
) {
return true;
@@ -553,18 +586,31 @@ function isFuzzyWakeNameMatch(candidate: LeadingWakeNameCandidate, wakeName: str
return false;
}
function stripLeadingWakeNameCandidate(text: string, candidate: LeadingWakeNameCandidate): string {
function stripEdgeWakeNameCandidate(text: string, candidate: EdgeWakeNameCandidate): string {
if (candidate.edge === "leading") {
return text
.slice(candidate.endIndex)
.replace(/^\s*(?:[-,:;.!?]+\s*)?/, "")
.trim();
}
return text
.slice(candidate.endIndex)
.replace(/^\s*(?:[-,:;.!?]+\s*)?/, "")
.slice(0, candidate.startIndex)
.replace(/\s*(?:[-,:;.!?]+\s*)?$/, "")
.trim();
}
function matchLeadingFuzzyWakeName(
function matchEdgeWakeName(
text: string,
wakeNames: string[],
): AllowedWakeNameTranscriptResult | undefined {
for (const candidate of leadingWakeNameCandidates(text)) {
const candidates = [
...leadingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS),
...trailingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS),
].toSorted(
(left, right) =>
compactWakeName(right.heardName).length - compactWakeName(left.heardName).length,
);
for (const candidate of candidates) {
for (const wakeName of wakeNames) {
const normalizedWakeName = normalizeWakeNameCandidate(wakeName);
if (!normalizedWakeName) {
@@ -575,7 +621,7 @@ function matchLeadingFuzzyWakeName(
if (heardCompact === wakeCompact || isFuzzyWakeNameMatch(candidate, wakeName)) {
return {
allowed: true,
text: stripLeadingWakeNameCandidate(text, candidate),
text: stripEdgeWakeNameCandidate(text, candidate),
wakeName,
heardName: candidate.heardName,
match: heardCompact === wakeCompact ? "exact" : "fuzzy",
@@ -591,24 +637,25 @@ function resolveDiscordRealtimeWakeNames(params: {
cfg: OpenClawConfig;
agentId: string;
}): string[] {
const configured = params.config?.wakeNames
?.map((name) => normalizeWakeName(name))
.filter((name): name is string => Boolean(name));
if (configured && configured.length > 0) {
const rawConfigured = params.config?.wakeNames;
if (rawConfigured) {
const configured = rawConfigured
.map((name) => normalizeSupportedWakeName(name))
.filter((name): name is string => Boolean(name));
return sortWakeNames(Array.from(new Set(configured)));
}
const agent = params.cfg.agents?.list?.find((candidate) => candidate.id === params.agentId);
const configuredAgentNames = [agent?.name, agent?.identity?.name]
.map((name) => (typeof name === "string" ? normalizeWakeName(name) : undefined))
.map((name) => normalizeSupportedWakeName(name))
.filter((name): name is string => Boolean(name));
const productWakeNames = [normalizeWakeName("OpenClaw")].filter((name): name is string =>
const productWakeNames = [normalizeSupportedWakeName("OpenClaw")].filter((name): name is string =>
Boolean(name),
);
const defaults =
configuredAgentNames.length > 0
? [...configuredAgentNames, ...productWakeNames]
: [normalizeWakeName(params.agentId), ...productWakeNames].filter((name): name is string =>
Boolean(name),
: [normalizeSupportedWakeName(params.agentId), ...productWakeNames].filter(
(name): name is string => Boolean(name),
);
return sortWakeNames(Array.from(new Set(defaults)));
}
@@ -1481,22 +1528,12 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
if (!this.requireWakeName) {
return { allowed: true, text, wakeName: "", heardName: "", match: "exact" };
}
const wakeName = this.wakeNames.find((name) => includesWakeName(text, name));
if (wakeName) {
return {
allowed: true,
text: stripLeadingWakeName(text, wakeName),
wakeName,
heardName: wakeName,
match: "exact",
};
}
const fuzzyWakeName = matchLeadingFuzzyWakeName(text, this.wakeNames);
if (fuzzyWakeName) {
const wakeNameResult = matchEdgeWakeName(text, this.wakeNames);
if (wakeNameResult) {
logger.info(
`discord voice: realtime wake-name gate matched canonical=${fuzzyWakeName.wakeName} heard=${fuzzyWakeName.heardName} match=${fuzzyWakeName.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
`discord voice: realtime wake-name gate matched canonical=${wakeNameResult.wakeName} heard=${wakeNameResult.heardName} match=${wakeNameResult.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
);
return fuzzyWakeName;
return wakeNameResult;
}
return { allowed: false, text };
}

View File

@@ -1,9 +1,20 @@
import { describe, expect, it, vi } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { resolveVisibleModelCatalog } from "./model-catalog-visibility.js";
import type { ModelCatalogEntry } from "./model-catalog.types.js";
// Shared mock handle for the runtime provider-model normalization function.
// It is created via vi.hoisted so the vi.mock factory below can reference it
// (NOTE(review): presumably because vitest hoists vi.mock calls above module
// imports — confirm against the vitest docs).
const normalizeProviderModelIdWithRuntimeMock = vi.hoisted(() => vi.fn());
// Replace the runtime normalization module with a thin wrapper that forwards
// every call to the mock above, so tests can assert whether it was invoked.
vi.mock("./provider-model-normalization.runtime.js", () => ({
normalizeProviderModelIdWithRuntime: (params: unknown) =>
normalizeProviderModelIdWithRuntimeMock(params),
}));
describe("resolveVisibleModelCatalog", () => {
beforeEach(() => {
normalizeProviderModelIdWithRuntimeMock.mockReset();
});
it("can use static auth checks for gateway read-only model lists", async () => {
const authChecker = vi.fn((provider: string) => provider === "openai");
const catalog: ModelCatalogEntry[] = [
@@ -64,6 +75,7 @@ describe("resolveVisibleModelCatalog", () => {
{ provider: "openai-codex", id: "gpt-codex-test", name: "GPT Codex Test" },
{ provider: "vllm", id: "qwen-local", name: "Qwen Local" },
]);
expect(normalizeProviderModelIdWithRuntimeMock).not.toHaveBeenCalled();
});
it("does not broaden visibility when selected providers have no catalog rows", async () => {

View File

@@ -453,6 +453,10 @@ function buildModelCatalogMetadata(
const aliasByKey = new Map<string, string>();
const configuredModels = params.cfg.agents?.defaults?.models ?? {};
for (const [rawKey, entryRaw] of Object.entries(configuredModels)) {
const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim();
if (!alias) {
continue;
}
const key = resolveAllowlistModelKey({
cfg: params.cfg,
raw: rawKey,
@@ -462,10 +466,6 @@ function buildModelCatalogMetadata(
if (!key) {
continue;
}
const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim();
if (!alias) {
continue;
}
aliasByKey.set(key, alias);
}

File diff suppressed because one or more lines are too long

View File

@@ -570,6 +570,12 @@ const DiscordVoiceRealtimeBootstrapContextFileSchema = z.enum([
"USER.md",
"SOUL.md",
]);
/**
 * Shape accepted for a configured Discord realtime wake name.
 *
 * Case-insensitively matches one run of ASCII letters/digits, optionally
 * followed by a single separator run and a second ASCII letter/digit run.
 * Any characters outside `a-z0-9` (punctuation, whitespace, and also
 * non-ASCII letters) are treated as separators or surrounding padding.
 */
const DISCORD_REALTIME_WAKE_NAME_PATTERN =
  /^\s*[^a-z0-9]*[a-z0-9]+(?:[^a-z0-9]+[a-z0-9]+)?[^a-z0-9]*\s*$/i;

/**
 * Schema for a single entry of the Discord realtime `wakeNames` list:
 * a non-empty string made of one or two words.
 */
const DiscordVoiceRealtimeWakeNameSchema = z
  .string()
  .min(1)
  .regex(DISCORD_REALTIME_WAKE_NAME_PATTERN, {
    message: "Discord realtime wake names must be one or two words.",
  });
const DiscordVoiceRealtimeSchema = z
.object({
provider: z.string().min(1).optional(),
@@ -579,7 +585,7 @@ const DiscordVoiceRealtimeSchema = z
toolPolicy: DiscordVoiceRealtimeToolPolicySchema.optional(),
consultPolicy: DiscordVoiceRealtimeConsultPolicySchema.optional(),
requireWakeName: z.boolean().optional(),
wakeNames: z.array(z.string().min(1)).optional(),
wakeNames: z.array(DiscordVoiceRealtimeWakeNameSchema).min(1).optional(),
bootstrapContextFiles: z.array(DiscordVoiceRealtimeBootstrapContextFileSchema).optional(),
bargeIn: z.boolean().optional(),
minBargeInAudioEndMs: z.number().int().min(0).max(10_000).optional(),