refactor: share realtime voice activation helpers (#86615)

2026-07-13 16:36:06 +00:00 · 2026-05-25 20:25:17 +01:00
parent 170e0aac2a
commit d0ab0d9922
9 changed files with 548 additions and 364 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai

 ### Changes

+- Voice: share activation-name matching and consult-transcript screening through the realtime voice SDK so Discord, browser voice, and meeting surfaces can reuse one implementation.
 - Cron: default `cron.maxConcurrentRuns` to 8 so scheduled automations and their isolated agent turns can make progress in parallel without explicit configuration.
 - QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes.
 - Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-390681a3d97af8c004db89ead136bd6cff693af5a0ddfe86a8e3c55a29a077eb  plugin-sdk-api-baseline.json
-8dfaf69ee3d0a946bfdd1d8d97ef85262824d52c20854249f900db61f2a7f7b4  plugin-sdk-api-baseline.jsonl
+1d3e6177eeac57fc43736f7d5f76d8f825e1859ca625d268e97dc30b5567ea34  plugin-sdk-api-baseline.json
+6c093ff7c10bd81ee9d2c4fc5d07b206bc3a1f5acd0bad491cfc9e0df6689f6b  plugin-sdk-api-baseline.jsonl
--- a/extensions/discord/src/doctor-contract.ts
+++ b/extensions/discord/src/doctor-contract.ts
@@ -3,11 +3,14 @@ import type {
  ChannelDoctorLegacyConfigRule,
 } from "openclaw/plugin-sdk/channel-contract";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
+import {
+  isSupportedRealtimeVoiceActivationName,
+  normalizeRealtimeVoiceActivationNamePrefix,
+} from "openclaw/plugin-sdk/realtime-voice";
 import { asObjectRecord, normalizeLegacyChannelAliases } from "openclaw/plugin-sdk/runtime-doctor";
 import { resolveDiscordPreviewStreamMode } from "./preview-streaming.js";

 const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const;
-const DISCORD_REALTIME_WAKE_NAME_MAX_WORDS = 2;
 type AgentBindingConfig = NonNullable<OpenClawConfig["bindings"]>[number];

 function hasLegacyTtsProviderKeys(value: unknown): boolean {
@@ -78,23 +81,6 @@ function hasLegacyDiscordAccountGuildChannelAgentId(value: unknown): boolean {
  return Object.values(accounts).some((account) => hasLegacyDiscordGuildChannelAgentId(account));
 }

-function realtimeWakeNameWordCount(value: string): number {
-  return Array.from(value.matchAll(/[a-z0-9]+/gi)).length;
-}
-
-function normalizeRealtimeWakeName(value: string): string | undefined {
-  const words = Array.from(value.matchAll(/[a-z0-9]+/gi), (match) => match[0]);
-  if (words.length === 0) {
-    return undefined;
-  }
-  return words.slice(0, DISCORD_REALTIME_WAKE_NAME_MAX_WORDS).join(" ");
-}
-
-function isSupportedRealtimeWakeName(value: string): boolean {
-  const wordCount = realtimeWakeNameWordCount(value);
-  return wordCount >= 1 && wordCount <= DISCORD_REALTIME_WAKE_NAME_MAX_WORDS;
-}
-
 function hasUnsupportedRealtimeWakeNamesInVoice(value: unknown): boolean {
  const voice = asObjectRecord(value);
  const realtime = asObjectRecord(voice?.realtime);
@@ -102,7 +88,8 @@ function hasUnsupportedRealtimeWakeNamesInVoice(value: unknown): boolean {
  return Array.isArray(wakeNames)
    ? wakeNames.length === 0 ||
        wakeNames.some(
-          (wakeName) => typeof wakeName === "string" && !isSupportedRealtimeWakeName(wakeName),
+          (wakeName) =>
+            typeof wakeName === "string" && !isSupportedRealtimeVoiceActivationName(wakeName),
        )
    : false;
 }
@@ -231,10 +218,10 @@ function normalizeUnsupportedRealtimeWakeNames(
  let normalized = 0;
  let removed = 0;
  const nextWakeNames = wakeNames.flatMap((wakeName) => {
-    if (typeof wakeName !== "string" || isSupportedRealtimeWakeName(wakeName)) {
+    if (typeof wakeName !== "string" || isSupportedRealtimeVoiceActivationName(wakeName)) {
      return [wakeName];
    }
-    const nextWakeName = normalizeRealtimeWakeName(wakeName);
+    const nextWakeName = normalizeRealtimeVoiceActivationNamePrefix(wakeName);
    if (!nextWakeName) {
      removed += 1;
      return [];
--- a/extensions/discord/src/voice/realtime.ts
+++ b/extensions/discord/src/voice/realtime.ts
@@ -3,9 +3,12 @@ import type { DiscordAccountConfig, OpenClawConfig } from "openclaw/plugin-sdk/c
 import {
  buildRealtimeVoiceAgentConsultChatMessage,
  buildRealtimeVoiceAgentConsultPolicyInstructions,
+  classifySkippableRealtimeVoiceConsultTranscript,
  controlRealtimeVoiceAgentRun,
  createRealtimeVoiceAgentTalkbackQueue,
  createRealtimeVoiceBridgeSession,
+  matchRealtimeVoiceActivationName,
+  normalizeSupportedRealtimeVoiceActivationName,
  REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
  REALTIME_VOICE_AGENT_CONTROL_TOOL,
  REALTIME_VOICE_AGENT_CONTROL_TOOL_NAME,
@@ -22,6 +25,8 @@ import {
  type RealtimeVoiceBridgeSession,
  type RealtimeVoiceProviderConfig,
  type RealtimeVoiceToolCallEvent,
+  sortRealtimeVoiceActivationNames,
+  type RealtimeVoiceActivationNameTranscriptResult,
 } from "openclaw/plugin-sdk/realtime-voice";
 import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
 import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
@@ -65,35 +70,11 @@ const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
 const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 60_000;
 const DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS = 5_000;
 const DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS = 1_500;
-const DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS = 2;
 const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
 const DISCORD_RAW_PCM_FRAME_BYTES = 3_840;
 const DISCORD_REALTIME_OUTPUT_PREROLL_FRAMES = 25;
 const DISCORD_REALTIME_TRAILING_SILENCE_MIN_MS = 700;
 const DISCORD_REALTIME_TRAILING_SILENCE_MAX_MS = 3_000;
-const DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
-  "a",
-  "about",
-  "an",
-  "and",
-  "as",
-  "at",
-  "because",
-  "but",
-  "by",
-  "for",
-  "from",
-  "in",
-  "of",
-  "on",
-  "or",
-  "so",
-  "that",
-  "the",
-  "then",
-  "to",
-  "with",
-]);
 const DISCORD_REALTIME_FORCED_CONSULT_REASON =
  "provider_final_transcript_without_openclaw_agent_consult";
 const DISCORD_REALTIME_VERBOSE_OMITTED_EVENTS = new Set([
@@ -204,28 +185,6 @@ function shouldLogRealtimeVerboseEvent(event: RealtimeVoiceBridgeEvent): boolean
  return !DISCORD_REALTIME_VERBOSE_OMITTED_EVENTS.has(event.type);
 }

-function classifySkippableForcedAgentProxyTranscript(text: string): string | undefined {
-  const normalized = text.replace(/\s+/g, " ").trim().toLowerCase();
-  if (!normalized) {
-    return "empty";
-  }
-  if (/(\.\.\.|…)\s*$/.test(normalized)) {
-    return "incomplete-transcript";
-  }
-  const lastWord = normalized.match(/[a-z']+$/)?.[0]?.replace(/^'+|'+$/g, "");
-  if (lastWord && DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS.has(lastWord)) {
-    return "trailing-fragment";
-  }
-  if (
-    !normalized.includes("?") &&
-    (/^(i'?ll|i will) be (right )?back\b/.test(normalized) ||
-      /\b(see you|bye(?:-bye)?|goodbye)\b/.test(normalized))
-  ) {
-    return "non-actionable-closing";
-  }
-  return undefined;
-}
-
 function readProviderConfigString(
  config: RealtimeVoiceProviderConfig,
  key: string,
@@ -355,283 +314,6 @@ function normalizeControlSpeechText(text: string): string {
  return text.toLowerCase().replace(/\s+/g, " ").trim();
 }

-function normalizeWakeName(value: string): string | undefined {
-  const normalized = value.toLowerCase().replace(/\s+/g, " ").trim();
-  return normalized || undefined;
-}
-
-function normalizeSupportedWakeName(value: string | undefined): string | undefined {
-  if (typeof value !== "string") {
-    return undefined;
-  }
-  const normalized = normalizeWakeName(value);
-  const wordCount = normalized ? Array.from(normalized.matchAll(/[a-z0-9]+/gi)).length : 0;
-  return wordCount >= 1 && wordCount <= DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS
-    ? normalized
-    : undefined;
-}
-
-function normalizeWakeNameCandidate(value: string): string | undefined {
-  const normalized = value
-    .toLowerCase()
-    .replace(/[^a-z0-9]+/g, " ")
-    .replace(/\s+/g, " ")
-    .trim();
-  return normalized || undefined;
-}
-
-function compactWakeName(value: string): string {
-  return value.replace(/[^a-z0-9]+/g, "");
-}
-
-type EdgeWakeNameCandidate = {
-  edge: "leading" | "trailing";
-  heardName: string;
-  startIndex: number;
-  endIndex: number;
-  strongBoundary: boolean;
-};
-
-type WakeNameTranscriptResult =
-  | { allowed: true; text: string; wakeName: string; heardName: string; match: "exact" | "fuzzy" }
-  | { allowed: false; text: string };
-type AllowedWakeNameTranscriptResult = Extract<WakeNameTranscriptResult, { allowed: true }>;
-
-function leadingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
-  const opener = /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text);
-  const nameStart = opener?.[0].length ?? 0;
-  const candidates: EdgeWakeNameCandidate[] = [];
-  const candidateStarts = nameStart > 0 ? [0, nameStart] : [0];
-
-  for (const startIndex of candidateStarts) {
-    const tokenPattern = /[a-z0-9]+/gi;
-    tokenPattern.lastIndex = startIndex;
-    const startCandidates: EdgeWakeNameCandidate[] = [];
-
-    for (let wordCount = 0; wordCount < maxWords; wordCount += 1) {
-      const token = tokenPattern.exec(text);
-      if (!token) {
-        break;
-      }
-      const previousEndIndex =
-        wordCount === 0 ? startIndex : startCandidates[wordCount - 1]?.endIndex;
-      const between = text.slice(previousEndIndex, token.index);
-      if (wordCount > 0 && !/^[\s'-]+$/.test(between)) {
-        break;
-      }
-      const endIndex = token.index + token[0].length;
-      const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
-      if (!heardName) {
-        break;
-      }
-      const boundary = text.slice(endIndex).match(/^\s*([,.:;!?-]|$)/);
-      startCandidates.push({
-        edge: "leading",
-        heardName,
-        startIndex,
-        endIndex,
-        strongBoundary: Boolean(boundary),
-      });
-    }
-
-    candidates.push(...startCandidates);
-  }
-
-  return candidates;
-}
-
-function trailingWakeNameCandidates(text: string, maxWords: number): EdgeWakeNameCandidate[] {
-  const tokens = Array.from(text.matchAll(/[a-z0-9]+/gi));
-  const candidates: EdgeWakeNameCandidate[] = [];
-  const tokenCount = Math.min(tokens.length, maxWords);
-
-  for (let wordCount = 1; wordCount <= tokenCount; wordCount += 1) {
-    const startToken = tokens[tokens.length - wordCount];
-    const endToken = tokens[tokens.length - 1];
-    if (!startToken || !endToken?.[0]) {
-      break;
-    }
-    const startIndex = startToken.index ?? 0;
-    const endIndex = (endToken.index ?? 0) + endToken[0].length;
-    if (!/^\s*(?:[,.:;!?-]+\s*)?$/.test(text.slice(endIndex))) {
-      break;
-    }
-    if (!/(^|[\s,.:;!?-])$/.test(text.slice(0, startIndex))) {
-      break;
-    }
-    if (wordCount > 1) {
-      const previousToken = tokens[tokens.length - wordCount + 1];
-      const between = previousToken
-        ? text.slice(startIndex + startToken[0].length, previousToken.index)
-        : "";
-      if (!/^[\s'-]+$/.test(between)) {
-        break;
-      }
-    }
-    const heardName = normalizeWakeNameCandidate(text.slice(startIndex, endIndex));
-    if (!heardName) {
-      break;
-    }
-    candidates.push({
-      edge: "trailing",
-      heardName,
-      startIndex,
-      endIndex,
-      strongBoundary: true,
-    });
-  }
-
-  return candidates;
-}
-
-function levenshteinDistance(left: string, right: string): number {
-  if (left === right) {
-    return 0;
-  }
-  if (!left) {
-    return right.length;
-  }
-  if (!right) {
-    return left.length;
-  }
-
-  let previous = Array.from({ length: right.length + 1 }, (_, index) => index);
-  for (let leftIndex = 0; leftIndex < left.length; leftIndex += 1) {
-    const current = [leftIndex + 1];
-    for (let rightIndex = 0; rightIndex < right.length; rightIndex += 1) {
-      const cost = left[leftIndex] === right[rightIndex] ? 0 : 1;
-      current[rightIndex + 1] = Math.min(
-        current[rightIndex] + 1,
-        previous[rightIndex + 1] + 1,
-        previous[rightIndex] + cost,
-      );
-    }
-    previous = current;
-  }
-  return previous[right.length] ?? Math.max(left.length, right.length);
-}
-
-function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
-  if (left.length !== right.length) {
-    return false;
-  }
-  const vowels = new Set(["a", "e", "i", "o", "u", "y"]);
-  const liquids = new Set(["l", "r"]);
-  let substitutions = 0;
-  for (let index = 0; index < left.length; index += 1) {
-    const leftChar = left[index];
-    const rightChar = right[index];
-    if (leftChar === rightChar) {
-      continue;
-    }
-    const vowelLike = vowels.has(leftChar ?? "") && vowels.has(rightChar ?? "");
-    const liquidLike = liquids.has(leftChar ?? "") && liquids.has(rightChar ?? "");
-    if (!vowelLike && !liquidLike) {
-      return false;
-    }
-    substitutions += 1;
-  }
-  return substitutions > 0;
-}
-
-function commonPrefixLength(left: string, right: string): number {
-  const limit = Math.min(left.length, right.length);
-  for (let index = 0; index < limit; index += 1) {
-    if (left[index] !== right[index]) {
-      return index;
-    }
-  }
-  return limit;
-}
-
-function isFuzzyWakeNameMatch(candidate: EdgeWakeNameCandidate, wakeName: string): boolean {
-  const normalizedWakeName = normalizeWakeNameCandidate(wakeName);
-  if (!normalizedWakeName) {
-    return false;
-  }
-  const heardCompact = compactWakeName(candidate.heardName);
-  const wakeCompact = compactWakeName(normalizedWakeName);
-  if (!heardCompact || !wakeCompact || wakeCompact.length < 5) {
-    return false;
-  }
-  if (!candidate.strongBoundary) {
-    return false;
-  }
-  if (heardCompact[0] !== wakeCompact[0]) {
-    return false;
-  }
-  const distance = levenshteinDistance(heardCompact, wakeCompact);
-  if (distance <= 1) {
-    return true;
-  }
-  if (
-    distance === 2 &&
-    heardCompact.length >= 4 &&
-    wakeCompact.length >= 5 &&
-    (heardCompact.length !== wakeCompact.length ||
-      hasOnlyPhoneticSubstitutions(heardCompact, wakeCompact) ||
-      commonPrefixLength(heardCompact, wakeCompact) >= 6)
-  ) {
-    return true;
-  }
-  if (
-    distance === 3 &&
-    heardCompact.length >= 7 &&
-    wakeCompact.length >= 7 &&
-    heardCompact.length !== wakeCompact.length &&
-    commonPrefixLength(heardCompact, wakeCompact) >= 5
-  ) {
-    return true;
-  }
-  return false;
-}
-
-function stripEdgeWakeNameCandidate(text: string, candidate: EdgeWakeNameCandidate): string {
-  if (candidate.edge === "leading") {
-    return text
-      .slice(candidate.endIndex)
-      .replace(/^\s*(?:[-,:;.!?]+\s*)?/, "")
-      .trim();
-  }
-  return text
-    .slice(0, candidate.startIndex)
-    .replace(/\s*(?:[-,:;.!?]+\s*)?$/, "")
-    .trim();
-}
-
-function matchEdgeWakeName(
-  text: string,
-  wakeNames: string[],
-): AllowedWakeNameTranscriptResult | undefined {
-  const candidates = [
-    ...leadingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS),
-    ...trailingWakeNameCandidates(text, DISCORD_REALTIME_WAKE_NAME_EDGE_WORDS),
-  ].toSorted(
-    (left, right) =>
-      compactWakeName(right.heardName).length - compactWakeName(left.heardName).length,
-  );
-  for (const candidate of candidates) {
-    for (const wakeName of wakeNames) {
-      const normalizedWakeName = normalizeWakeNameCandidate(wakeName);
-      if (!normalizedWakeName) {
-        continue;
-      }
-      const heardCompact = compactWakeName(candidate.heardName);
-      const wakeCompact = compactWakeName(normalizedWakeName);
-      if (heardCompact === wakeCompact || isFuzzyWakeNameMatch(candidate, wakeName)) {
-        return {
-          allowed: true,
-          text: stripEdgeWakeNameCandidate(text, candidate),
-          wakeName,
-          heardName: candidate.heardName,
-          match: heardCompact === wakeCompact ? "exact" : "fuzzy",
-        };
-      }
-    }
-  }
-  return undefined;
-}
-
 function resolveDiscordRealtimeWakeNames(params: {
  config: DiscordRealtimeVoiceConfig;
  cfg: OpenClawConfig;
@@ -640,30 +322,24 @@ function resolveDiscordRealtimeWakeNames(params: {
  const rawConfigured = params.config?.wakeNames;
  if (rawConfigured) {
    const configured = rawConfigured
-      .map((name) => normalizeSupportedWakeName(name))
+      .map((name) => normalizeSupportedRealtimeVoiceActivationName(name))
      .filter((name): name is string => Boolean(name));
-    return sortWakeNames(Array.from(new Set(configured)));
+    return sortRealtimeVoiceActivationNames(Array.from(new Set(configured)));
  }
  const agent = params.cfg.agents?.list?.find((candidate) => candidate.id === params.agentId);
  const configuredAgentNames = [agent?.name, agent?.identity?.name]
-    .map((name) => normalizeSupportedWakeName(name))
+    .map((name) => normalizeSupportedRealtimeVoiceActivationName(name))
    .filter((name): name is string => Boolean(name));
-  const productWakeNames = [normalizeSupportedWakeName("OpenClaw")].filter((name): name is string =>
-    Boolean(name),
+  const productWakeNames = [normalizeSupportedRealtimeVoiceActivationName("OpenClaw")].filter(
+    (name): name is string => Boolean(name),
  );
  const defaults =
    configuredAgentNames.length > 0
      ? [...configuredAgentNames, ...productWakeNames]
-      : [normalizeSupportedWakeName(params.agentId), ...productWakeNames].filter(
+      : [normalizeSupportedRealtimeVoiceActivationName(params.agentId), ...productWakeNames].filter(
          (name): name is string => Boolean(name),
        );
-  return sortWakeNames(Array.from(new Set(defaults)));
-}
-
-function sortWakeNames(wakeNames: string[]): string[] {
-  return wakeNames.toSorted(
-    (left, right) => right.length - left.length || left.localeCompare(right),
-  );
+  return sortRealtimeVoiceActivationNames(Array.from(new Set(defaults)));
 }

 function matchesPendingAgentProxyQuestion(consultMessage: string, question: string): boolean {
@@ -1524,14 +1200,21 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
    this.talkback.enqueue(acceptedText, this.consumePendingSpeakerContext());
  }

-  private resolveWakeNameTranscript(text: string): WakeNameTranscriptResult {
+  private resolveWakeNameTranscript(text: string): RealtimeVoiceActivationNameTranscriptResult {
    if (!this.requireWakeName) {
-      return { allowed: true, text, wakeName: "", heardName: "", match: "exact" };
+      return {
+        allowed: true,
+        text,
+        activationName: "",
+        heardName: "",
+        match: "exact",
+        edge: "leading",
+      };
    }
-    const wakeNameResult = matchEdgeWakeName(text, this.wakeNames);
+    const wakeNameResult = matchRealtimeVoiceActivationName(text, this.wakeNames);
    if (wakeNameResult) {
      logger.info(
-        `discord voice: realtime wake-name gate matched canonical=${wakeNameResult.wakeName} heard=${wakeNameResult.heardName} match=${wakeNameResult.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
+        `discord voice: realtime wake-name gate matched canonical=${wakeNameResult.activationName} heard=${wakeNameResult.heardName} match=${wakeNameResult.match} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId}`,
      );
      return wakeNameResult;
    }
@@ -1585,7 +1268,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
    if (!question) {
      return undefined;
    }
-    const skipReason = classifySkippableForcedAgentProxyTranscript(question);
+    const skipReason = classifySkippableRealtimeVoiceConsultTranscript(question);
    if (skipReason) {
      const context = this.consumePendingSpeakerContext();
      logger.info(
--- a/src/plugin-sdk/realtime-voice.ts
+++ b/src/plugin-sdk/realtime-voice.ts
@@ -50,6 +50,23 @@ export {
  type TalkTurnResult,
  type TalkTurnSuccess,
 } from "../talk/talk-session-controller.js";
+export {
+  REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
+  isSupportedRealtimeVoiceActivationName,
+  matchRealtimeVoiceActivationName,
+  normalizeRealtimeVoiceActivationName,
+  normalizeRealtimeVoiceActivationNamePrefix,
+  normalizeSupportedRealtimeVoiceActivationName,
+  realtimeVoiceActivationNameWordCount,
+  sortRealtimeVoiceActivationNames,
+  type RealtimeVoiceActivationNameEdge,
+  type RealtimeVoiceActivationNameMatchKind,
+  type RealtimeVoiceActivationNameTranscriptResult,
+} from "../talk/activation-name.js";
+export {
+  classifySkippableRealtimeVoiceConsultTranscript,
+  type SkippableRealtimeVoiceConsultTranscriptReason,
+} from "../talk/consult-transcript.js";
 export {
  buildRealtimeVoiceAgentConsultChatMessage,
  buildRealtimeVoiceAgentConsultPolicyInstructions,
--- a/src/talk/activation-name.test.ts
+++ b/src/talk/activation-name.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, it } from "vitest";
+import {
+  isSupportedRealtimeVoiceActivationName,
+  matchRealtimeVoiceActivationName,
+  normalizeRealtimeVoiceActivationNamePrefix,
+  normalizeSupportedRealtimeVoiceActivationName,
+  sortRealtimeVoiceActivationNames,
+} from "./activation-name.js";
+
+describe("realtime voice activation names", () => { // unit tests for the helpers exported by ./activation-name.js
+  it("normalizes and validates one- or two-word activation names", () => {
+    expect(normalizeSupportedRealtimeVoiceActivationName("  OpenClaw  ")).toBe("openclaw");
+    expect(normalizeSupportedRealtimeVoiceActivationName("Open Claw")).toBe("open claw");
+    expect(normalizeSupportedRealtimeVoiceActivationName("Claw Bot Helper")).toBeUndefined(); // three words: rejected, not truncated
+    expect(isSupportedRealtimeVoiceActivationName("Claw Bot")).toBe(true);
+    expect(isSupportedRealtimeVoiceActivationName("Claw Bot Helper")).toBe(false);
+    expect(normalizeRealtimeVoiceActivationNamePrefix("Claw Bot Helper")).toBe("Claw Bot"); // prefix helper truncates and keeps casing
+  });
+
+  it("matches and strips leading exact activation names", () => {
+    expect(matchRealtimeVoiceActivationName("Hey, Molty, ship it", ["molty"])).toEqual({ // opener and name are both removed from text
+      allowed: true,
+      activationName: "molty",
+      edge: "leading",
+      heardName: "molty",
+      match: "exact",
+      text: "ship it",
+    });
+  });
+
+  it("matches and strips trailing exact activation names", () => {
+    expect(matchRealtimeVoiceActivationName("ship it, Claw Bot", ["claw bot"])).toEqual({
+      allowed: true,
+      activationName: "claw bot",
+      edge: "trailing",
+      heardName: "claw bot",
+      match: "exact",
+      text: "ship it",
+    });
+  });
+
+  it("accepts bounded fuzzy matches at the transcript edge", () => {
+    expect(matchRealtimeVoiceActivationName("Malty, what changed?", ["molty"])).toMatchObject({ // one-letter mishearing followed by a comma
+      allowed: true,
+      activationName: "molty",
+      edge: "leading",
+      heardName: "malty",
+      match: "fuzzy",
+      text: "what changed?",
+    });
+  });
+
+  it("does not fuzzy match inside a larger phrase without an edge boundary", () => {
+    expect(matchRealtimeVoiceActivationName("maltiness is not a wake name", ["molty"])).toBe(
+      undefined,
+    );
+  });
+
+  it("prefers longer activation names first", () => {
+    expect(sortRealtimeVoiceActivationNames(["claw", "claw bot", "openclaw"])).toEqual([ // equal lengths fall back to localeCompare
+      "claw bot",
+      "openclaw",
+      "claw",
+    ]);
+    expect(matchRealtimeVoiceActivationName("Claw Bot, status", ["claw", "claw bot"])).toEqual({ // the longer heard candidate wins despite list order
+      allowed: true,
+      activationName: "claw bot",
+      edge: "leading",
+      heardName: "claw bot",
+      match: "exact",
+      text: "status",
+    });
+  });
+});
--- a/src/talk/activation-name.ts
+++ b/src/talk/activation-name.ts
@@ -0,0 +1,334 @@
+export const REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS = 2; // default maxWords for the helpers in this module
+
+export type RealtimeVoiceActivationNameEdge = "leading" | "trailing"; // transcript edge where the name was heard
+export type RealtimeVoiceActivationNameMatchKind = "exact" | "fuzzy"; // fuzzy means accepted by the edit-distance rules
+
+export type RealtimeVoiceActivationNameTranscriptResult =
+  | {
+      allowed: true;
+      text: string; // transcript to act on; the matcher returns it with the heard name stripped
+      activationName: string; // the activation name that matched, as supplied by the caller
+      heardName: string; // normalized words that were actually heard
+      match: RealtimeVoiceActivationNameMatchKind;
+      edge: RealtimeVoiceActivationNameEdge;
+    }
+  | { allowed: false; text: string };
+
+type EdgeActivationNameCandidate = {
+  edge: RealtimeVoiceActivationNameEdge;
+  heardName: string; // lowercased words with punctuation collapsed to single spaces
+  startIndex: number; // inclusive offset of the candidate in the transcript
+  endIndex: number; // exclusive offset of the candidate in the transcript
+  strongBoundary: boolean; // required before a fuzzy match is accepted
+};
+
+export function realtimeVoiceActivationNameWordCount(value: string): number { // counts runs of ASCII letters and digits
+  return Array.from(value.matchAll(/[a-z0-9]+/gi)).length;
+}
+
+export function normalizeRealtimeVoiceActivationName(value: string): string | undefined {
+  const normalized = value.toLowerCase().replace(/\s+/g, " ").trim(); // lowercase, collapse whitespace runs, trim; punctuation is kept
+  return normalized || undefined; // an empty result is reported as undefined
+}
+
+export function normalizeRealtimeVoiceActivationNamePrefix(
+  value: string,
+  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
+): string | undefined {
+  const words = Array.from(value.matchAll(/[a-z0-9]+/gi), (match) => match[0]); // alphanumeric words in their original casing
+  if (words.length === 0) {
+    return undefined; // no alphanumeric content at all
+  }
+  return words.slice(0, maxWords).join(" "); // first maxWords words joined by single spaces
+}
+
+export function isSupportedRealtimeVoiceActivationName(
+  value: string,
+  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
+): boolean {
+  const wordCount = realtimeVoiceActivationNameWordCount(value);
+  return wordCount >= 1 && wordCount <= maxWords; // supported means 1 to maxWords alphanumeric words, inclusive
+}
+
+export function normalizeSupportedRealtimeVoiceActivationName(
+  value: string | undefined,
+  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
+): string | undefined {
+  if (typeof value !== "string") {
+    return undefined; // non-string input such as undefined yields undefined
+  }
+  const normalized = normalizeRealtimeVoiceActivationName(value);
+  return normalized && isSupportedRealtimeVoiceActivationName(normalized, maxWords)
+    ? normalized
+    : undefined; // names with zero or too many words are rejected rather than truncated
+}
+
+export function sortRealtimeVoiceActivationNames(names: string[]): string[] { // sorted copy: longest first, ties by localeCompare
+  return names.toSorted((left, right) => right.length - left.length || left.localeCompare(right));
+}
+
+export function matchRealtimeVoiceActivationName(
+  text: string,
+  activationNames: string[],
+  maxWords = REALTIME_VOICE_ACTIVATION_NAME_MAX_WORDS,
+): Extract<RealtimeVoiceActivationNameTranscriptResult, { allowed: true }> | undefined {
+  const candidates = [
+    ...leadingActivationNameCandidates(text, maxWords),
+    ...trailingActivationNameCandidates(text, maxWords),
+  ].toSorted( // candidates with the longest compact heard name are tried first
+    (left, right) =>
+      compactActivationName(right.heardName).length - compactActivationName(left.heardName).length,
+  );
+
+  for (const candidate of candidates) {
+    for (const activationName of activationNames) {
+      const normalizedActivationName = normalizeActivationNameCandidate(activationName);
+      if (!normalizedActivationName) {
+        continue; // this activation name has no alphanumeric content
+      }
+      const heardCompact = compactActivationName(candidate.heardName);
+      const activationCompact = compactActivationName(normalizedActivationName);
+      if (
+        heardCompact === activationCompact || // exact: equal once spaces and punctuation are removed
+        isFuzzyActivationNameMatch(candidate, activationName)
+      ) {
+        return { // first hit wins; activationName is returned exactly as passed in
+          allowed: true,
+          text: stripEdgeActivationNameCandidate(text, candidate),
+          activationName,
+          heardName: candidate.heardName,
+          match: heardCompact === activationCompact ? "exact" : "fuzzy",
+          edge: candidate.edge,
+        };
+      }
+    }
+  }
+  return undefined; // no edge candidate matched any activation name
+}
+
+function normalizeActivationNameCandidate(value: string): string | undefined {
+  const normalized = value
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, " ") // every run of other characters becomes one space
+    .replace(/\s+/g, " ")
+    .trim();
+  return normalized || undefined; // undefined when nothing alphanumeric remains
+}
+
+function compactActivationName(value: string): string { // removes everything except lowercase a-z and digits, uppercase included
+  return value.replace(/[^a-z0-9]+/g, "");
+}
+
+function leadingActivationNameCandidates(
+  text: string,
+  maxWords: number,
+): EdgeActivationNameCandidate[] {
+  const opener = /^\s*(?:(?:hey|ok|okay)(?:\s*[-,:;]+\s*|\s+))?/i.exec(text); // leading whitespace plus an optional hey/ok/okay opener
+  const nameStart = opener?.[0].length ?? 0;
+  const candidates: EdgeActivationNameCandidate[] = [];
+  const candidateStarts = nameStart > 0 ? [0, nameStart] : [0]; // scan from offset 0 and, if a prefix matched, from just after it
+
+  for (const startIndex of candidateStarts) {
+    const tokenPattern = /[a-z0-9]+/gi;
+    tokenPattern.lastIndex = startIndex; // the global regex resumes scanning at startIndex
+    const startCandidates: EdgeActivationNameCandidate[] = [];
+
+    for (let wordCount = 0; wordCount < maxWords; wordCount += 1) { // grow the candidate one word at a time
+      const token = tokenPattern.exec(text);
+      if (!token) {
+        break;
+      }
+      const previousEndIndex =
+        wordCount === 0 ? startIndex : startCandidates[wordCount - 1]?.endIndex;
+      const between = text.slice(previousEndIndex, token.index);
+      if (wordCount > 0 && !/^[\s'-]+$/.test(between)) { // later words must be joined by whitespace, apostrophes or hyphens only
+        break;
+      }
+      const endIndex = token.index + token[0].length;
+      const heardName = normalizeActivationNameCandidate(text.slice(startIndex, endIndex));
+      if (!heardName) {
+        break;
+      }
+      const boundary = text.slice(endIndex).match(/^\s*([,.:;!?-]|$)/); // punctuation or end of text right after the candidate
+      startCandidates.push({
+        edge: "leading",
+        heardName,
+        startIndex,
+        endIndex,
+        strongBoundary: Boolean(boundary),
+      });
+    }
+
+    candidates.push(...startCandidates);
+  }
+
+  return candidates;
+}
+
+function trailingActivationNameCandidates(
+  text: string,
+  maxWords: number,
+): EdgeActivationNameCandidate[] {
+  const tokens = Array.from(text.matchAll(/[a-z0-9]+/gi));
+  const candidates: EdgeActivationNameCandidate[] = [];
+  const tokenCount = Math.min(tokens.length, maxWords);
+
+  for (let wordCount = 1; wordCount <= tokenCount; wordCount += 1) { // candidate covers the last wordCount tokens
+    const startToken = tokens[tokens.length - wordCount];
+    const endToken = tokens[tokens.length - 1];
+    if (!startToken || !endToken?.[0]) {
+      break;
+    }
+    const startIndex = startToken.index ?? 0;
+    const endIndex = (endToken.index ?? 0) + endToken[0].length;
+    if (!/^\s*(?:[,.:;!?-]+\s*)?$/.test(text.slice(endIndex))) { // only whitespace and one punctuation run may follow
+      break;
+    }
+    if (!/(^|[\s,.:;!?-])$/.test(text.slice(0, startIndex))) { // must begin at text start or after whitespace/punctuation
+      break;
+    }
+    if (wordCount > 1) {
+      const previousToken = tokens[tokens.length - wordCount + 1]; // the token immediately after startToken
+      const between = previousToken
+        ? text.slice(startIndex + startToken[0].length, previousToken.index)
+        : "";
+      if (!/^[\s'-]+$/.test(between)) { // words must be joined by whitespace, apostrophes or hyphens only
+        break;
+      }
+    }
+    const heardName = normalizeActivationNameCandidate(text.slice(startIndex, endIndex));
+    if (!heardName) {
+      break;
+    }
+    candidates.push({
+      edge: "trailing",
+      heardName,
+      startIndex,
+      endIndex,
+      strongBoundary: true, // trailing candidates always count as strongly bounded
+    });
+  }
+
+  return candidates;
+}
+
+function levenshteinDistance(left: string, right: string): number { // edit distance computed with two rolling rows
+  if (left === right) {
+    return 0;
+  }
+  if (!left) {
+    return right.length;
+  }
+  if (!right) {
+    return left.length;
+  }
+
+  let previous = Array.from({ length: right.length + 1 }, (_, index) => index); // row for the empty prefix of left
+  for (let leftIndex = 0; leftIndex < left.length; leftIndex += 1) {
+    const current = [leftIndex + 1];
+    for (let rightIndex = 0; rightIndex < right.length; rightIndex += 1) {
+      const cost = left[leftIndex] === right[rightIndex] ? 0 : 1; // substitution is free when the characters agree
+      current[rightIndex + 1] = Math.min(
+        current[rightIndex] + 1,
+        previous[rightIndex + 1] + 1,
+        previous[rightIndex] + cost,
+      );
+    }
+    previous = current;
+  }
+  return previous[right.length] ?? Math.max(left.length, right.length); // each row holds right.length + 1 entries, so the fallback is defensive
+}
+
+function hasOnlyPhoneticSubstitutions(left: string, right: string): boolean {
+  if (left.length !== right.length) { // strings are compared position by position, so lengths must agree
+    return false;
+  }
+  const vowels = new Set(["a", "e", "i", "o", "u", "y"]); // y is grouped with the vowels
+  const liquids = new Set(["l", "r"]);
+  let substitutions = 0;
+  for (let index = 0; index < left.length; index += 1) {
+    const leftChar = left[index];
+    const rightChar = right[index];
+    if (leftChar === rightChar) {
+      continue;
+    }
+    const vowelLike = vowels.has(leftChar ?? "") && vowels.has(rightChar ?? "");
+    const liquidLike = liquids.has(leftChar ?? "") && liquids.has(rightChar ?? "");
+    if (!vowelLike && !liquidLike) { // any other differing pair disqualifies the match
+      return false;
+    }
+    substitutions += 1;
+  }
+  return substitutions > 0; // identical strings report false
+}
+
+function commonPrefixLength(left: string, right: string): number { // number of leading characters the two strings share
+  const limit = Math.min(left.length, right.length);
+  for (let index = 0; index < limit; index += 1) {
+    if (left[index] !== right[index]) {
+      return index; // first mismatch position equals the shared prefix length
+    }
+  }
+  return limit; // the shorter string is a prefix of the longer one
+}
+
+function isFuzzyActivationNameMatch(
+  candidate: EdgeActivationNameCandidate,
+  activationName: string,
+): boolean {
+  const normalizedActivationName = normalizeActivationNameCandidate(activationName);
+  if (!normalizedActivationName) {
+    return false;
+  }
+  const heardCompact = compactActivationName(candidate.heardName);
+  const activationCompact = compactActivationName(normalizedActivationName);
+  if (!heardCompact || !activationCompact || activationCompact.length < 5) { // names under 5 compact characters never fuzzy-match
+    return false;
+  }
+  if (!candidate.strongBoundary) { // fuzzy matching requires a strongly bounded candidate
+    return false;
+  }
+  if (heardCompact[0] !== activationCompact[0]) { // the first character must agree
+    return false;
+  }
+  const distance = levenshteinDistance(heardCompact, activationCompact);
+  if (distance <= 1) { // at most one edit is always accepted
+    return true;
+  }
+  if (
+    distance === 2 && // two edits need a length change, vowel/liquid-only swaps, or a shared prefix of 6+
+    heardCompact.length >= 4 &&
+    activationCompact.length >= 5 &&
+    (heardCompact.length !== activationCompact.length ||
+      hasOnlyPhoneticSubstitutions(heardCompact, activationCompact) ||
+      commonPrefixLength(heardCompact, activationCompact) >= 6)
+  ) {
+    return true;
+  }
+  if (
+    distance === 3 && // three edits need 7+ characters on both sides, differing lengths and a shared prefix of 5+
+    heardCompact.length >= 7 &&
+    activationCompact.length >= 7 &&
+    heardCompact.length !== activationCompact.length &&
+    commonPrefixLength(heardCompact, activationCompact) >= 5
+  ) {
+    return true;
+  }
+  return false;
+}
+
+function stripEdgeActivationNameCandidate(
+  text: string,
+  candidate: EdgeActivationNameCandidate,
+): string {
+  if (candidate.edge === "leading") {
+    return text
+      .slice(candidate.endIndex) // keep what follows the name; anything before it is discarded
+      .replace(/^\s*(?:[-,:;.!?]+\s*)?/, "") // drop the separator right after the name
+      .trim();
+  }
+  return text
+    .slice(0, candidate.startIndex) // keep what precedes the name
+    .replace(/\s*(?:[-,:;.!?]+\s*)?$/, "") // drop the separator right before the name
+    .trim();
+}
--- a/src/talk/consult-transcript.test.ts
+++ b/src/talk/consult-transcript.test.ts
@@ -0,0 +1,35 @@
+import { describe, expect, it } from "vitest";
+import { classifySkippableRealtimeVoiceConsultTranscript } from "./consult-transcript.js";
+
+// Covers every skip reason plus the pass-through case.
+describe("realtime voice consult transcript classification", () => {
+  // Short alias keeps each expectation on one line.
+  const classify = classifySkippableRealtimeVoiceConsultTranscript;
+
+  it("skips empty and incomplete transcripts", () => {
+    expect(classify("  ")).toBe("empty");
+    // Both the ASCII "..." and the single-character ellipsis mark an unfinished utterance.
+    for (const transcript of ["can you check...", "can you check…"]) {
+      expect(classify(transcript)).toBe("incomplete-transcript");
+    }
+  });
+
+  it("skips likely trailing fragments", () => {
+    // Each transcript ends on a dangling function word.
+    for (const transcript of ["tell me about", "ship it so"]) {
+      expect(classify(transcript)).toBe("trailing-fragment");
+    }
+  });
+
+  it("skips non-actionable closings unless phrased as a question", () => {
+    for (const transcript of ["I'll be right back", "goodbye for now"]) {
+      expect(classify(transcript)).toBe("non-actionable-closing");
+    }
+    // The same closing word inside a question must not be skipped.
+    expect(classify("can you say goodbye?")).toBeUndefined();
+  });
+
+  it("keeps actionable transcripts", () => {
+    expect(classify("what changed in CI?")).toBeUndefined();
+  });
+});
--- a/src/talk/consult-transcript.ts
+++ b/src/talk/consult-transcript.ts
@@ -0,0 +1,80 @@
+/**
+ * Function words that leave a sentence hanging when they are the final word of a
+ * transcript. Entries are lowercase because the classifier lowercases its input first.
+ */
+const REALTIME_VOICE_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
+  "a",
+  "about",
+  "an",
+  "and",
+  "as",
+  "at",
+  "because",
+  "but",
+  "by",
+  "for",
+  "from",
+  "in",
+  "of",
+  "on",
+  "or",
+  "so",
+  "that",
+  "the",
+  "then",
+  "to",
+  "with",
+]);
+
+/** Reasons the classifier can give for skipping a transcript. */
+export type SkippableRealtimeVoiceConsultTranscriptReason =
+  | "empty"
+  | "incomplete-transcript"
+  | "trailing-fragment"
+  | "non-actionable-closing";
+
+/**
+ * Screens a transcript and names the reason it can be skipped, if any.
+ *
+ * Checks run in order and the first hit wins:
+ * - `empty`: nothing is left after whitespace is collapsed and trimmed.
+ * - `incomplete-transcript`: the text ends in `...` or `…`.
+ * - `trailing-fragment`: the final word is a dangling function word such as "about".
+ * - `non-actionable-closing`: there is no `?` and the text either starts with "I'll be
+ *   (right) back" / "I will be (right) back" or mentions "see you", "bye", or "goodbye".
+ *
+ * Matching is case-insensitive, and typographic apostrophes (U+2018, U+2019) are treated
+ * as `'`, so "I’ll be right back" is classified the same as its ASCII spelling.
+ *
+ * @param text Raw transcript text.
+ * @returns The skip reason, or `undefined` when none of the checks match.
+ */
+export function classifySkippableRealtimeVoiceConsultTranscript(
+  text: string,
+): SkippableRealtimeVoiceConsultTranscriptReason | undefined {
+  const normalized = text
+    .replace(/\s+/g, " ")
+    .trim()
+    .toLowerCase()
+    .replace(/[\u2018\u2019]/g, "'");
+  if (!normalized) {
+    return "empty";
+  }
+  if (/(\.\.\.|…)\s*$/.test(normalized)) {
+    return "incomplete-transcript";
+  }
+  // Only a transcript that ends on a letter or apostrophe yields a last word; quote marks
+  // wrapped around that word are peeled off before the lookup.
+  const lastWord = normalized.match(/[a-z']+$/)?.[0]?.replace(/^'+|'+$/g, "");
+  if (lastWord && REALTIME_VOICE_CONSULT_TRAILING_FRAGMENT_WORDS.has(lastWord)) {
+    return "trailing-fragment";
+  }
+  if (
+    !normalized.includes("?") &&
+    (/^(i'?ll|i will) be (right )?back\b/.test(normalized) ||
+      /\b(see you|bye(?:-bye)?|goodbye)\b/.test(normalized))
+  ) {
+    return "non-actionable-closing";
+  }
+  return undefined;
+}