From 1d47974f8984ceaae392f7b70d4d3c6ce3a25d17 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 1 May 2026 12:49:17 +0100 Subject: [PATCH] fix: default Discord voice to explicit opt-in --- CHANGELOG.md | 1 + docs/channels/discord.md | 4 +- docs/gateway/config-channels.md | 2 +- extensions/discord/src/config-ui-hints.ts | 4 +- .../src/monitor/gateway-plugin.test.ts | 28 +++++++++++-- .../discord/src/monitor/gateway-plugin.ts | 5 ++- .../discord/src/monitor/provider.test.ts | 39 +++++++++++++++++++ extensions/discord/src/monitor/provider.ts | 3 +- extensions/discord/src/voice/config.ts | 8 ++++ .../discord/src/voice/manager.e2e.test.ts | 13 ++++++- extensions/discord/src/voice/manager.ts | 3 +- 11 files changed, 96 insertions(+), 14 deletions(-) create mode 100644 extensions/discord/src/voice/config.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index f73999b2c28..9d8b6d14c3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - Voice Call/Twilio: send notify-mode initial TwiML directly in the outbound create-call request while keeping conversation and pre-connect DTMF calls webhook-driven, so one-shot notify calls do not depend on a first-answer webhook fetch. Supersedes #72758. Thanks @tyshepps. - Discord/Slack: defer status-reaction cleanup until run finalization so queued, thinking, tool, and terminal reactions no longer flicker during normal progress updates. (#75582) +- Discord/voice: leave Discord voice off for text-only configs unless `channels.discord.voice` is explicitly configured, avoiding default `GuildVoiceStates` traffic and idle gateway CPU pressure for bots that do not use `/vc`. Fixes #73753; refs #74044. Thanks @sanchezm86 and @SecureCloudProjO. - Discord/voice: rerun configured voice auto-join after Discord gateway RESUMED events and ignore already-destroyed stale voice connections during reconnect cleanup, so health-monitor account restarts can rejoin configured channels. Fixes #40665. Thanks @liz709. - Discord/voice: lengthen the default voice join Ready wait, add configurable `voice.connectTimeoutMs`/`voice.reconnectGraceMs`, and warn before destroying unrecovered disconnected sessions so slow Discord voice handshakes and reconnects no longer fail silently. Fixes #63098; refs #39825 and #65039. Thanks @darealgege, @kzicherman, and @ayochim. - Gateway/health: refresh cached health RPC snapshots when channel runtime state diverges, so Discord and other channel status reads no longer report stale running or connected values until the cache TTL expires. (#75423) Thanks @clawsweeper. diff --git a/docs/channels/discord.md b/docs/channels/discord.md index ac78012866b..4c1929d6876 100644 --- a/docs/channels/discord.md +++ b/docs/channels/discord.md @@ -1067,8 +1067,8 @@ Notes: - STT uses `tools.media.audio`; `voice.model` does not affect transcription. - Per-channel Discord `systemPrompt` overrides apply to voice transcript turns for that voice channel. - Voice transcript turns derive owner status from Discord `allowFrom` (or `dm.allowFrom`); non-owner speakers cannot access owner-only tools (for example `gateway` and `cron`). -- Voice is enabled by default; set `channels.discord.voice.enabled=false` to disable voice runtime and the `GuildVoiceStates` gateway intent. -- `channels.discord.intents.voiceStates` can explicitly override voice-state intent subscription. Leave it unset for the intent to follow `voice.enabled`. +- Discord voice is opt-in for text-only configs; set `channels.discord.voice.enabled=true` (or keep an existing `channels.discord.voice` block) to enable `/vc` commands, the voice runtime, and the `GuildVoiceStates` gateway intent. +- `channels.discord.intents.voiceStates` can explicitly override voice-state intent subscription. Leave it unset for the intent to follow effective voice enablement. - `voice.daveEncryption` and `voice.decryptionFailureTolerance` pass through to `@discordjs/voice` join options. - `@discordjs/voice` defaults are `daveEncryption=true` and `decryptionFailureTolerance=24` if unset. - `voice.connectTimeoutMs` controls the initial `@discordjs/voice` Ready wait for `/vc join` and auto-join attempts. Default: `30000`. diff --git a/docs/gateway/config-channels.md b/docs/gateway/config-channels.md index 1865f112278..b551d8107bd 100644 --- a/docs/gateway/config-channels.md +++ b/docs/gateway/config-channels.md @@ -338,7 +338,7 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat - `spawnSubagentSessions`: opt-in switch for `sessions_spawn({ thread: true })` auto thread creation/binding - Top-level `bindings[]` entries with `type: "acp"` configure persistent ACP bindings for channels and threads (use channel/thread id in `match.peer.id`). Field semantics are shared in [ACP Agents](/tools/acp-agents#channel-specific-settings). - `channels.discord.ui.components.accentColor` sets the accent color for Discord components v2 containers. -- `channels.discord.voice` enables Discord voice channel conversations and optional auto-join + LLM + TTS overrides. +- `channels.discord.voice` enables Discord voice channel conversations and optional auto-join + LLM + TTS overrides. Text-only Discord configs leave voice off by default; set `channels.discord.voice.enabled=true` to opt in. - `channels.discord.voice.model` optionally overrides the LLM model used for Discord voice channel responses. - `channels.discord.voice.daveEncryption` and `channels.discord.voice.decryptionFailureTolerance` pass through to `@discordjs/voice` DAVE options (`true` and `24` by default). - `channels.discord.voice.connectTimeoutMs` controls the initial `@discordjs/voice` Ready wait for `/vc join` and auto-join attempts (`30000` by default). diff --git a/extensions/discord/src/config-ui-hints.ts b/extensions/discord/src/config-ui-hints.ts index d40a1676240..8c5fc6d129e 100644 --- a/extensions/discord/src/config-ui-hints.ts +++ b/extensions/discord/src/config-ui-hints.ts @@ -135,7 +135,7 @@ export const discordChannelConfigUiHints = { }, "intents.voiceStates": { label: "Discord Voice States Intent", - help: "Enable the Guild Voice States intent. Defaults to the effective Discord voice setting; set false for text-only gateway sessions even when voice config is present.", + help: "Enable the Guild Voice States intent. Defaults to the effective Discord voice setting; set true only for Discord voice channel conversations.", }, gatewayInfoTimeoutMs: { label: "Discord Gateway Metadata Timeout (ms)", @@ -143,7 +143,7 @@ export const discordChannelConfigUiHints = { }, "voice.enabled": { label: "Discord Voice Enabled", - help: "Enable Discord voice channel conversations (default: true). Set false for text-only gateway sessions.", + help: "Enable Discord voice channel conversations. Text-only Discord configs leave voice off by default; set true to enable /vc commands and the Guild Voice States intent.", }, "voice.model": { label: "Discord Voice Model", diff --git a/extensions/discord/src/monitor/gateway-plugin.test.ts b/extensions/discord/src/monitor/gateway-plugin.test.ts index f9aa1e8bc9b..612f586b20f 100644 --- a/extensions/discord/src/monitor/gateway-plugin.test.ts +++ b/extensions/discord/src/monitor/gateway-plugin.test.ts @@ -102,10 +102,14 @@ describe("createDiscordGatewayPlugin", () => { }); } - it("includes GuildVoiceStates when voice is enabled by default", () => { - expect(resolveDiscordGatewayIntents() & GatewayIntents.GuildVoiceStates).toBe( - GatewayIntents.GuildVoiceStates, - ); + it("omits GuildVoiceStates by default for text-only Discord configs", () => { + expect(resolveDiscordGatewayIntents() & GatewayIntents.GuildVoiceStates).toBe(0); + }); + + it("includes GuildVoiceStates when voice is enabled", () => { + const intents = resolveDiscordGatewayIntents({ voiceEnabled: true }); + + expect(intents & GatewayIntents.GuildVoiceStates).toBe(GatewayIntents.GuildVoiceStates); }); it("omits GuildVoiceStates when voice is disabled", () => { @@ -197,6 +201,22 @@ describe("createDiscordGatewayPlugin", () => { expect((options?.intents ?? 0) & GatewayIntents.GuildVoiceStates).toBe(0); }); + it("omits voice states when Discord voice config is absent", () => { + const plugin = createPlugin(undefined, {}); + const options = (plugin as unknown as { options?: { intents?: number } }).options; + + expect((options?.intents ?? 0) & GatewayIntents.GuildVoiceStates).toBe(0); + }); + + it("keeps voice states for existing Discord voice config blocks", () => { + const plugin = createPlugin(undefined, { voice: {} }); + const options = (plugin as unknown as { options?: { intents?: number } }).options; + + expect((options?.intents ?? 0) & GatewayIntents.GuildVoiceStates).toBe( + GatewayIntents.GuildVoiceStates, + ); + }); + it("leaves autoInteractions disabled so OpenClaw owns interaction handoff", () => { const plugin = createPlugin(); diff --git a/extensions/discord/src/monitor/gateway-plugin.ts b/extensions/discord/src/monitor/gateway-plugin.ts index cdbe5f3699b..4b450db733d 100644 --- a/extensions/discord/src/monitor/gateway-plugin.ts +++ b/extensions/discord/src/monitor/gateway-plugin.ts @@ -11,6 +11,7 @@ import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env"; import * as ws from "ws"; import * as discordGateway from "../internal/gateway.js"; import { validateDiscordProxyUrl } from "../proxy-fetch.js"; +import { resolveDiscordVoiceEnabled } from "../voice/config.js"; import { DISCORD_GATEWAY_TRANSPORT_ACTIVITY_EVENT } from "./gateway-handle.js"; import { fetchDiscordGatewayInfoWithTimeout, @@ -70,7 +71,7 @@ type ResolveDiscordGatewayIntentsParams = { export function resolveDiscordGatewayIntents(params?: ResolveDiscordGatewayIntentsParams): number { const intentsConfig = params?.intentsConfig; const voiceEnabled = params?.voiceEnabled; - const voiceStatesEnabled = intentsConfig?.voiceStates ?? voiceEnabled ?? true; + const voiceStatesEnabled = intentsConfig?.voiceStates ?? voiceEnabled ?? false; let intents = discordGateway.GatewayIntents.Guilds | discordGateway.GatewayIntents.GuildMessages | @@ -253,7 +254,7 @@ export function createDiscordGatewayPlugin(params: { }): discordGateway.GatewayPlugin { const intents = resolveDiscordGatewayIntents({ intentsConfig: params.discordConfig?.intents, - voiceEnabled: params.discordConfig?.voice?.enabled !== false, + voiceEnabled: resolveDiscordVoiceEnabled(params.discordConfig?.voice), }); const proxy = resolveEffectiveDebugProxyUrl(params.discordConfig?.proxy); const debugProxySettings = resolveDebugProxySettings(); diff --git a/extensions/discord/src/monitor/provider.test.ts b/extensions/discord/src/monitor/provider.test.ts index 95c0f04594f..9627c617490 100644 --- a/extensions/discord/src/monitor/provider.test.ts +++ b/extensions/discord/src/monitor/provider.test.ts @@ -391,6 +391,25 @@ describe("monitorDiscordProvider", () => { expect(voiceRuntimeModuleLoadedMock).not.toHaveBeenCalled(); }); + it("does not load the Discord voice runtime for text-only default config", async () => { + resolveDiscordAccountMock.mockReturnValue({ + accountId: "default", + token: "MTIz.abc.def", + config: { + commands: { native: true, nativeSkills: false }, + agentComponents: { enabled: false }, + execApprovals: { enabled: false }, + }, + }); + + await monitorDiscordProvider({ + config: baseConfig(), + runtime: baseRuntime(), + }); + + expect(voiceRuntimeModuleLoadedMock).not.toHaveBeenCalled(); + }); + it("loads the Discord voice runtime only when voice is enabled", async () => { resolveDiscordAccountMock.mockReturnValue({ accountId: "default", @@ -411,6 +430,26 @@ describe("monitorDiscordProvider", () => { expect(voiceRuntimeModuleLoadedMock).toHaveBeenCalledTimes(1); }); + it("loads the Discord voice runtime for existing voice config blocks", async () => { + resolveDiscordAccountMock.mockReturnValue({ + accountId: "default", + token: "MTIz.abc.def", + config: { + commands: { native: true, nativeSkills: false }, + voice: {}, + agentComponents: { enabled: false }, + execApprovals: { enabled: false }, + }, + }); + + await monitorDiscordProvider({ + config: baseConfig(), + runtime: baseRuntime(), + }); + + expect(voiceRuntimeModuleLoadedMock).toHaveBeenCalledTimes(1); + }); + it("wires exec approval button context from the resolved Discord account config", async () => { const cfg = createConfigWithDiscordAccount(); const execApprovalsConfig = { enabled: true, approvers: ["123"] }; diff --git a/extensions/discord/src/monitor/provider.ts b/extensions/discord/src/monitor/provider.ts index 1905f36fee4..2b4bf255521 100644 --- a/extensions/discord/src/monitor/provider.ts +++ b/extensions/discord/src/monitor/provider.ts @@ -32,6 +32,7 @@ import { GatewayCloseCodes } from "../internal/gateway.js"; import { fetchDiscordApplicationId, parseApplicationIdFromToken } from "../probe.js"; import { resolveDiscordProxyFetchForAccount } from "../proxy-fetch.js"; import { normalizeDiscordToken } from "../token.js"; +import { resolveDiscordVoiceEnabled } from "../voice/config.js"; import { createDiscordAutoPresenceController } from "./auto-presence.js"; import { resolveDiscordSlashCommandConfig } from "./commands.js"; import type { MutableDiscordGateway } from "./gateway-handle.js"; @@ -282,7 +283,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { const slashCommand = resolveDiscordSlashCommandConfig(discordCfg.slashCommand); const sessionPrefix = "discord:slash"; const ephemeralDefault = slashCommand.ephemeral; - const voiceEnabled = discordCfg.voice?.enabled !== false; + const voiceEnabled = resolveDiscordVoiceEnabled(discordCfg.voice); const allowlistResolved = await resolveDiscordAllowlistConfig({ token, diff --git a/extensions/discord/src/voice/config.ts b/extensions/discord/src/voice/config.ts new file mode 100644 index 00000000000..7b27fe45c84 --- /dev/null +++ b/extensions/discord/src/voice/config.ts @@ -0,0 +1,8 @@ +import type { DiscordAccountConfig } from "openclaw/plugin-sdk/config-types"; + +export function resolveDiscordVoiceEnabled(voice: DiscordAccountConfig["voice"]): boolean { + if (voice?.enabled !== undefined) { + return voice.enabled; + } + return voice !== undefined; +} diff --git a/extensions/discord/src/voice/manager.e2e.test.ts b/extensions/discord/src/voice/manager.e2e.test.ts index 8a95bb786f6..ed31c5038cb 100644 --- a/extensions/discord/src/voice/manager.e2e.test.ts +++ b/extensions/discord/src/voice/manager.e2e.test.ts @@ -213,7 +213,7 @@ describe("DiscordVoiceManager", () => { const createManager = ( discordConfig: ConstructorParameters< typeof managerModule.DiscordVoiceManager - >[0]["discordConfig"] = {}, + >[0]["discordConfig"] = { voice: { enabled: true } }, clientOverride?: ReturnType, cfgOverride: ConstructorParameters[0]["cfg"] = {}, ) => @@ -250,6 +250,17 @@ describe("DiscordVoiceManager", () => { ); }; + it("rejects joins when Discord voice config is absent", async () => { + const manager = createManager({}); + + await expect(manager.join({ guildId: "g1", channelId: "1001" })).resolves.toMatchObject({ + ok: false, + message: "Discord voice is disabled (channels.discord.voice.enabled).", + }); + + expect(joinVoiceChannelMock).not.toHaveBeenCalled(); + }); + type ProcessSegmentInvoker = { processSegment: (params: { entry: unknown; diff --git a/extensions/discord/src/voice/manager.ts b/extensions/discord/src/voice/manager.ts index 480d918ea0e..394e8336b89 100644 --- a/extensions/discord/src/voice/manager.ts +++ b/extensions/discord/src/voice/manager.ts @@ -19,6 +19,7 @@ import { scheduleVoiceCaptureFinalize, stopVoiceCaptureState, } from "./capture-state.js"; +import { resolveDiscordVoiceEnabled } from "./config.js"; import { analyzeVoiceReceiveError, createVoiceReceiveRecoveryState, @@ -102,7 +103,7 @@ export class DiscordVoiceManager { }, ) { this.botUserId = params.botUserId; - this.voiceEnabled = params.discordConfig.voice?.enabled !== false; + this.voiceEnabled = resolveDiscordVoiceEnabled(params.discordConfig.voice); this.ownerAllowFrom = resolveDiscordAccountAllowFrom({ cfg: params.cfg, accountId: params.accountId }) ?? params.discordConfig.allowFrom ??