diff --git a/docs/channels/discord.md b/docs/channels/discord.md index ccf0d7dc282..15a92fc5161 100644 --- a/docs/channels/discord.md +++ b/docs/channels/discord.md @@ -944,6 +944,7 @@ Auto-join example: Notes: - `voice.tts` overrides `messages.tts` for voice playback only. +- Voice transcript turns derive owner status from Discord `allowFrom` (or `dm.allowFrom`); non-owner speakers cannot access owner-only tools (for example `gateway` and `cron`). - Voice is enabled by default; set `channels.discord.voice.enabled=false` to disable it. - `voice.daveEncryption` and `voice.decryptionFailureTolerance` pass through to `@discordjs/voice` join options. - `@discordjs/voice` defaults are `daveEncryption=true` and `decryptionFailureTolerance=24` if unset. diff --git a/package.json b/package.json index 92c3e723f60..fe9a767f9ed 100644 --- a/package.json +++ b/package.json @@ -63,7 +63,7 @@ "build:plugin-sdk:dts": "tsc -p tsconfig.plugin-sdk.dts.json", "build:strict-smoke": "pnpm canvas:a2ui:bundle && tsdown && pnpm build:plugin-sdk:dts", "canvas:a2ui:bundle": "bash scripts/bundle-a2ui.sh", - "check": "pnpm format:check && pnpm tsgo && pnpm lint && pnpm lint:tmp:no-random-messaging && pnpm lint:tmp:channel-agnostic-boundaries && pnpm lint:tmp:no-raw-channel-fetch && pnpm lint:plugins:no-register-http-handler && pnpm lint:webhook:no-low-level-body-read && pnpm lint:auth:no-pairing-store-group && pnpm lint:auth:pairing-account-scope && pnpm check:host-env-policy:swift", + "check": "pnpm format:check && pnpm tsgo && pnpm lint && pnpm lint:tmp:no-random-messaging && pnpm lint:tmp:channel-agnostic-boundaries && pnpm lint:tmp:no-raw-channel-fetch && pnpm lint:agent:ingress-owner && pnpm lint:plugins:no-register-http-handler && pnpm lint:webhook:no-low-level-body-read && pnpm lint:auth:no-pairing-store-group && pnpm lint:auth:pairing-account-scope && pnpm check:host-env-policy:swift", "check:docs": "pnpm format:docs:check && pnpm lint:docs && pnpm docs:check-links", "check:host-env-policy:swift": "node scripts/generate-host-env-security-policy-swift.mjs --check", "check:loc": "node --import tsx scripts/check-ts-max-loc.ts --max 500", @@ -100,6 +100,7 @@ "ios:open": "bash -lc './scripts/ios-configure-signing.sh && cd apps/ios && xcodegen generate && open OpenClaw.xcodeproj'", "ios:run": "bash -lc './scripts/ios-configure-signing.sh && cd apps/ios && xcodegen generate && xcodebuild -project OpenClaw.xcodeproj -scheme OpenClaw -destination \"${IOS_DEST:-platform=iOS Simulator,name=iPhone 17}\" -configuration Debug build && xcrun simctl boot \"${IOS_SIM:-iPhone 17}\" || true && xcrun simctl launch booted ai.openclaw.ios'", "lint": "oxlint --type-aware", + "lint:agent:ingress-owner": "node scripts/check-ingress-agent-owner-context.mjs", "lint:all": "pnpm lint && pnpm lint:swift", "lint:auth:no-pairing-store-group": "node scripts/check-no-pairing-store-group-auth.mjs", "lint:auth:pairing-account-scope": "node scripts/check-pairing-account-scope.mjs", diff --git a/scripts/check-ingress-agent-owner-context.mjs b/scripts/check-ingress-agent-owner-context.mjs new file mode 100644 index 00000000000..20b99536e1d --- /dev/null +++ b/scripts/check-ingress-agent-owner-context.mjs @@ -0,0 +1,45 @@ +#!/usr/bin/env node + +import path from "node:path"; +import ts from "typescript"; +import { runCallsiteGuard } from "./lib/callsite-guard.mjs"; +import { runAsScript, toLine, unwrapExpression } from "./lib/ts-guard-utils.mjs"; + +const sourceRoots = ["src/gateway", "src/discord/voice"]; +const enforcedFiles = new Set([ + "src/discord/voice/manager.ts", + "src/gateway/openai-http.ts", + "src/gateway/openresponses-http.ts", + "src/gateway/server-methods/agent.ts", + "src/gateway/server-node-events.ts", +]); + +export function findLegacyAgentCommandCallLines(content, fileName = "source.ts") { + const sourceFile = ts.createSourceFile(fileName, content, ts.ScriptTarget.Latest, true); + const lines = []; + const visit = (node) => { + if (ts.isCallExpression(node)) { + const callee = unwrapExpression(node.expression); + if (ts.isIdentifier(callee) && callee.text === "agentCommand") { + lines.push(toLine(sourceFile, callee)); + } + } + ts.forEachChild(node, visit); + }; + visit(sourceFile); + return lines; +} + +export async function main() { + await runCallsiteGuard({ + importMetaUrl: import.meta.url, + sourceRoots, + findCallLines: findLegacyAgentCommandCallLines, + skipRelativePath: (relPath) => !enforcedFiles.has(relPath.replaceAll(path.sep, "/")), + header: "Found ingress callsites using local agentCommand() (must be explicit owner-aware):", + footer: + "Use agentCommandFromIngress(...) and pass senderIsOwner explicitly at ingress boundaries.", + }); +} + +runAsScript(import.meta.url, main); diff --git a/src/commands/agent.test.ts b/src/commands/agent.test.ts index f827d445329..7ca6909af4a 100644 --- a/src/commands/agent.test.ts +++ b/src/commands/agent.test.ts @@ -15,7 +15,7 @@ import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import type { RuntimeEnv } from "../runtime.js"; import { createOutboundTestPlugin, createTestRegistry } from "../test-utils/channel-plugins.js"; -import { agentCommand } from "./agent.js"; +import { agentCommand, agentCommandFromIngress } from "./agent.js"; import * as agentDeliveryModule from "./agent/delivery.js"; vi.mock("../agents/auth-profiles.js", async (importOriginal) => { @@ -316,6 +316,27 @@ describe("agentCommand", () => { expect(callArgs?.senderIsOwner).toBe(expected); }); + it("requires explicit senderIsOwner for ingress runs", async () => { + await withTempHome(async (home) => { + const store = path.join(home, "sessions.json"); + mockConfig(home, store); + await expect( + // Runtime guard for non-TS callers; TS callsites are statically typed. + agentCommandFromIngress({ message: "hi", to: "+1555" } as never, runtime), + ).rejects.toThrow("senderIsOwner must be explicitly set for ingress agent runs."); + }); + }); + + it("honors explicit senderIsOwner for ingress runs", async () => { + await withTempHome(async (home) => { + const store = path.join(home, "sessions.json"); + mockConfig(home, store); + await agentCommandFromIngress({ message: "hi", to: "+1555", senderIsOwner: false }, runtime); + const ingressCall = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0]; + expect(ingressCall?.senderIsOwner).toBe(false); + }); + }); + it("resumes when session-id is provided", async () => { await withTempHome(async (home) => { const store = path.join(home, "sessions.json"); diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 4ddde526119..b0d3f3f09ce 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -77,7 +77,7 @@ import { deliverAgentCommandResult } from "./agent/delivery.js"; import { resolveAgentRunContext } from "./agent/run-context.js"; import { updateSessionStoreAfterAgentRun } from "./agent/session-store.js"; import { resolveSession } from "./agent/session.js"; -import type { AgentCommandOpts } from "./agent/types.js"; +import type { AgentCommandIngressOpts, AgentCommandOpts } from "./agent/types.js"; type PersistSessionEntryParams = { sessionStore: Record; @@ -160,7 +160,7 @@ function runAgentAttempt(params: { resolvedThinkLevel: ThinkLevel; timeoutMs: number; runId: string; - opts: AgentCommandOpts; + opts: AgentCommandOpts & { senderIsOwner: boolean }; runContext: ReturnType; spawnedBy: string | undefined; messageChannel: ReturnType; @@ -172,7 +172,6 @@ function runAgentAttempt(params: { sessionStore?: Record; storePath?: string; }) { - const senderIsOwner = params.opts.senderIsOwner ?? true; const effectivePrompt = resolveFallbackRetryPrompt({ body: params.body, isFallbackRetry: params.isFallbackRetry, @@ -292,7 +291,7 @@ function runAgentAttempt(params: { currentThreadTs: params.runContext.currentThreadTs, replyToMode: params.runContext.replyToMode, hasRepliedRef: params.runContext.hasRepliedRef, - senderIsOwner, + senderIsOwner: params.opts.senderIsOwner, sessionFile: params.sessionFile, workspaceDir: params.workspaceDir, config: params.cfg, @@ -318,8 +317,8 @@ function runAgentAttempt(params: { }); } -export async function agentCommand( - opts: AgentCommandOpts, +async function agentCommandInternal( + opts: AgentCommandOpts & { senderIsOwner: boolean }, runtime: RuntimeEnv = defaultRuntime, deps: CliDeps = createDefaultDeps(), ) { @@ -922,3 +921,36 @@ export async function agentCommand( clearAgentRunContext(runId); } } + +export async function agentCommand( + opts: AgentCommandOpts, + runtime: RuntimeEnv = defaultRuntime, + deps: CliDeps = createDefaultDeps(), +) { + return await agentCommandInternal( + { + ...opts, + senderIsOwner: opts.senderIsOwner ?? true, + }, + runtime, + deps, + ); +} + +export async function agentCommandFromIngress( + opts: AgentCommandIngressOpts, + runtime: RuntimeEnv = defaultRuntime, + deps: CliDeps = createDefaultDeps(), +) { + if (typeof opts.senderIsOwner !== "boolean") { + throw new Error("senderIsOwner must be explicitly set for ingress agent runs."); + } + return await agentCommandInternal( + { + ...opts, + senderIsOwner: opts.senderIsOwner, + }, + runtime, + deps, + ); +} diff --git a/src/commands/agent/types.ts b/src/commands/agent/types.ts index 7a8e45ca55f..b92f22dad8e 100644 --- a/src/commands/agent/types.ts +++ b/src/commands/agent/types.ts @@ -81,3 +81,8 @@ export type AgentCommandOpts = { /** Per-call stream param overrides (best-effort). */ streamParams?: AgentStreamParams; }; + +export type AgentCommandIngressOpts = Omit & { + /** Ingress callsites must always pass explicit owner authorization state. */ + senderIsOwner: boolean; +}; diff --git a/src/discord/monitor/agent-components.ts b/src/discord/monitor/agent-components.ts index 7e850e5fc46..a6bceae7ff5 100644 --- a/src/discord/monitor/agent-components.ts +++ b/src/discord/monitor/agent-components.ts @@ -61,6 +61,7 @@ import { resolveDiscordChannelConfigWithFallback, resolveDiscordGuildEntry, resolveDiscordMemberAccessState, + resolveDiscordOwnerAccess, resolveDiscordOwnerAllowFrom, } from "./allow-list.js"; import { formatDiscordUserTag } from "./format.js"; @@ -764,18 +765,15 @@ function resolveComponentCommandAuthorized(params: { return true; } - const ownerAllowList = normalizeDiscordAllowList(ctx.allowFrom, ["discord:", "user:", "pk:"]); - const ownerOk = ownerAllowList - ? resolveDiscordAllowListMatch({ - allowList: ownerAllowList, - candidate: { - id: interactionCtx.user.id, - name: interactionCtx.user.username, - tag: formatDiscordUserTag(interactionCtx.user), - }, - allowNameMatching: params.allowNameMatching, - }).allowed - : false; + const { ownerAllowList, ownerAllowed: ownerOk } = resolveDiscordOwnerAccess({ + allowFrom: ctx.allowFrom, + sender: { + id: interactionCtx.user.id, + name: interactionCtx.user.username, + tag: formatDiscordUserTag(interactionCtx.user), + }, + allowNameMatching: params.allowNameMatching, + }); const { hasAccessRestrictions, memberAllowed } = resolveDiscordMemberAccessState({ channelConfig, diff --git a/src/discord/monitor/allow-list.ts b/src/discord/monitor/allow-list.ts index c0bff421505..e2b3e7371b0 100644 --- a/src/discord/monitor/allow-list.ts +++ b/src/discord/monitor/allow-list.ts @@ -16,6 +16,8 @@ export type DiscordAllowList = { export type DiscordAllowListMatch = AllowlistMatch<"wildcard" | "id" | "name" | "tag">; +const DISCORD_OWNER_ALLOWLIST_PREFIXES = ["discord:", "user:", "pk:"]; + export type DiscordGuildEntryResolved = { id?: string; slug?: string; @@ -265,6 +267,32 @@ export function resolveDiscordOwnerAllowFrom(params: { return [match.matchKey]; } +export function resolveDiscordOwnerAccess(params: { + allowFrom?: string[]; + sender: { id: string; name?: string; tag?: string }; + allowNameMatching?: boolean; +}): { + ownerAllowList: DiscordAllowList | null; + ownerAllowed: boolean; +} { + const ownerAllowList = normalizeDiscordAllowList( + params.allowFrom, + DISCORD_OWNER_ALLOWLIST_PREFIXES, + ); + const ownerAllowed = ownerAllowList + ? allowListMatches( + ownerAllowList, + { + id: params.sender.id, + name: params.sender.name, + tag: params.sender.tag, + }, + { allowNameMatching: params.allowNameMatching }, + ) + : false; + return { ownerAllowList, ownerAllowed }; +} + export function resolveDiscordCommandAuthorized(params: { isDirectMessage: boolean; allowFrom?: string[]; diff --git a/src/discord/monitor/message-handler.preflight.ts b/src/discord/monitor/message-handler.preflight.ts index 1f45e353e16..a7d8fde623f 100644 --- a/src/discord/monitor/message-handler.preflight.ts +++ b/src/discord/monitor/message-handler.preflight.ts @@ -30,13 +30,12 @@ import { DEFAULT_ACCOUNT_ID, resolveAgentIdFromSessionKey } from "../../routing/ import { fetchPluralKitMessageInfo } from "../pluralkit.js"; import { sendMessageDiscord } from "../send.js"; import { - allowListMatches, isDiscordGroupAllowedByPolicy, - normalizeDiscordAllowList, normalizeDiscordSlug, resolveDiscordChannelConfigWithFallback, resolveDiscordGuildEntry, resolveDiscordMemberAccessState, + resolveDiscordOwnerAccess, resolveDiscordShouldRequireMention, resolveGroupDmAllow, } from "./allow-list.js"; @@ -549,22 +548,15 @@ export async function preflightDiscordMessage( }); if (!isDirectMessage) { - const ownerAllowList = normalizeDiscordAllowList(params.allowFrom, [ - "discord:", - "user:", - "pk:", - ]); - const ownerOk = ownerAllowList - ? allowListMatches( - ownerAllowList, - { - id: sender.id, - name: sender.name, - tag: sender.tag, - }, - { allowNameMatching }, - ) - : false; + const { ownerAllowList, ownerAllowed: ownerOk } = resolveDiscordOwnerAccess({ + allowFrom: params.allowFrom, + sender: { + id: sender.id, + name: sender.name, + tag: sender.tag, + }, + allowNameMatching, + }); const commandGate = resolveControlCommandGate({ useAccessGroups, authorizers: [ diff --git a/src/discord/monitor/native-command.ts b/src/discord/monitor/native-command.ts index 61d446ca2a9..d9f319ff2be 100644 --- a/src/discord/monitor/native-command.ts +++ b/src/discord/monitor/native-command.ts @@ -54,13 +54,12 @@ import { withTimeout } from "../../utils/with-timeout.js"; import { loadWebMedia } from "../../web/media.js"; import { chunkDiscordTextWithMode } from "../chunk.js"; import { - allowListMatches, isDiscordGroupAllowedByPolicy, - normalizeDiscordAllowList, normalizeDiscordSlug, resolveDiscordChannelConfigWithFallback, resolveDiscordGuildEntry, resolveDiscordMemberAccessState, + resolveDiscordOwnerAccess, resolveDiscordOwnerAllowFrom, } from "./allow-list.js"; import { resolveDiscordDmCommandAccess } from "./dm-command-auth.js"; @@ -1270,22 +1269,15 @@ async function dispatchDiscordCommandInteraction(params: { ? interaction.rawData.member.roles.map((roleId: string) => String(roleId)) : []; const allowNameMatching = isDangerousNameMatchingEnabled(discordConfig); - const ownerAllowList = normalizeDiscordAllowList( - discordConfig?.allowFrom ?? discordConfig?.dm?.allowFrom ?? [], - ["discord:", "user:", "pk:"], - ); - const ownerOk = - ownerAllowList && user - ? allowListMatches( - ownerAllowList, - { - id: sender.id, - name: sender.name, - tag: sender.tag, - }, - { allowNameMatching }, - ) - : false; + const { ownerAllowList, ownerAllowed: ownerOk } = resolveDiscordOwnerAccess({ + allowFrom: discordConfig?.allowFrom ?? discordConfig?.dm?.allowFrom ?? [], + sender: { + id: sender.id, + name: sender.name, + tag: sender.tag, + }, + allowNameMatching, + }); const guildInfo = resolveDiscordGuildEntry({ guild: interaction.guild ?? undefined, guildEntries: discordConfig?.guilds, diff --git a/src/discord/voice/command.ts b/src/discord/voice/command.ts index adb3e6ca879..1599fec650b 100644 --- a/src/discord/voice/command.ts +++ b/src/discord/voice/command.ts @@ -15,10 +15,9 @@ import type { OpenClawConfig } from "../../config/config.js"; import { isDangerousNameMatchingEnabled } from "../../config/dangerous-name-matching.js"; import type { DiscordAccountConfig } from "../../config/types.js"; import { - allowListMatches, isDiscordGroupAllowedByPolicy, - normalizeDiscordAllowList, normalizeDiscordSlug, + resolveDiscordOwnerAccess, resolveDiscordChannelConfigWithFallback, resolveDiscordGuildEntry, resolveDiscordMemberAccessState, @@ -160,21 +159,15 @@ async function authorizeVoiceCommand( allowNameMatching: isDangerousNameMatchingEnabled(params.discordConfig), }); - const ownerAllowList = normalizeDiscordAllowList( - params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? [], - ["discord:", "user:", "pk:"], - ); - const ownerOk = ownerAllowList - ? allowListMatches( - ownerAllowList, - { - id: sender.id, - name: sender.name, - tag: sender.tag, - }, - { allowNameMatching: isDangerousNameMatchingEnabled(params.discordConfig) }, - ) - : false; + const { ownerAllowList, ownerAllowed: ownerOk } = resolveDiscordOwnerAccess({ + allowFrom: params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? [], + sender: { + id: sender.id, + name: sender.name, + tag: sender.tag, + }, + allowNameMatching: isDangerousNameMatchingEnabled(params.discordConfig), + }); const authorizers = params.useAccessGroups ? [ diff --git a/src/discord/voice/manager.e2e.test.ts b/src/discord/voice/manager.e2e.test.ts index 13c618ed361..93ce4d744a2 100644 --- a/src/discord/voice/manager.e2e.test.ts +++ b/src/discord/voice/manager.e2e.test.ts @@ -7,6 +7,11 @@ const { entersStateMock, createAudioPlayerMock, resolveAgentRouteMock, + agentCommandMock, + buildProviderRegistryMock, + createMediaAttachmentCacheMock, + normalizeMediaAttachmentsMock, + runCapabilityMock, } = vi.hoisted(() => { type EventHandler = (...args: unknown[]) => unknown; type MockConnection = { @@ -62,6 +67,15 @@ const { state: { status: "idle" }, })), resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })), + agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })), + buildProviderRegistryMock: vi.fn(() => ({})), + createMediaAttachmentCacheMock: vi.fn(() => ({ + cleanup: vi.fn(async () => undefined), + })), + normalizeMediaAttachmentsMock: vi.fn(() => [{ kind: "audio", path: "/tmp/test.wav" }]), + runCapabilityMock: vi.fn(async () => ({ + outputs: [{ kind: "audio.transcription", text: "hello from voice" }], + })), }; }); @@ -85,6 +99,17 @@ vi.mock("../../routing/resolve-route.js", () => ({ resolveAgentRoute: resolveAgentRouteMock, })); +vi.mock("../../commands/agent.js", () => ({ + agentCommandFromIngress: agentCommandMock, +})); + +vi.mock("../../media-understanding/runner.js", () => ({ + buildProviderRegistry: buildProviderRegistryMock, + createMediaAttachmentCache: createMediaAttachmentCacheMock, + normalizeMediaAttachments: normalizeMediaAttachmentsMock, + runCapability: runCapabilityMock, +})); + let managerModule: typeof import("./manager.js"); function createClient() { @@ -122,15 +147,27 @@ describe("DiscordVoiceManager", () => { entersStateMock.mockResolvedValue(undefined); createAudioPlayerMock.mockClear(); resolveAgentRouteMock.mockClear(); + agentCommandMock.mockReset(); + agentCommandMock.mockResolvedValue({ payloads: [] }); + buildProviderRegistryMock.mockReset(); + buildProviderRegistryMock.mockReturnValue({}); + createMediaAttachmentCacheMock.mockClear(); + normalizeMediaAttachmentsMock.mockReset(); + normalizeMediaAttachmentsMock.mockReturnValue([{ kind: "audio", path: "/tmp/test.wav" }]); + runCapabilityMock.mockReset(); + runCapabilityMock.mockResolvedValue({ + outputs: [{ kind: "audio.transcription", text: "hello from voice" }], + }); }); const createManager = ( discordConfig: ConstructorParameters< typeof managerModule.DiscordVoiceManager >[0]["discordConfig"] = {}, + clientOverride?: ReturnType, ) => new managerModule.DiscordVoiceManager({ - client: createClient() as never, + client: (clientOverride ?? createClient()) as never, cfg: {}, discordConfig, accountId: "default", @@ -248,4 +285,119 @@ describe("DiscordVoiceManager", () => { expect(joinVoiceChannelMock).toHaveBeenCalledTimes(2); }); + + it("passes senderIsOwner=true for allowlisted voice speakers", async () => { + const client = createClient(); + client.fetchMember.mockResolvedValue({ + nickname: "Owner Nick", + user: { + id: "u-owner", + username: "owner", + globalName: "Owner", + discriminator: "1234", + }, + }); + const manager = createManager({ allowFrom: ["discord:u-owner"] }, client); + await ( + manager as unknown as { + processSegment: (params: { + entry: unknown; + wavPath: string; + userId: string; + durationSeconds: number; + }) => Promise; + } + ).processSegment({ + entry: { + guildId: "g1", + channelId: "c1", + route: { sessionKey: "discord:g1:c1", agentId: "agent-1" }, + }, + wavPath: "/tmp/test.wav", + userId: "u-owner", + durationSeconds: 1.2, + }); + + const commandArgs = agentCommandMock.mock.calls.at(-1)?.[0] as + | { senderIsOwner?: boolean } + | undefined; + expect(commandArgs?.senderIsOwner).toBe(true); + }); + + it("passes senderIsOwner=false for non-owner voice speakers", async () => { + const client = createClient(); + client.fetchMember.mockResolvedValue({ + nickname: "Guest Nick", + user: { + id: "u-guest", + username: "guest", + globalName: "Guest", + discriminator: "4321", + }, + }); + const manager = createManager({ allowFrom: ["discord:u-owner"] }, client); + await ( + manager as unknown as { + processSegment: (params: { + entry: unknown; + wavPath: string; + userId: string; + durationSeconds: number; + }) => Promise; + } + ).processSegment({ + entry: { + guildId: "g1", + channelId: "c1", + route: { sessionKey: "discord:g1:c1", agentId: "agent-1" }, + }, + wavPath: "/tmp/test.wav", + userId: "u-guest", + durationSeconds: 1.2, + }); + + const commandArgs = agentCommandMock.mock.calls.at(-1)?.[0] as + | { senderIsOwner?: boolean } + | undefined; + expect(commandArgs?.senderIsOwner).toBe(false); + }); + + it("reuses speaker context cache for repeated segments from the same speaker", async () => { + const client = createClient(); + client.fetchMember.mockResolvedValue({ + nickname: "Cached Speaker", + user: { + id: "u-cache", + username: "cache", + globalName: "Cache", + discriminator: "1111", + }, + }); + const manager = createManager({ allowFrom: ["discord:u-cache"] }, client); + const runSegment = async () => + await ( + manager as unknown as { + processSegment: (params: { + entry: unknown; + wavPath: string; + userId: string; + durationSeconds: number; + }) => Promise; + } + ).processSegment({ + entry: { + guildId: "g1", + channelId: "c1", + route: { sessionKey: "discord:g1:c1", agentId: "agent-1" }, + }, + wavPath: "/tmp/test.wav", + userId: "u-cache", + durationSeconds: 1.2, + }); + + await runSegment(); + await runSegment(); + + expect(client.fetchMember).toHaveBeenCalledTimes(1); + }); }); diff --git a/src/discord/voice/manager.ts b/src/discord/voice/manager.ts index c246b280fb4..dd1f37a8297 100644 --- a/src/discord/voice/manager.ts +++ b/src/discord/voice/manager.ts @@ -18,8 +18,9 @@ import { } from "@discordjs/voice"; import { resolveAgentDir } from "../../agents/agent-scope.js"; import type { MsgContext } from "../../auto-reply/templating.js"; -import { agentCommand } from "../../commands/agent.js"; +import { agentCommandFromIngress } from "../../commands/agent.js"; import type { OpenClawConfig } from "../../config/config.js"; +import { isDangerousNameMatchingEnabled } from "../../config/dangerous-name-matching.js"; import type { DiscordAccountConfig, TtsConfig } from "../../config/types.js"; import { logVerbose, shouldLogVerbose } from "../../globals.js"; import { formatErrorMessage } from "../../infra/errors.js"; @@ -35,6 +36,8 @@ import { resolveAgentRoute } from "../../routing/resolve-route.js"; import type { RuntimeEnv } from "../../runtime.js"; import { parseTtsDirectives } from "../../tts/tts-core.js"; import { resolveTtsConfig, textToSpeech, type ResolvedTtsConfig } from "../../tts/tts.js"; +import { resolveDiscordOwnerAccess } from "../monitor/allow-list.js"; +import { formatDiscordUserTag } from "../monitor/format.js"; const require = createRequire(import.meta.url); @@ -48,6 +51,7 @@ const SPEAKING_READY_TIMEOUT_MS = 60_000; const DECRYPT_FAILURE_WINDOW_MS = 30_000; const DECRYPT_FAILURE_RECONNECT_THRESHOLD = 3; const DECRYPT_FAILURE_PATTERN = /DecryptionFailed\(/; +const SPEAKER_CONTEXT_CACHE_TTL_MS = 60_000; const logger = createSubsystemLogger("discord/voice"); @@ -275,6 +279,16 @@ export class DiscordVoiceManager { private botUserId?: string; private readonly voiceEnabled: boolean; private autoJoinTask: Promise | null = null; + private readonly ownerAllowFrom: string[]; + private readonly allowDangerousNameMatching: boolean; + private readonly speakerContextCache = new Map< + string, + { + label: string; + senderIsOwner: boolean; + expiresAt: number; + } + >(); constructor( private params: { @@ -288,6 +302,9 @@ export class DiscordVoiceManager { ) { this.botUserId = params.botUserId; this.voiceEnabled = params.discordConfig.voice?.enabled !== false; + this.ownerAllowFrom = + params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? []; + this.allowDangerousNameMatching = isDangerousNameMatchingEnabled(params.discordConfig); } setBotUserId(id?: string) { @@ -625,15 +642,16 @@ export class DiscordVoiceManager { `transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`, ); - const speakerLabel = await this.resolveSpeakerLabel(entry.guildId, userId); - const prompt = speakerLabel ? `${speakerLabel}: ${transcript}` : transcript; + const speaker = await this.resolveSpeakerContext(entry.guildId, userId); + const prompt = speaker.label ? `${speaker.label}: ${transcript}` : transcript; - const result = await agentCommand( + const result = await agentCommandFromIngress( { message: prompt, sessionKey: entry.route.sessionKey, agentId: entry.route.agentId, messageChannel: "discord", + senderIsOwner: speaker.senderIsOwner, deliver: false, }, this.params.runtime, @@ -757,16 +775,113 @@ export class DiscordVoiceManager { } } - private async resolveSpeakerLabel(guildId: string, userId: string): Promise { + private resolveSpeakerIsOwner(params: { id: string; name?: string; tag?: string }): boolean { + return resolveDiscordOwnerAccess({ + allowFrom: this.ownerAllowFrom, + sender: { + id: params.id, + name: params.name, + tag: params.tag, + }, + allowNameMatching: this.allowDangerousNameMatching, + }).ownerAllowed; + } + + private resolveSpeakerContextCacheKey(guildId: string, userId: string): string { + return `${guildId}:${userId}`; + } + + private getCachedSpeakerContext( + guildId: string, + userId: string, + ): + | { + label: string; + senderIsOwner: boolean; + } + | undefined { + const key = this.resolveSpeakerContextCacheKey(guildId, userId); + const cached = this.speakerContextCache.get(key); + if (!cached) { + return undefined; + } + if (cached.expiresAt <= Date.now()) { + this.speakerContextCache.delete(key); + return undefined; + } + return { + label: cached.label, + senderIsOwner: cached.senderIsOwner, + }; + } + + private setCachedSpeakerContext( + guildId: string, + userId: string, + context: { label: string; senderIsOwner: boolean }, + ): void { + const key = this.resolveSpeakerContextCacheKey(guildId, userId); + this.speakerContextCache.set(key, { + label: context.label, + senderIsOwner: context.senderIsOwner, + expiresAt: Date.now() + SPEAKER_CONTEXT_CACHE_TTL_MS, + }); + } + + private async resolveSpeakerContext( + guildId: string, + userId: string, + ): Promise<{ + label: string; + senderIsOwner: boolean; + }> { + const cached = this.getCachedSpeakerContext(guildId, userId); + if (cached) { + return cached; + } + const identity = await this.resolveSpeakerIdentity(guildId, userId); + const context = { + label: identity.label, + senderIsOwner: this.resolveSpeakerIsOwner({ + id: identity.id, + name: identity.name, + tag: identity.tag, + }), + }; + this.setCachedSpeakerContext(guildId, userId, context); + return context; + } + + private async resolveSpeakerIdentity( + guildId: string, + userId: string, + ): Promise<{ + id: string; + label: string; + name?: string; + tag?: string; + }> { try { const member = await this.params.client.fetchMember(guildId, userId); - return member.nickname ?? member.user?.globalName ?? member.user?.username ?? userId; + const username = member.user?.username ?? undefined; + return { + id: userId, + label: member.nickname ?? member.user?.globalName ?? username ?? userId, + name: username, + tag: member.user ? formatDiscordUserTag(member.user) : undefined, + }; } catch { try { const user = await this.params.client.fetchUser(userId); - return user.globalName ?? user.username ?? userId; + const username = user.username ?? undefined; + return { + id: userId, + label: user.globalName ?? username ?? userId, + name: username, + tag: formatDiscordUserTag(user), + }; } catch { - return userId; + return { id: userId, label: userId }; } } } diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index 7048b3d6d68..10e8d713fee 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -1,7 +1,7 @@ import { randomUUID } from "node:crypto"; import type { IncomingMessage, ServerResponse } from "node:http"; import { createDefaultDeps } from "../cli/deps.js"; -import { agentCommand } from "../commands/agent.js"; +import { agentCommandFromIngress } from "../commands/agent.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { logWarn } from "../logger.js"; import { defaultRuntime } from "../runtime.js"; @@ -55,6 +55,8 @@ function buildAgentCommandInput(params: { deliver: false as const, messageChannel: params.messageChannel, bestEffortDeliver: false as const, + // HTTP API callers are authenticated operator clients for this gateway context. + senderIsOwner: true as const, }; } @@ -247,7 +249,7 @@ export async function handleOpenAiHttpRequest( if (!stream) { try { - const result = await agentCommand(commandInput, defaultRuntime, deps); + const result = await agentCommandFromIngress(commandInput, defaultRuntime, deps); const content = resolveAgentResponseText(result); @@ -327,7 +329,7 @@ export async function handleOpenAiHttpRequest( void (async () => { try { - const result = await agentCommand(commandInput, defaultRuntime, deps); + const result = await agentCommandFromIngress(commandInput, defaultRuntime, deps); if (closed) { return; diff --git a/src/gateway/openresponses-http.ts b/src/gateway/openresponses-http.ts index 70dc7a719a9..e392b47bebc 100644 --- a/src/gateway/openresponses-http.ts +++ b/src/gateway/openresponses-http.ts @@ -10,7 +10,7 @@ import { randomUUID } from "node:crypto"; import type { IncomingMessage, ServerResponse } from "node:http"; import type { ClientToolDefinition } from "../agents/pi-embedded-runner/run/params.js"; import { createDefaultDeps } from "../cli/deps.js"; -import { agentCommand } from "../commands/agent.js"; +import { agentCommandFromIngress } from "../commands/agent.js"; import type { ImageContent } from "../commands/agent/types.js"; import type { GatewayHttpResponsesConfig } from "../config/types.gateway.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; @@ -236,7 +236,7 @@ async function runResponsesAgentCommand(params: { messageChannel: string; deps: ReturnType; }) { - return agentCommand( + return agentCommandFromIngress( { message: params.message, images: params.images.length > 0 ? params.images : undefined, @@ -248,6 +248,8 @@ async function runResponsesAgentCommand(params: { deliver: false, messageChannel: params.messageChannel, bestEffortDeliver: false, + // HTTP API callers are authenticated operator clients for this gateway context. + senderIsOwner: true, }, defaultRuntime, params.deps, diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index c954d439858..d45fddb05f9 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -2,7 +2,7 @@ import { randomUUID } from "node:crypto"; import { listAgentIds } from "../../agents/agent-scope.js"; import type { AgentInternalEvent } from "../../agents/internal-events.js"; import { BARE_SESSION_RESET_PROMPT } from "../../auto-reply/reply/session-reset-prompt.js"; -import { agentCommand } from "../../commands/agent.js"; +import { agentCommandFromIngress } from "../../commands/agent.js"; import { loadConfig } from "../../config/config.js"; import { mergeSessionEntry, @@ -600,7 +600,7 @@ export const agentHandlers: GatewayRequestHandlers = { const resolvedThreadId = explicitThreadId ?? deliveryPlan.resolvedThreadId; - void agentCommand( + void agentCommandFromIngress( { message, images, diff --git a/src/gateway/server-node-events.ts b/src/gateway/server-node-events.ts index b402a4f0cd5..17495a6e737 100644 --- a/src/gateway/server-node-events.ts +++ b/src/gateway/server-node-events.ts @@ -1,7 +1,7 @@ import { randomUUID } from "node:crypto"; import { normalizeChannelId } from "../channels/plugins/index.js"; import { createOutboundSendDeps } from "../cli/outbound-send-deps.js"; -import { agentCommand } from "../commands/agent.js"; +import { agentCommandFromIngress } from "../commands/agent.js"; import { loadConfig } from "../config/config.js"; import { updateSessionStore } from "../config/sessions.js"; import { requestHeartbeatNow } from "../infra/heartbeat-wake.js"; @@ -303,7 +303,7 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt clientRunId: `voice-${randomUUID()}`, }); - void agentCommand( + void agentCommandFromIngress( { message: text, sessionId, @@ -434,7 +434,7 @@ export const handleNodeEvent = async (ctx: NodeEventContext, nodeId: string, evt ); } - void agentCommand( + void agentCommandFromIngress( { message, images,