mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:20:43 +00:00
fix: restore Discord voice replies
This commit is contained in:
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Discord/voice: run voice-channel turns under a voice-output policy that hides the agent `tts` tool and asks for spoken reply text, so `/vc join` sessions synthesize and play agent replies instead of ending with `NO_REPLY`. Fixes #61536. Thanks @aounakram.
|
||||
- Plugins/runtime-deps: prune legacy version-scoped plugin runtime-deps roots during bundled dependency repair and cover the path in Package Acceptance's upgrade-survivor matrix, so upgrades from 2026.4.x no longer leave stale per-plugin runtime trees after doctor runs. Thanks @vincentkoc.
|
||||
- Plugins/runtime-deps: keep Gateway startup plugin imports and runtime plugin fallback loads verify-only after startup/config repair planning, so packaged installs no longer spawn package-manager repair from hot paths after readiness. Refs #75283 and #75069. Thanks @brokemac79 and @xiaohuaxi.
|
||||
- Voice Call/realtime: add default-off fast memory/session context for `openclaw_agent_consult`, giving live calls a bounded answer-or-miss path before the full agent consult. Fixes #71849. Thanks @amzzzzzzz.
|
||||
|
||||
@@ -1075,7 +1075,7 @@ Voice channel pipeline:
|
||||
|
||||
- Discord PCM capture is converted to a WAV temp file.
|
||||
- `tools.media.audio` handles STT, for example `openai/gpt-4o-mini-transcribe`.
|
||||
- The transcript is sent through normal Discord ingress and routing.
|
||||
- The transcript is sent through Discord ingress and routing. The response LLM runs under a voice-output policy that hides the agent `tts` tool and asks the model to return its reply as text, because Discord voice owns final TTS playback.
|
||||
- `voice.model`, when set, overrides only the response LLM for this voice-channel turn.
|
||||
- `voice.tts` is merged over `messages.tts`; the resulting audio is played in the joined channel.
|
||||
|
||||
|
||||
@@ -539,6 +539,41 @@ describe("DiscordVoiceManager", () => {
|
||||
expect(commandArgs?.model).toBe("openai/gpt-5.4-mini");
|
||||
});
|
||||
|
||||
// Verifies the voice pipeline hands the agent a text-only turn (Discord channel,
// discord-voice provider, prompt forbidding the tts tool) and then synthesizes
// the returned text itself.
it("runs voice replies under Discord voice output policy", async () => {
  // The agent answers with plain text; the voice pipeline is expected to speak it.
  agentCommandMock.mockResolvedValueOnce({ payloads: [{ text: "hello back" }] } as never);

  const discordClient = createClient();
  discordClient.fetchMember.mockResolvedValue({
    nickname: "Guest Nick",
    user: { id: "u-guest", username: "guest", globalName: "Guest", discriminator: "4321" },
  });

  const voiceManager = createManager({ groupPolicy: "open" }, discordClient, {
    commands: { useAccessGroups: false },
  });
  await processVoiceSegment(voiceManager, "u-guest");

  // Only the fields asserted below are modelled; the real argument type is wider.
  type VoiceCommandArgs = { message?: string; messageChannel?: string; messageProvider?: string };
  const lastAgentCall = agentCommandMock.mock.calls.at(-1);
  const voiceArgs = lastAgentCall?.[0] as VoiceCommandArgs | undefined;

  expect(voiceArgs?.messageChannel).toBe("discord");
  expect(voiceArgs?.messageProvider).toBe("discord-voice");
  expect(voiceArgs?.message).toContain("Do not call the tts tool");

  // The reply text must be routed to TTS on the Discord channel.
  expect(textToSpeechMock).toHaveBeenCalledWith(
    expect.objectContaining({ channel: "discord", text: "hello back" }),
  );
});
|
||||
|
||||
it("reuses speaker context cache for repeated segments from the same speaker", async () => {
|
||||
const client = createClient();
|
||||
client.fetchMember.mockResolvedValue({
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { formatVoiceIngressPrompt } from "./prompt.js";
|
||||
import { DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, formatVoiceIngressPrompt } from "./prompt.js";
|
||||
|
||||
// Pins the prompt layout: the spoken-output contract always comes first,
// separated from the (optionally speaker-labeled) transcript by a blank line.
describe("formatVoiceIngressPrompt", () => {
  it("formats speaker-labeled voice input with the spoken-output contract", () => {
    expect(formatVoiceIngressPrompt("hello there", "speaker-1")).toBe(
      `${DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT}\n\nVoice transcript from speaker "speaker-1":\nhello there`,
    );
  });

  it("keeps unlabeled transcripts under the spoken-output contract", () => {
    expect(formatVoiceIngressPrompt("hello there")).toBe(
      `${DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT}\n\nhello there`,
    );
  });
});
|
||||
|
||||
@@ -1,8 +1,17 @@
|
||||
/**
 * Instruction block placed in front of every Discord voice-channel transcript.
 *
 * It tells the responding model to return only the text to be spoken, not to
 * call the `tts` tool itself, and to avoid `NO_REPLY` unless staying silent is
 * appropriate.
 */
export const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
  "Discord voice reply requirements:",
  "- Return only the concise text that should be spoken aloud in the voice channel.",
  "- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
  "- Do not reply with NO_REPLY unless no spoken response is appropriate.",
  "- Keep the response brief and conversational.",
].join("\n");

/**
 * Builds the agent prompt for one transcribed voice segment.
 *
 * The result is always the spoken-output contract, a blank line, and then the
 * transcript. When a non-blank speaker label is supplied, the transcript is
 * preceded by a `Voice transcript from speaker "<label>":` header line.
 *
 * @param transcript - Raw transcript text; surrounding whitespace is trimmed.
 * @param speakerLabel - Optional speaker label; blank or whitespace-only labels
 *   are treated as absent.
 * @returns The contract-prefixed prompt text.
 */
export function formatVoiceIngressPrompt(transcript: string, speakerLabel?: string): string {
  const cleanedTranscript = transcript.trim();
  const cleanedLabel = speakerLabel?.trim();

  // No early return for unlabeled input: every voice turn must carry the contract.
  const voiceInput = cleanedLabel
    ? [`Voice transcript from speaker "${cleanedLabel}":`, cleanedTranscript].join("\n")
    : cleanedTranscript;

  return [DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, voiceInput].join("\n\n");
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
import type { DiscordVoiceSpeakerContextResolver } from "./speaker-context.js";
|
||||
import { synthesizeVoiceReplyAudio, transcribeVoiceAudio } from "./tts.js";
|
||||
|
||||
const DISCORD_VOICE_MESSAGE_PROVIDER = "discord-voice";
|
||||
const logger = createSubsystemLogger("discord/voice");
|
||||
|
||||
export async function processDiscordVoiceSegment(params: {
|
||||
@@ -89,6 +90,7 @@ export async function processDiscordVoiceSegment(params: {
|
||||
sessionKey: entry.route.sessionKey,
|
||||
agentId: entry.route.agentId,
|
||||
messageChannel: "discord",
|
||||
messageProvider: DISCORD_VOICE_MESSAGE_PROVIDER,
|
||||
senderIsOwner: speaker.senderIsOwner,
|
||||
allowModelOverride: Boolean(modelOverride),
|
||||
model: modelOverride,
|
||||
|
||||
@@ -424,7 +424,7 @@ describe("CLI attempt execution", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("forwards user trigger and channel context to CLI runs", async () => {
|
||||
it("forwards separate user trigger, channel, and provider context to CLI runs", async () => {
|
||||
const sessionKey = "agent:main:direct:claude-channel-context";
|
||||
const sessionEntry: SessionEntry = {
|
||||
sessionId: "openclaw-session-channel",
|
||||
@@ -450,10 +450,13 @@ describe("CLI attempt execution", () => {
|
||||
resolvedThinkLevel: "medium",
|
||||
timeoutMs: 1_000,
|
||||
runId: "run-cli-channel-context",
|
||||
opts: { senderIsOwner: false } as Parameters<typeof runAgentAttempt>[0]["opts"],
|
||||
opts: {
|
||||
senderIsOwner: false,
|
||||
messageProvider: "discord-voice",
|
||||
} as Parameters<typeof runAgentAttempt>[0]["opts"],
|
||||
runContext: {} as Parameters<typeof runAgentAttempt>[0]["runContext"],
|
||||
spawnedBy: undefined,
|
||||
messageChannel: "telegram",
|
||||
messageChannel: "discord",
|
||||
skillsSnapshot: undefined,
|
||||
resolvedVerboseLevel: undefined,
|
||||
agentDir: tmpDir,
|
||||
@@ -468,8 +471,8 @@ describe("CLI attempt execution", () => {
|
||||
expect(runCliAgentMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
trigger: "user",
|
||||
messageChannel: "telegram",
|
||||
messageProvider: "telegram",
|
||||
messageChannel: "discord",
|
||||
messageProvider: "discord-voice",
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -567,6 +570,7 @@ describe("CLI attempt execution", () => {
|
||||
senderIsOwner: false,
|
||||
modelRun: true,
|
||||
promptMode: "none",
|
||||
messageProvider: "discord-voice",
|
||||
inputProvenance: {
|
||||
kind: "inter_session",
|
||||
sourceSessionKey: "agent:main:discord:source",
|
||||
@@ -575,7 +579,7 @@ describe("CLI attempt execution", () => {
|
||||
} as Parameters<typeof runAgentAttempt>[0]["opts"],
|
||||
runContext: {} as Parameters<typeof runAgentAttempt>[0]["runContext"],
|
||||
spawnedBy: undefined,
|
||||
messageChannel: "telegram",
|
||||
messageChannel: "discord",
|
||||
skillsSnapshot: undefined,
|
||||
resolvedVerboseLevel: undefined,
|
||||
agentDir: tmpDir,
|
||||
@@ -593,6 +597,8 @@ describe("CLI attempt execution", () => {
|
||||
model: "claude-opus-4-7",
|
||||
agentHarnessId: "pi",
|
||||
prompt: "raw prompt",
|
||||
messageChannel: "discord",
|
||||
messageProvider: "discord-voice",
|
||||
modelRun: true,
|
||||
promptMode: "none",
|
||||
disableTools: true,
|
||||
|
||||
@@ -481,7 +481,7 @@ export function runAgentAttempt(params: {
|
||||
skillsSnapshot: params.skillsSnapshot,
|
||||
messageChannel: params.messageChannel,
|
||||
streamParams: params.opts.streamParams,
|
||||
messageProvider: params.messageChannel,
|
||||
messageProvider: params.opts.messageProvider ?? params.messageChannel,
|
||||
agentAccountId: params.runContext.accountId,
|
||||
senderIsOwner: params.opts.senderIsOwner,
|
||||
cleanupBundleMcpOnRunEnd: params.opts.cleanupBundleMcpOnRunEnd,
|
||||
@@ -550,6 +550,7 @@ export function runAgentAttempt(params: {
|
||||
agentId: params.sessionAgentId,
|
||||
trigger: "user",
|
||||
messageChannel: params.messageChannel,
|
||||
messageProvider: params.opts.messageProvider ?? params.messageChannel,
|
||||
agentAccountId: params.runContext.accountId,
|
||||
messageTo: params.opts.replyTo ?? params.opts.to,
|
||||
messageThreadId: params.opts.threadId,
|
||||
|
||||
@@ -71,6 +71,8 @@ export type AgentCommandOpts = {
|
||||
threadId?: string | number;
|
||||
/** Message channel context. */
|
||||
messageChannel?: string;
|
||||
/** Tool-policy/output surface context. Defaults to messageChannel. */
|
||||
messageProvider?: string;
|
||||
/** Delivery channel. */
|
||||
channel?: string;
|
||||
/** Account ID for multi-account channel routing. */
|
||||
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
resolveEmbeddedAgentStreamFn,
|
||||
resolveUnknownToolGuardThreshold,
|
||||
shouldCreateBundleMcpRuntimeForAttempt,
|
||||
resolveAttemptToolPolicyMessageProvider,
|
||||
resolvePromptBuildHookResult,
|
||||
resolvePromptModeForSession,
|
||||
shouldStripBootstrapFromEmbeddedContext,
|
||||
@@ -195,6 +196,21 @@ describe("shouldCreateBundleMcpRuntimeForAttempt", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Covers both resolution paths: an explicit provider wins, otherwise the
// transport channel is used.
describe("resolveAttemptToolPolicyMessageProvider", () => {
  it("prefers explicit tool-policy provider over transport channel", () => {
    const resolved = resolveAttemptToolPolicyMessageProvider({
      messageChannel: "discord",
      messageProvider: "discord-voice",
    });

    expect(resolved).toBe("discord-voice");
  });

  it("falls back to message channel when provider is omitted", () => {
    const resolved = resolveAttemptToolPolicyMessageProvider({ messageChannel: "discord" });

    expect(resolved).toBe("discord");
  });
});
|
||||
|
||||
describe("resolvePromptBuildHookResult", () => {
|
||||
function createLegacyOnlyHookRunner() {
|
||||
return {
|
||||
|
||||
@@ -566,6 +566,13 @@ export function shouldCreateBundleMcpRuntimeForAttempt(params: {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Resolves the provider id used for tool policy in an attempt.
 *
 * An explicit `messageProvider` takes precedence over the transport
 * `messageChannel`. A missing, empty, or whitespace-only provider carries no
 * policy information, so it falls back to the channel instead of masking it.
 *
 * @param params.messageProvider - Explicit tool-policy/output surface, if any.
 * @param params.messageChannel - Transport channel used as the fallback.
 * @returns The explicit provider unchanged when it has content, otherwise the
 *   message channel (which may itself be `undefined`).
 */
export function resolveAttemptToolPolicyMessageProvider(params: {
  messageProvider?: string;
  messageChannel?: string;
}): string | undefined {
  const explicitProvider = params.messageProvider;
  // `??` alone would let "" or "   " override a real channel such as "voice".
  return explicitProvider?.trim() ? explicitProvider : params.messageChannel;
}
|
||||
|
||||
function collectAttemptExplicitToolAllowlistSources(params: {
|
||||
config?: EmbeddedRunAttemptParams["config"];
|
||||
sessionKey?: string;
|
||||
@@ -784,7 +791,7 @@ export async function runEmbeddedAttempt(
|
||||
elevated: params.bashElevated,
|
||||
},
|
||||
sandbox,
|
||||
messageProvider: params.messageChannel ?? params.messageProvider,
|
||||
messageProvider: resolveAttemptToolPolicyMessageProvider(params),
|
||||
agentAccountId: params.agentAccountId,
|
||||
messageTo: params.messageTo,
|
||||
messageThreadId: params.messageThreadId,
|
||||
@@ -1003,7 +1010,7 @@ export async function runEmbeddedAttempt(
|
||||
agentId: sessionAgentId,
|
||||
modelProvider: params.provider,
|
||||
modelId: params.modelId,
|
||||
messageProvider: params.messageChannel ?? params.messageProvider,
|
||||
messageProvider: resolveAttemptToolPolicyMessageProvider(params),
|
||||
agentAccountId: params.agentAccountId,
|
||||
groupId: params.groupId,
|
||||
groupChannel: params.groupChannel,
|
||||
@@ -1030,7 +1037,7 @@ export async function runEmbeddedAttempt(
|
||||
agentId: sessionAgentId,
|
||||
modelProvider: params.provider,
|
||||
modelId: params.modelId,
|
||||
messageProvider: params.messageChannel ?? params.messageProvider,
|
||||
messageProvider: resolveAttemptToolPolicyMessageProvider(params),
|
||||
agentAccountId: params.agentAccountId,
|
||||
groupId: params.groupId,
|
||||
groupChannel: params.groupChannel,
|
||||
|
||||
@@ -4,7 +4,7 @@ import { filterToolNamesByMessageProvider } from "./pi-tools.message-provider-po
|
||||
const DEFAULT_TOOL_NAMES = ["read", "write", "tts", "web_search"];
|
||||
|
||||
describe("createOpenClawCodingTools message provider policy", () => {
|
||||
it.each(["voice", "VOICE", " Voice "])(
|
||||
it.each(["voice", "VOICE", " Voice ", "discord-voice", "DISCORD-VOICE", " Discord-Voice "])(
|
||||
"does not expose tts tool for normalized voice provider: %s",
|
||||
(messageProvider) => {
|
||||
const names = new Set(filterToolNamesByMessageProvider(DEFAULT_TOOL_NAMES, messageProvider));
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
|
||||
// Tools denied on the voice providers listed below.
const VOICE_OUTPUT_DENIED_TOOLS: readonly string[] = ["tts"];

// Deny-list of tool names keyed by message provider id.
const TOOL_DENY_BY_MESSAGE_PROVIDER: Readonly<Record<string, readonly string[]>> = {
  "discord-voice": VOICE_OUTPUT_DENIED_TOOLS,
  voice: VOICE_OUTPUT_DENIED_TOOLS,
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user