mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-12 18:30:44 +00:00
fix(discord): sync realtime voice playback timestamps
This commit is contained in:
@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Discord/voice: synthesize realtime playback timestamps from emitted Discord PCM so that OpenAI realtime barge-in truncation no longer sees `audioEndMs=0` and therefore no longer skips legitimate interruptions.
|
||||
- Plugin SDK: keep activated linked plugin runtime facades loadable when bundled plugin fallback is disabled. Thanks @shakkernerd.
|
||||
- Feishu: auto-thread `message(action="send")` replies inside the topic when the active session is `group_topic` or `group_topic_sender`, and propagate `replyInThread` through text, card, and media outbound adapters so topic-scoped sessions no longer post at the group root. Fixes #74903. (#77151) Thanks @ai-hpc.
|
||||
- WhatsApp: pass routing context into voice-note transcript echo preflight so echoed transcripts can deliver to the originating chat. Fixes #79778. (#79788) Thanks @hclsys.
|
||||
|
||||
@@ -287,6 +287,7 @@ describe("DiscordVoiceManager", () => {
|
||||
realtimeSessionMock.sendAudio.mockClear();
|
||||
realtimeSessionMock.sendUserMessage.mockClear();
|
||||
realtimeSessionMock.handleBargeIn.mockClear();
|
||||
realtimeSessionMock.setMediaTimestamp.mockClear();
|
||||
realtimeSessionMock.submitToolResult.mockClear();
|
||||
createRealtimeVoiceBridgeSessionMock.mockClear();
|
||||
createRealtimeVoiceBridgeSessionMock.mockReturnValue(realtimeSessionMock);
|
||||
@@ -641,7 +642,12 @@ describe("DiscordVoiceManager", () => {
|
||||
bridgeParams?.audioSink?.sendAudio(Buffer.alloc(480));
|
||||
turn?.sendInputAudio(Buffer.alloc(3840));
|
||||
|
||||
expect(realtimeSessionMock.setMediaTimestamp).toHaveBeenCalledWith(0);
|
||||
expect(realtimeSessionMock.setMediaTimestamp).toHaveBeenCalledWith(10);
|
||||
expect(realtimeSessionMock.handleBargeIn).toHaveBeenCalled();
|
||||
const lastTimestampCall = realtimeSessionMock.setMediaTimestamp.mock.invocationCallOrder.at(-1);
|
||||
const firstBargeInCall = realtimeSessionMock.handleBargeIn.mock.invocationCallOrder[0];
|
||||
expect(lastTimestampCall).toBeLessThan(firstBargeInCall);
|
||||
expect(player.stop).not.toHaveBeenCalled();
|
||||
expect(realtimeSessionMock.sendAudio).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -43,6 +43,7 @@ const DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT = 32;
|
||||
const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;
|
||||
const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
|
||||
const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
|
||||
const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
|
||||
|
||||
export type DiscordVoiceMode = "stt-tts" | "agent-proxy" | "bidi";
|
||||
|
||||
@@ -245,6 +246,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
private consultPolicy: "auto" | "always" = "auto";
|
||||
private pendingAgentProxyConsultContexts: PendingAgentProxyConsultContext[] = [];
|
||||
private readonly pendingSpeakerTurns: PendingSpeakerTurn[] = [];
|
||||
private outputAudioTimestampMs = 0;
|
||||
/**
 * Arrow-function property (so `this` stays bound to the session) that tears
 * down the current output stream via `resetOutputStream()`.
 * NOTE(review): presumably registered as the audio player's idle listener —
 * confirm at the registration site.
 */
private readonly playerIdleHandler = (): void => this.resetOutputStream();
|
||||
@@ -430,6 +432,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
if (!this.isBargeInEnabled()) {
|
||||
return;
|
||||
}
|
||||
this.syncOutputAudioTimestamp();
|
||||
this.bridge?.handleBargeIn({ audioPlaybackActive: true });
|
||||
}
|
||||
|
||||
@@ -450,8 +453,13 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
if (discordPcm.length === 0) {
|
||||
return;
|
||||
}
|
||||
this.syncOutputAudioTimestamp();
|
||||
const stream = this.ensureOutputStream();
|
||||
stream.write(discordPcm);
|
||||
this.outputAudioTimestampMs += pcm16MonoDurationMs(
|
||||
realtimePcm24kMono,
|
||||
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ.sampleRateHz,
|
||||
);
|
||||
}
|
||||
|
||||
private ensureOutputStream(): PassThrough {
|
||||
@@ -485,10 +493,15 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
/**
 * Drops the active output stream and rewinds the synthesized playback clock.
 *
 * The stream reference is cleared and `outputAudioTimestampMs` is zeroed
 * before the detached stream is ended and destroyed, so the session state is
 * already reset by the time the stream is closed.
 */
private resetOutputStream(): void {
  const detached = this.outputStream;
  this.outputAudioTimestampMs = 0;
  this.outputStream = null;
  if (detached) {
    detached.end();
    detached.destroy();
  }
}
|
||||
|
||||
/**
 * Publishes the accumulated output-audio position to the realtime bridge.
 *
 * The running counter may be fractional, so it is floored to a whole number of
 * milliseconds before being handed to `setMediaTimestamp`. Does nothing when
 * no bridge is attached.
 */
private syncOutputAudioTimestamp(): void {
  const wholeMs = Math.floor(this.outputAudioTimestampMs);
  this.bridge?.setMediaTimestamp(wholeMs);
}
|
||||
|
||||
private handleToolCall(
|
||||
event: RealtimeVoiceToolCallEvent,
|
||||
session: RealtimeVoiceBridgeSession,
|
||||
@@ -640,6 +653,7 @@ export class DiscordRealtimeVoiceSession implements VoiceRealtimeSession {
|
||||
logger.info(
|
||||
`discord voice: realtime forced agent consult starting chars=${question.length} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}`,
|
||||
);
|
||||
this.syncOutputAudioTimestamp();
|
||||
this.bridge?.handleBargeIn({ audioPlaybackActive: true, force: true });
|
||||
this.clearOutputAudio();
|
||||
try {
|
||||
@@ -718,6 +732,14 @@ function isDiscordRealtimeSpeakerContext(value: unknown): value is DiscordRealti
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Computes how long a PCM16 buffer plays for, in milliseconds.
 *
 * Every `REALTIME_PCM16_BYTES_PER_SAMPLE` bytes count as one sample and no
 * channel division is applied, i.e. the buffer is treated as single-channel.
 *
 * @param audio - Raw PCM bytes.
 * @param sampleRate - Samples per second of `audio`.
 * @returns The (possibly fractional) duration in milliseconds, or `0` when the
 *   buffer is empty or the sample rate is zero, negative, or `NaN`.
 */
function pcm16MonoDurationMs(audio: Buffer, sampleRate: number): number {
  // `!(sampleRate > 0)` also rejects NaN, which a plain `sampleRate <= 0` test
  // lets through; a NaN result would poison any running total built from it.
  if (audio.length === 0 || !(sampleRate > 0)) {
    return 0;
  }
  const sampleCount = audio.length / REALTIME_PCM16_BYTES_PER_SAMPLE;
  return (sampleCount * 1000) / sampleRate;
}
|
||||
|
||||
function buildProviderConfigs(
|
||||
realtimeConfig: DiscordRealtimeVoiceConfig,
|
||||
): Record<string, RealtimeVoiceProviderConfig | undefined> | undefined {
|
||||
|
||||
Reference in New Issue
Block a user