fix(minimax): stream music generation responses (#84764)

Summary:
- The PR updates the bundled MiniMax music provider to request streaming hex responses, decode SSE/audio bodie ... while preserving JSON/url fallbacks, and adds provider tests for streaming, fallback, and timeout behavior.
- PR surface: Source +148, Tests +152. Total +300 across 2 files.
- Reproducibility: yes, by source inspection and live proof, though I did not run a fresh live reproduction. C ... s provider fallback, and the source PR reports a 130s live MiniMax provider run succeeding after the patch.

Automerge notes:
- PR branch already contained follow-up commit before automerge: fix(minimax): stream music generation responses
- PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8456…

Validation:
- ClawSweeper review passed for head 806b0b40f2.
- Required merge gates passed before the squash merge.

Prepared head SHA: 806b0b40f2
Review: https://github.com/openclaw/openclaw/pull/84764#issuecomment-4504175527

Co-authored-by: Neerav Makwana <261249544+neeravmakwana@users.noreply.github.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
clawsweeper[bot]
2026-05-28 19:11:37 +00:00
committed by GitHub
parent 663cf97bea
commit dfe9774387
2 changed files with 329 additions and 29 deletions

View File

@@ -28,10 +28,11 @@ beforeAll(async () => {
installMinimaxProviderHttpMockCleanup();
function mockMusicGenerationResponse(json: Record<string, unknown>): void {
const response = new Response(JSON.stringify(json), {
headers: { "content-type": "application/json" },
});
postJsonRequestMock.mockResolvedValue({
response: {
json: async () => json,
},
response,
release: vi.fn(async () => {}),
});
fetchWithTimeoutMock.mockResolvedValue({
@@ -53,12 +54,22 @@ describe("minimax music generation provider", () => {
expectExplicitMusicGenerationCapabilities(buildMinimaxMusicGenerationProvider());
});
it("creates music and downloads the generated track", async () => {
mockMusicGenerationResponse({
task_id: "task-123",
audio_url: "https://example.com/out.mp3",
lyrics: "our city wakes",
base_resp: { status_code: 0 },
it("streams generated music chunks from MiniMax", async () => {
const chunkA = Buffer.from("ID3\x04\x00mp3-a");
const chunkB = Buffer.from("mp3-b");
postJsonRequestMock.mockResolvedValue({
response: new Response(
[
`data: ${JSON.stringify({ data: { status: 1, audio: chunkA.toString("hex") }, base_resp: { status_code: 0 } })}`,
`data: ${JSON.stringify({ data: { status: 1, audio: chunkB.toString("hex") }, base_resp: { status_code: 0 } })}`,
`data: ${JSON.stringify({ data: { status: 2, audio: Buffer.concat([chunkA, chunkB]).toString("hex") }, base_resp: { status_code: 0 } })}`,
"",
].join("\n\n"),
{
headers: { "content-type": "text/event-stream" },
},
),
release: vi.fn(async () => {}),
});
const provider = buildMinimaxMusicGenerationProvider();
@@ -79,24 +90,81 @@ describe("minimax music generation provider", () => {
expect(body.prompt).not.toContain("Target duration");
expect(body).not.toHaveProperty("duration");
expect(body.lyrics).toBe("our city wakes");
expect(body.output_format).toBe("url");
expect(body.stream).toBe(true);
expect(body.output_format).toBe("hex");
expect(body.audio_setting).toEqual({
sample_rate: 44100,
bitrate: 256000,
format: "mp3",
});
expect(request.timeoutMs).toBe(300000);
expect(request?.headers).toBeInstanceOf(Headers);
const headers = request?.headers as Headers | undefined;
expect(headers?.get("content-type")).toBe("application/json");
expect(result.tracks).toHaveLength(1);
expect(result.lyrics).toEqual(["our city wakes"]);
expect(result.metadata?.taskId).toBe("task-123");
expect(result.metadata?.audioUrl).toBe("https://example.com/out.mp3");
expect(result.tracks[0]?.buffer).toEqual(Buffer.concat([chunkA, chunkB]));
expect(result.tracks[0]?.mimeType).toBe("audio/mpeg");
expect(result.metadata?.requestedLyrics).toBe(true);
expect(result.metadata).not.toHaveProperty("requestedDurationSeconds");
});
// A non-zero base_resp.status_code in a later stream frame must fail the whole
// generation, even though the first frame reported status_code 0.
it("reports streaming music task failures", async () => {
// Two event-stream frames: the first is healthy, the second carries error 2013.
postJsonRequestMock.mockResolvedValue({
response: new Response(
`data: ${JSON.stringify({
base_resp: { status_code: 0 },
})}\n\ndata: ${JSON.stringify({
base_resp: { status_code: 2013, status_msg: "render rejected" },
})}`,
{
headers: { "content-type": "text/event-stream" },
},
),
release: vi.fn(async () => {}),
});
const provider = buildMinimaxMusicGenerationProvider();
// The rejection message is expected to include both the status code and status_msg.
await expect(
provider.generateMusic({
provider: "minimax",
model: "music-2.6",
prompt: "upbeat dance-pop with female vocals",
cfg: {},
}),
).rejects.toThrow("MiniMax music generation failed (2013): render rejected");
});
// When the only frame in the stream is the status-2 frame, its audio must be
// used as the track rather than being discarded.
it("keeps terminal streaming audio when no progressive chunks were sent", async () => {
const terminalAudio = Buffer.from("terminal-mp3");
// Single event-stream frame: status 2 with hex-encoded audio and a healthy base_resp.
postJsonRequestMock.mockResolvedValue({
response: new Response(
`data: ${JSON.stringify({
data: { status: 2, audio: terminalAudio.toString("hex") },
base_resp: { status_code: 0 },
})}`,
{
headers: { "content-type": "text/event-stream" },
},
),
release: vi.fn(async () => {}),
});
const provider = buildMinimaxMusicGenerationProvider();
const result = await provider.generateMusic({
provider: "minimax",
model: "music-2.6",
prompt: "upbeat dance-pop with female vocals",
cfg: {},
});
// The returned buffer must equal the bytes that were hex-encoded into the frame.
expect(result.tracks[0]?.buffer).toEqual(terminalAudio);
});
it("downloads tracks when url output is returned in data.audio", async () => {
mockMusicGenerationResponse({
task_id: "task-url",
lyrics: "our city wakes",
data: {
audio: "https://example.com/url-audio.mp3",
},
@@ -119,6 +187,90 @@ describe("minimax music generation provider", () => {
fetch,
);
expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(0);
expect(result.lyrics).toEqual(["our city wakes"]);
expect(result.metadata?.taskId).toBe("task-url");
expect(result.metadata?.audioUrl).toBe("https://example.com/url-audio.mp3");
});
// A caller-supplied timeoutMs must reach both the initial JSON request and the
// follow-up download used when the response carries a URL instead of audio bytes.
it("honors explicit long caller timeouts for request and download fallbacks", async () => {
// JSON (non-streaming) response whose data.audio is a remote URL.
mockMusicGenerationResponse({
data: {
audio: "https://example.com/long-timeout.mp3",
},
base_resp: { status_code: 0 },
});
const provider = buildMinimaxMusicGenerationProvider();
await provider.generateMusic({
provider: "minimax",
model: "music-2.6",
prompt: "upbeat dance-pop with female vocals",
cfg: {},
lyrics: "our city wakes",
timeoutMs: 600000,
});
// The request mock must have been called with the caller's timeout.
expect(mockCallArg(postJsonRequestMock).timeoutMs).toBe(600000);
// The download mock must receive the same timeout as its third argument.
expect(fetchWithTimeoutMock).toHaveBeenCalledWith(
"https://example.com/long-timeout.mp3",
{ method: "GET" },
600000,
fetch,
);
});
// The caller's timeout must also bound the time spent reading the response body:
// a stream that only produces data after 200ms has to fail a 50ms request and be cancelled.
it("applies explicit caller timeouts while reading streaming response bodies", async () => {
vi.useFakeTimers();
try {
// Set by the stream's cancel() hook so the test can observe cancellation.
let cancelled = false;
const stream = new ReadableStream<Uint8Array>({
start(controller) {
// Deliver the single frame only after 200ms of (fake) time, unless cancelled first.
setTimeout(() => {
if (cancelled) {
return;
}
controller.enqueue(
new TextEncoder().encode(
`data: ${JSON.stringify({
data: { status: 2, audio: Buffer.from("late-mp3").toString("hex") },
base_resp: { status_code: 0 },
})}`,
),
);
controller.close();
}, 200);
},
cancel() {
cancelled = true;
},
});
postJsonRequestMock.mockResolvedValue({
response: new Response(stream, {
headers: { "content-type": "text/event-stream" },
}),
release: vi.fn(async () => {}),
});
const provider = buildMinimaxMusicGenerationProvider();
// Not awaited yet: the promise can only settle once fake time is advanced below.
const generation = provider.generateMusic({
provider: "minimax",
model: "music-2.6",
prompt: "upbeat dance-pop with female vocals",
cfg: {},
timeoutMs: 50,
});
// The rejection assertion is attached before any timers are advanced.
const expectation = expect(generation).rejects.toThrow(
"MiniMax music generation timed out after 50ms",
);
// NOTE(review): the zero-length advance presumably lets pending async work reach the
// body read before the clock moves — confirm against the provider implementation.
await vi.advanceTimersByTimeAsync(0);
// Advance to the caller's 50ms timeout, well before the 200ms data delivery.
await vi.advanceTimersByTimeAsync(50);
await expectation;
// The stream's cancel() hook must have run as part of the timeout handling.
expect(cancelled).toBe(true);
} finally {
// Always restore real timers so later tests are unaffected.
vi.useRealTimers();
}
});
it("rejects instrumental requests that also include lyrics", async () => {

View File

@@ -7,15 +7,19 @@ import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
assertOkOrThrowHttpError,
createProviderOperationDeadline,
fetchProviderDownloadResponse,
postJsonRequest,
resolveProviderOperationTimeoutMs,
resolveProviderHttpRequestConfig,
type ProviderOperationDeadline,
} from "openclaw/plugin-sdk/provider-http";
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
// Default MiniMax API origin. NOTE(review): presumably used when configuration
// supplies no base URL — the resolver body is not visible here.
const DEFAULT_MINIMAX_MUSIC_BASE_URL = "https://api.minimax.io";
// Default music model identifier.
const DEFAULT_MINIMAX_MUSIC_MODEL = "music-2.6";
// Fallback timeout (ms) passed as the default when resolving the URL-download timeout.
const DEFAULT_TIMEOUT_MS = 120_000;
// Overall operation timeout (ms) used when the caller does not pass `timeoutMs`.
const DEFAULT_OPERATION_TIMEOUT_MS = 300_000;
type MinimaxBaseResp = {
status_code?: number;
@@ -35,6 +39,14 @@ type MinimaxMusicCreateResponse = {
base_resp?: MinimaxBaseResp;
};
/** Shape of one JSON frame parsed from a `data:` line of the streaming music response. */
type MinimaxMusicStreamFrame = {
data?: {
// Encoded audio payload for this frame; decoded with `decodePossibleBinary` by the reader.
audio?: string;
// Frame status; the reader compares its string form to "2" to spot the terminal frame.
status?: number | string;
};
// Per-frame status envelope, validated with `assertMinimaxBaseResp`.
base_resp?: MinimaxBaseResp;
};
function resolveMinimaxMusicBaseUrl(
cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"],
providerId: string,
@@ -105,6 +117,119 @@ async function downloadTrackFromUrl(params: {
};
}
/**
 * Builds the error reported when a MiniMax music operation runs past its deadline.
 *
 * @param deadline - Deadline whose `label` names the operation and whose optional
 *   `timeoutMs` is echoed in the message.
 * @returns An `Error` reading `<label> timed out`, followed by ` after <timeoutMs>ms`
 *   only when `timeoutMs` is a number.
 */
function createMinimaxMusicTimeoutError(deadline: ProviderOperationDeadline): Error {
  // The duration suffix stays empty unless a numeric timeout is known.
  let durationSuffix = "";
  if (typeof deadline.timeoutMs === "number") {
    durationSuffix = ` after ${deadline.timeoutMs}ms`;
  }
  return new Error(`${deadline.label} timed out${durationSuffix}`);
}
/**
 * Resolves the timeout (ms) to apply to a single response-body read.
 *
 * Delegates to `resolveProviderOperationTimeoutMs`, passing the deadline's own
 * `timeoutMs` as the default and falling back to `DEFAULT_OPERATION_TIMEOUT_MS`
 * when the deadline carries none.
 *
 * @param deadline - Operation deadline shared by the whole generation call.
 * @returns The timeout in milliseconds as computed by the SDK helper.
 */
function resolveBodyReadTimeoutMs(deadline: ProviderOperationDeadline): number {
  const fallbackTimeoutMs = deadline.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS;
  return resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs: fallbackTimeoutMs });
}
/**
 * Reads the whole response body into a single Buffer, racing every read against a timer.
 *
 * The timeout is re-resolved from `deadline` before each read via
 * `resolveBodyReadTimeoutMs`. When the timer wins, or the read itself fails, the
 * reader is cancelled and the original error is rethrown.
 *
 * @param response - Response whose body stream is consumed.
 * @param deadline - Operation deadline used to derive each read's timeout and the
 *   timeout error.
 * @returns The concatenated body bytes, or an empty Buffer when the response has no body.
 * @throws The timeout error from `createMinimaxMusicTimeoutError`, or the stream's own
 *   read error.
 */
async function readResponseBufferWithDeadline(
response: Response,
deadline: ProviderOperationDeadline,
): Promise<Buffer> {
const body = response.body;
if (!body) {
// No body stream at all: nothing to read.
return Buffer.alloc(0);
}
const reader = body.getReader();
const chunks: Uint8Array[] = [];
let totalBytes = 0;
try {
while (true) {
let timeoutId: ReturnType<typeof setTimeout> | undefined;
try {
// Recomputed on every iteration rather than once up front.
const timeoutMs = resolveBodyReadTimeoutMs(deadline);
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => reject(createMinimaxMusicTimeoutError(deadline)), timeoutMs);
});
// Whichever settles first wins: the next chunk or the timeout rejection.
const result = await Promise.race([reader.read(), timeoutPromise]);
if (result.done) {
break;
}
if (!result.value || result.value.length === 0) {
// Skip empty chunks; they contribute no bytes.
continue;
}
chunks.push(result.value);
totalBytes += result.value.byteLength;
} catch (error) {
try {
// Cancel the stream, passing the triggering error as the reason.
await reader.cancel(error);
} catch {
// Preserve the timeout or stream read failure that caused cancellation.
}
throw error;
} finally {
// Runs on every exit path of the iteration so no timer outlives its read.
if (timeoutId) {
clearTimeout(timeoutId);
}
}
}
} finally {
// Release the reader lock whether the read loop finished or threw.
reader.releaseLock();
}
// allocUnsafe is fine here: totalBytes is the sum of the chunk lengths, so the
// copy loop below overwrites every byte of the buffer.
const buffer = Buffer.allocUnsafe(totalBytes);
let offset = 0;
for (const chunk of chunks) {
buffer.set(chunk, offset);
offset += chunk.byteLength;
}
return buffer;
}
/**
 * Extracts the generated track from a MiniMax music response body.
 *
 * Two body shapes are handled:
 * - When the content-type starts with `audio/`, the body bytes are returned
 *   unchanged, with the file extension taken from `extensionForMime` (default `mp3`).
 * - Otherwise the body is decoded as text and scanned line by line for `data:`
 *   frames. Each frame is parsed as JSON, validated with `assertMinimaxBaseResp`,
 *   and its audio decoded with `decodePossibleBinary`.
 *
 * @param response - Response to read; its body is consumed by this call.
 * @param deadline - Operation deadline forwarded to the body reader.
 * @returns The track asset. Audio assembled from frames is always labelled
 *   `audio/mpeg` / `track-1.mp3`.
 * @throws When a frame fails `assertMinimaxBaseResp`, when a frame payload is not
 *   valid JSON, or when no audio bytes were collected.
 */
async function readStreamingTrack(
  response: Response,
  deadline: ProviderOperationDeadline,
): Promise<GeneratedMusicAsset> {
  const contentType = normalizeOptionalString(response.headers.get("content-type")) ?? "";
  const isRawAudioBody = contentType.toLowerCase().startsWith("audio/");
  if (isRawAudioBody) {
    const ext = extensionForMime(contentType)?.replace(/^\./u, "") || "mp3";
    const audioBytes = await readResponseBufferWithDeadline(response, deadline);
    return {
      buffer: audioBytes,
      mimeType: contentType,
      fileName: `track-1.${ext}`,
    };
  }

  const decodedChunks: Buffer[] = [];
  const decoder = new TextDecoder();
  const bodyText = decoder.decode(await readResponseBufferWithDeadline(response, deadline));
  const bodyLines = bodyText.split(/\r?\n/u);
  for (const bodyLine of bodyLines) {
    const trimmedLine = bodyLine.trim();
    // Only `data:` lines carry frames; every other line is ignored.
    if (!trimmedLine.startsWith("data:")) {
      continue;
    }
    const framePayload = trimmedLine.slice("data:".length).trim();
    // Empty payloads and the `[DONE]` sentinel hold no frame.
    if (!framePayload || framePayload === "[DONE]") {
      continue;
    }
    const frame = JSON.parse(framePayload) as MinimaxMusicStreamFrame;
    assertMinimaxBaseResp(frame.base_resp, "MiniMax music generation failed");
    const encodedAudio = normalizeOptionalString(frame.data?.audio);
    if (!encodedAudio) {
      continue;
    }
    // A status-2 frame arriving after audio was already collected is skipped.
    // NOTE(review): the provider tests model that frame as repeating the full
    // track, so appending it would duplicate audio.
    const isTerminalFrame = String(frame.data?.status ?? "") === "2";
    if (isTerminalFrame && decodedChunks.length > 0) {
      continue;
    }
    decodedChunks.push(decodePossibleBinary(encodedAudio));
  }

  const trackBytes = Buffer.concat(decodedChunks);
  if (trackBytes.byteLength === 0) {
    throw new Error("MiniMax music generation response missing audio output");
  }
  return {
    buffer: trackBytes,
    mimeType: "audio/mpeg",
    fileName: "track-1.mp3",
  };
}
function resolveMinimaxMusicModel(model: string | undefined): string {
const trimmed = normalizeOptionalString(model);
if (!trimmed) {
@@ -158,6 +283,11 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
}
const fetchFn = fetch;
const operationTimeoutMs = req.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS;
const deadline = createProviderOperationDeadline({
timeoutMs: operationTimeoutMs,
label: "MiniMax music generation",
});
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
resolveProviderHttpRequestConfig({
baseUrl: resolveMinimaxMusicBaseUrl(req.cfg, providerId),
@@ -174,13 +304,18 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
jsonHeaders.set("Content-Type", "application/json");
const model = resolveMinimaxMusicModel(req.model);
const lyrics = normalizeOptionalString(req.lyrics);
const requestedLyrics = normalizeOptionalString(req.lyrics);
const body = {
model,
prompt: req.prompt.trim(),
...(req.instrumental === true ? { is_instrumental: true } : {}),
...(lyrics ? { lyrics } : req.instrumental === true ? {} : { lyrics_optimizer: true }),
output_format: "url",
...(requestedLyrics
? { lyrics: requestedLyrics }
: req.instrumental === true
? {}
: { lyrics_optimizer: true }),
stream: true,
output_format: "hex",
audio_setting: {
sample_rate: 44_100,
bitrate: 256_000,
@@ -192,7 +327,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
url: `${baseUrl}/v1/music_generation`,
headers: jsonHeaders,
body,
timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
timeoutMs: resolveProviderOperationTimeoutMs({
deadline,
defaultTimeoutMs: operationTimeoutMs,
}),
fetchFn,
pinDns: false,
allowPrivateNetwork,
@@ -201,22 +339,32 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
try {
await assertOkOrThrowHttpError(res, "MiniMax music generation failed");
const payload = (await res.json()) as MinimaxMusicCreateResponse;
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
const contentType = normalizeOptionalString(res.headers.get("content-type")) ?? "";
const lowerContentType = contentType.toLowerCase();
const payload =
lowerContentType.includes("text/event-stream") || lowerContentType.startsWith("audio/")
? null
: ((await res.clone().json()) as MinimaxMusicCreateResponse);
if (payload) {
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
}
const audioCandidate =
normalizeOptionalString(payload.audio) ?? normalizeOptionalString(payload.data?.audio);
normalizeOptionalString(payload?.audio) ?? normalizeOptionalString(payload?.data?.audio);
const audioUrl =
normalizeOptionalString(payload.audio_url) ||
normalizeOptionalString(payload.data?.audio_url) ||
normalizeOptionalString(payload?.audio_url) ||
normalizeOptionalString(payload?.data?.audio_url) ||
(isLikelyRemoteUrl(audioCandidate) ? audioCandidate : undefined);
const inlineAudio = isLikelyRemoteUrl(audioCandidate) ? undefined : audioCandidate;
const lyrics = decodePossibleText(payload.lyrics ?? payload.data?.lyrics ?? "");
const responseLyrics = decodePossibleText(payload?.lyrics ?? payload?.data?.lyrics ?? "");
const track = audioUrl
? await downloadTrackFromUrl({
url: audioUrl,
timeoutMs: req.timeoutMs,
timeoutMs: resolveProviderOperationTimeoutMs({
deadline,
defaultTimeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
}),
fetchFn,
})
: inlineAudio
@@ -225,22 +373,22 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
mimeType: "audio/mpeg",
fileName: "track-1.mp3",
}
: null;
: await readStreamingTrack(res, deadline);
if (!track) {
throw new Error("MiniMax music generation response missing audio output");
}
return {
tracks: [track],
...(lyrics ? { lyrics: [lyrics] } : {}),
...(responseLyrics ? { lyrics: [responseLyrics] } : {}),
model,
metadata: {
...(normalizeOptionalString(payload.task_id)
? { taskId: normalizeOptionalString(payload.task_id) }
...(normalizeOptionalString(payload?.task_id)
? { taskId: normalizeOptionalString(payload?.task_id) }
: {}),
...(audioUrl ? { audioUrl } : {}),
instrumental: req.instrumental === true,
...(lyrics ? { requestedLyrics: true } : {}),
...(requestedLyrics ? { requestedLyrics: true } : {}),
},
};
} finally {