mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-31 12:28:35 +00:00
fix(minimax): stream music generation responses (#84764)
Summary: - The PR updates the bundled MiniMax music provider to request streaming hex responses, decode SSE/audio bodie ... while preserving JSON/url fallbacks, and adds provider tests for streaming, fallback, and timeout behavior. - PR surface: Source +148, Tests +152. Total +300 across 2 files. - Reproducibility: yes, by source inspection and live proof, though I did not run a fresh live reproduction. C ... s provider fallback, and the source PR reports a 130s live MiniMax provider run succeeding after the patch. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(minimax): stream music generation responses - PR branch already contained follow-up commit before automerge: fix(clawsweeper): address review for automerge-openclaw-openclaw-8456… Validation: - ClawSweeper review passed for head 806b0b40f2. - Required merge gates passed before the squash merge. Prepared head SHA: 806b0b40f2 Review: https://github.com/openclaw/openclaw/pull/84764#issuecomment-4504175527 Co-authored-by: Neerav Makwana <261249544+neeravmakwana@users.noreply.github.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -28,10 +28,11 @@ beforeAll(async () => {
|
||||
installMinimaxProviderHttpMockCleanup();
|
||||
|
||||
function mockMusicGenerationResponse(json: Record<string, unknown>): void {
|
||||
const response = new Response(JSON.stringify(json), {
|
||||
headers: { "content-type": "application/json" },
|
||||
});
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: {
|
||||
json: async () => json,
|
||||
},
|
||||
response,
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
fetchWithTimeoutMock.mockResolvedValue({
|
||||
@@ -53,12 +54,22 @@ describe("minimax music generation provider", () => {
|
||||
expectExplicitMusicGenerationCapabilities(buildMinimaxMusicGenerationProvider());
|
||||
});
|
||||
|
||||
it("creates music and downloads the generated track", async () => {
|
||||
mockMusicGenerationResponse({
|
||||
task_id: "task-123",
|
||||
audio_url: "https://example.com/out.mp3",
|
||||
lyrics: "our city wakes",
|
||||
base_resp: { status_code: 0 },
|
||||
it("streams generated music chunks from MiniMax", async () => {
|
||||
const chunkA = Buffer.from("ID3\x04\x00mp3-a");
|
||||
const chunkB = Buffer.from("mp3-b");
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: new Response(
|
||||
[
|
||||
`data: ${JSON.stringify({ data: { status: 1, audio: chunkA.toString("hex") }, base_resp: { status_code: 0 } })}`,
|
||||
`data: ${JSON.stringify({ data: { status: 1, audio: chunkB.toString("hex") }, base_resp: { status_code: 0 } })}`,
|
||||
`data: ${JSON.stringify({ data: { status: 2, audio: Buffer.concat([chunkA, chunkB]).toString("hex") }, base_resp: { status_code: 0 } })}`,
|
||||
"",
|
||||
].join("\n\n"),
|
||||
{
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
},
|
||||
),
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
|
||||
const provider = buildMinimaxMusicGenerationProvider();
|
||||
@@ -79,24 +90,81 @@ describe("minimax music generation provider", () => {
|
||||
expect(body.prompt).not.toContain("Target duration");
|
||||
expect(body).not.toHaveProperty("duration");
|
||||
expect(body.lyrics).toBe("our city wakes");
|
||||
expect(body.output_format).toBe("url");
|
||||
expect(body.stream).toBe(true);
|
||||
expect(body.output_format).toBe("hex");
|
||||
expect(body.audio_setting).toEqual({
|
||||
sample_rate: 44100,
|
||||
bitrate: 256000,
|
||||
format: "mp3",
|
||||
});
|
||||
expect(request.timeoutMs).toBe(300000);
|
||||
expect(request?.headers).toBeInstanceOf(Headers);
|
||||
const headers = request?.headers as Headers | undefined;
|
||||
expect(headers?.get("content-type")).toBe("application/json");
|
||||
expect(result.tracks).toHaveLength(1);
|
||||
expect(result.lyrics).toEqual(["our city wakes"]);
|
||||
expect(result.metadata?.taskId).toBe("task-123");
|
||||
expect(result.metadata?.audioUrl).toBe("https://example.com/out.mp3");
|
||||
expect(result.tracks[0]?.buffer).toEqual(Buffer.concat([chunkA, chunkB]));
|
||||
expect(result.tracks[0]?.mimeType).toBe("audio/mpeg");
|
||||
expect(result.metadata?.requestedLyrics).toBe(true);
|
||||
expect(result.metadata).not.toHaveProperty("requestedDurationSeconds");
|
||||
});
|
||||
|
||||
it("reports streaming music task failures", async () => {
  // The first frame is healthy; the second carries the provider-level rejection
  // that must surface as the thrown error.
  const frames = [
    { base_resp: { status_code: 0 } },
    { base_resp: { status_code: 2013, status_msg: "render rejected" } },
  ];
  const sseBody = frames.map((frame) => `data: ${JSON.stringify(frame)}`).join("\n\n");
  postJsonRequestMock.mockResolvedValue({
    response: new Response(sseBody, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const provider = buildMinimaxMusicGenerationProvider();
  const generation = provider.generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  await expect(generation).rejects.toThrow(
    "MiniMax music generation failed (2013): render rejected",
  );
});
|
||||
|
||||
it("keeps terminal streaming audio when no progressive chunks were sent", async () => {
  const terminalAudio = Buffer.from("terminal-mp3");
  // A lone terminal frame (status 2) is the only carrier of audio in this stream,
  // so it must not be discarded as a duplicate of earlier chunks.
  const terminalFrame = {
    data: { status: 2, audio: terminalAudio.toString("hex") },
    base_resp: { status_code: 0 },
  };
  postJsonRequestMock.mockResolvedValue({
    response: new Response(`data: ${JSON.stringify(terminalFrame)}`, {
      headers: { "content-type": "text/event-stream" },
    }),
    release: vi.fn(async () => {}),
  });

  const provider = buildMinimaxMusicGenerationProvider();
  const { tracks } = await provider.generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
  });

  expect(tracks[0]?.buffer).toEqual(terminalAudio);
});
|
||||
|
||||
it("downloads tracks when url output is returned in data.audio", async () => {
|
||||
mockMusicGenerationResponse({
|
||||
task_id: "task-url",
|
||||
lyrics: "our city wakes",
|
||||
data: {
|
||||
audio: "https://example.com/url-audio.mp3",
|
||||
},
|
||||
@@ -119,6 +187,90 @@ describe("minimax music generation provider", () => {
|
||||
fetch,
|
||||
);
|
||||
expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(0);
|
||||
expect(result.lyrics).toEqual(["our city wakes"]);
|
||||
expect(result.metadata?.taskId).toBe("task-url");
|
||||
expect(result.metadata?.audioUrl).toBe("https://example.com/url-audio.mp3");
|
||||
});
|
||||
|
||||
it("honors explicit long caller timeouts for request and download fallbacks", async () => {
  // The JSON fallback returns a URL, so both the create request and the
  // follow-up download should see the caller-supplied timeout.
  const callerTimeoutMs = 600000;
  const audioUrl = "https://example.com/long-timeout.mp3";
  mockMusicGenerationResponse({
    data: { audio: audioUrl },
    base_resp: { status_code: 0 },
  });

  const provider = buildMinimaxMusicGenerationProvider();
  await provider.generateMusic({
    provider: "minimax",
    model: "music-2.6",
    prompt: "upbeat dance-pop with female vocals",
    cfg: {},
    lyrics: "our city wakes",
    timeoutMs: callerTimeoutMs,
  });

  const createRequest = mockCallArg(postJsonRequestMock);
  expect(createRequest.timeoutMs).toBe(callerTimeoutMs);
  expect(fetchWithTimeoutMock).toHaveBeenCalledWith(
    audioUrl,
    { method: "GET" },
    callerTimeoutMs,
    fetch,
  );
});
|
||||
|
||||
it("applies explicit caller timeouts while reading streaming response bodies", async () => {
  vi.useFakeTimers();
  try {
    let cancelled = false;

    // Emits the only frame 200ms in, well after the 50ms caller timeout used below,
    // unless the consumer has already cancelled the stream.
    const emitLateFrame = (controller: ReadableStreamDefaultController<Uint8Array>): void => {
      if (cancelled) {
        return;
      }
      const lateFrame = {
        data: { status: 2, audio: Buffer.from("late-mp3").toString("hex") },
        base_resp: { status_code: 0 },
      };
      controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(lateFrame)}`));
      controller.close();
    };

    const stream = new ReadableStream<Uint8Array>({
      start(controller) {
        setTimeout(() => emitLateFrame(controller), 200);
      },
      cancel() {
        cancelled = true;
      },
    });
    postJsonRequestMock.mockResolvedValue({
      response: new Response(stream, {
        headers: { "content-type": "text/event-stream" },
      }),
      release: vi.fn(async () => {}),
    });

    const provider = buildMinimaxMusicGenerationProvider();
    const generation = provider.generateMusic({
      provider: "minimax",
      model: "music-2.6",
      prompt: "upbeat dance-pop with female vocals",
      cfg: {},
      timeoutMs: 50,
    });
    // Attach the rejection expectation before advancing the clock so the
    // rejection is never left unhandled.
    const expectation = expect(generation).rejects.toThrow(
      "MiniMax music generation timed out after 50ms",
    );

    await vi.advanceTimersByTimeAsync(0);
    await vi.advanceTimersByTimeAsync(50);

    await expectation;
    expect(cancelled).toBe(true);
  } finally {
    vi.useRealTimers();
  }
});
|
||||
|
||||
it("rejects instrumental requests that also include lyrics", async () => {
|
||||
|
||||
@@ -7,15 +7,19 @@ import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
createProviderOperationDeadline,
|
||||
fetchProviderDownloadResponse,
|
||||
postJsonRequest,
|
||||
resolveProviderOperationTimeoutMs,
|
||||
resolveProviderHttpRequestConfig,
|
||||
type ProviderOperationDeadline,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
|
||||
const DEFAULT_MINIMAX_MUSIC_BASE_URL = "https://api.minimax.io";
|
||||
const DEFAULT_MINIMAX_MUSIC_MODEL = "music-2.6";
|
||||
const DEFAULT_TIMEOUT_MS = 120_000;
|
||||
const DEFAULT_OPERATION_TIMEOUT_MS = 300_000;
|
||||
|
||||
type MinimaxBaseResp = {
|
||||
status_code?: number;
|
||||
@@ -35,6 +39,14 @@ type MinimaxMusicCreateResponse = {
|
||||
base_resp?: MinimaxBaseResp;
|
||||
};
|
||||
|
||||
/**
 * One JSON payload carried on a `data:` line of the streaming music response.
 *
 * Every field is optional; consumers must tolerate frames that carry only
 * a status envelope and no audio.
 */
type MinimaxMusicStreamFrame = {
  /** Status envelope checked on every frame before its audio is used. */
  base_resp?: MinimaxBaseResp;
  data?: {
    /** Frame status as sent by the provider; may arrive as a number or a string. */
    status?: string | number;
    /** Encoded audio payload for this frame, when present. */
    audio?: string;
  };
};
|
||||
|
||||
function resolveMinimaxMusicBaseUrl(
|
||||
cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"],
|
||||
providerId: string,
|
||||
@@ -105,6 +117,119 @@ async function downloadTrackFromUrl(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the error raised when a deadline-bound MiniMax music operation runs out of time.
 *
 * The message is `<label> timed out`, with ` after <n>ms` appended when the
 * deadline carries a numeric `timeoutMs`.
 *
 * @param deadline - Operation deadline supplying the label and optional timeout.
 * @returns A plain `Error` describing the timeout.
 */
function createMinimaxMusicTimeoutError(deadline: ProviderOperationDeadline): Error {
  const { label, timeoutMs } = deadline;
  if (typeof timeoutMs !== "number") {
    return new Error(`${label} timed out`);
  }
  return new Error(`${label} timed out after ${timeoutMs}ms`);
}
|
||||
|
||||
/**
 * Resolves the timeout, in milliseconds, to apply to the next response-body read.
 *
 * Delegates to `resolveProviderOperationTimeoutMs`, passing the deadline's own
 * `timeoutMs` as the default, or `DEFAULT_OPERATION_TIMEOUT_MS` when the deadline has none.
 * NOTE(review): presumably the helper returns the time remaining on the deadline — confirm.
 *
 * @param deadline - Operation deadline shared across the whole generation call.
 * @returns The timeout to use for a single body read.
 */
function resolveBodyReadTimeoutMs(deadline: ProviderOperationDeadline): number {
  const defaultTimeoutMs = deadline.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS;
  return resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs });
}
|
||||
|
||||
/**
 * Drains a response body into a single `Buffer`, bounding every read by the deadline.
 *
 * Each `reader.read()` is raced against a fresh timer whose duration comes from
 * `resolveBodyReadTimeoutMs`. If the timer wins, or the read itself fails, the
 * reader is cancelled (best effort) and the original error is rethrown.
 *
 * @param response - Response whose body should be consumed.
 * @param deadline - Operation deadline used to size each read timeout.
 * @returns The concatenated body bytes; an empty buffer when there is no body.
 * @throws The timeout error from `createMinimaxMusicTimeoutError`, or the stream read failure.
 */
async function readResponseBufferWithDeadline(
  response: Response,
  deadline: ProviderOperationDeadline,
): Promise<Buffer> {
  if (!response.body) {
    return Buffer.alloc(0);
  }

  const reader = response.body.getReader();
  const parts: Uint8Array[] = [];
  let byteCount = 0;

  // Performs one read raced against the deadline timer and always clears the timer.
  const readNext = async (): Promise<ReadableStreamReadResult<Uint8Array>> => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const waitMs = resolveBodyReadTimeoutMs(deadline);
      const expiry = new Promise<never>((_resolve, reject) => {
        timer = setTimeout(() => reject(createMinimaxMusicTimeoutError(deadline)), waitMs);
      });
      return await Promise.race([reader.read(), expiry]);
    } catch (error) {
      try {
        await reader.cancel(error);
      } catch {
        // Preserve the timeout or stream read failure that caused cancellation.
      }
      throw error;
    } finally {
      if (timer) {
        clearTimeout(timer);
      }
    }
  };

  try {
    for (;;) {
      const { done, value } = await readNext();
      if (done) {
        break;
      }
      // Empty reads contribute nothing; keep pulling.
      if (value && value.length > 0) {
        parts.push(value);
        byteCount += value.byteLength;
      }
    }
  } finally {
    reader.releaseLock();
  }

  return Buffer.concat(parts, byteCount);
}
|
||||
|
||||
/**
 * Parses the JSON payload of one `data:` line.
 *
 * @returns The frame, or `undefined` when the payload is valid JSON but not an
 *   object (such a value cannot carry audio or a status envelope).
 * @throws Error with provider context when the payload is not valid JSON.
 */
function parseMinimaxMusicStreamFrame(json: string): MinimaxMusicStreamFrame | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`MiniMax music generation returned a malformed stream frame: ${detail}`);
  }
  if (typeof parsed !== "object" || parsed === null) {
    return undefined;
  }
  return parsed as MinimaxMusicStreamFrame;
}

/**
 * Reads a MiniMax music response body and assembles it into a single track.
 *
 * Two body shapes are handled:
 * - `audio/*` content type: the raw body bytes are the track, and the file
 *   extension is derived from the content type (falling back to `mp3`).
 * - anything else: the body is treated as text and scanned for `data:` lines,
 *   each holding a JSON frame. Every frame's `base_resp` is checked, and audio
 *   payloads are decoded and concatenated in arrival order.
 *
 * @param response - Provider response whose body has not yet been consumed.
 * @param deadline - Operation deadline applied while reading the body.
 * @returns The assembled track.
 * @throws Error when a frame is malformed, when a frame reports a failure via
 *   `assertMinimaxBaseResp`, when the read times out, or when no audio bytes
 *   were produced.
 */
async function readStreamingTrack(
  response: Response,
  deadline: ProviderOperationDeadline,
): Promise<GeneratedMusicAsset> {
  const contentType = normalizeOptionalString(response.headers.get("content-type")) ?? "";
  if (contentType.toLowerCase().startsWith("audio/")) {
    const ext = extensionForMime(contentType)?.replace(/^\./u, "") || "mp3";
    return {
      buffer: await readResponseBufferWithDeadline(response, deadline),
      mimeType: contentType,
      fileName: `track-1.${ext}`,
    };
  }

  const chunks: Buffer[] = [];
  const text = new TextDecoder().decode(await readResponseBufferWithDeadline(response, deadline));
  // Event streams may terminate lines with CRLF, LF, or a bare CR.
  for (const rawLine of text.split(/\r\n|\n|\r/u)) {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) {
      continue;
    }
    const json = line.slice("data:".length).trim();
    if (!json || json === "[DONE]") {
      continue;
    }
    const frame = parseMinimaxMusicStreamFrame(json);
    if (!frame) {
      continue;
    }
    assertMinimaxBaseResp(frame.base_resp, "MiniMax music generation failed");
    const audio = normalizeOptionalString(frame.data?.audio);
    if (!audio) {
      continue;
    }
    // A status-2 frame is skipped once earlier chunks exist, so its audio is not
    // appended a second time; it is only kept when it is the sole audio carrier.
    const isTerminalFrame = String(frame.data?.status ?? "") === "2";
    if (isTerminalFrame && chunks.length > 0) {
      continue;
    }
    chunks.push(decodePossibleBinary(audio));
  }

  const buffer = Buffer.concat(chunks);
  if (buffer.byteLength === 0) {
    throw new Error("MiniMax music generation response missing audio output");
  }
  return {
    buffer,
    mimeType: "audio/mpeg",
    fileName: "track-1.mp3",
  };
}
|
||||
|
||||
function resolveMinimaxMusicModel(model: string | undefined): string {
|
||||
const trimmed = normalizeOptionalString(model);
|
||||
if (!trimmed) {
|
||||
@@ -158,6 +283,11 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
|
||||
}
|
||||
|
||||
const fetchFn = fetch;
|
||||
const operationTimeoutMs = req.timeoutMs ?? DEFAULT_OPERATION_TIMEOUT_MS;
|
||||
const deadline = createProviderOperationDeadline({
|
||||
timeoutMs: operationTimeoutMs,
|
||||
label: "MiniMax music generation",
|
||||
});
|
||||
const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
|
||||
resolveProviderHttpRequestConfig({
|
||||
baseUrl: resolveMinimaxMusicBaseUrl(req.cfg, providerId),
|
||||
@@ -174,13 +304,18 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
|
||||
jsonHeaders.set("Content-Type", "application/json");
|
||||
|
||||
const model = resolveMinimaxMusicModel(req.model);
|
||||
const lyrics = normalizeOptionalString(req.lyrics);
|
||||
const requestedLyrics = normalizeOptionalString(req.lyrics);
|
||||
const body = {
|
||||
model,
|
||||
prompt: req.prompt.trim(),
|
||||
...(req.instrumental === true ? { is_instrumental: true } : {}),
|
||||
...(lyrics ? { lyrics } : req.instrumental === true ? {} : { lyrics_optimizer: true }),
|
||||
output_format: "url",
|
||||
...(requestedLyrics
|
||||
? { lyrics: requestedLyrics }
|
||||
: req.instrumental === true
|
||||
? {}
|
||||
: { lyrics_optimizer: true }),
|
||||
stream: true,
|
||||
output_format: "hex",
|
||||
audio_setting: {
|
||||
sample_rate: 44_100,
|
||||
bitrate: 256_000,
|
||||
@@ -192,7 +327,10 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
|
||||
url: `${baseUrl}/v1/music_generation`,
|
||||
headers: jsonHeaders,
|
||||
body,
|
||||
timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||
timeoutMs: resolveProviderOperationTimeoutMs({
|
||||
deadline,
|
||||
defaultTimeoutMs: operationTimeoutMs,
|
||||
}),
|
||||
fetchFn,
|
||||
pinDns: false,
|
||||
allowPrivateNetwork,
|
||||
@@ -201,22 +339,32 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
|
||||
|
||||
try {
|
||||
await assertOkOrThrowHttpError(res, "MiniMax music generation failed");
|
||||
const payload = (await res.json()) as MinimaxMusicCreateResponse;
|
||||
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
|
||||
const contentType = normalizeOptionalString(res.headers.get("content-type")) ?? "";
|
||||
const lowerContentType = contentType.toLowerCase();
|
||||
const payload =
|
||||
lowerContentType.includes("text/event-stream") || lowerContentType.startsWith("audio/")
|
||||
? null
|
||||
: ((await res.clone().json()) as MinimaxMusicCreateResponse);
|
||||
if (payload) {
|
||||
assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed");
|
||||
}
|
||||
|
||||
const audioCandidate =
|
||||
normalizeOptionalString(payload.audio) ?? normalizeOptionalString(payload.data?.audio);
|
||||
normalizeOptionalString(payload?.audio) ?? normalizeOptionalString(payload?.data?.audio);
|
||||
const audioUrl =
|
||||
normalizeOptionalString(payload.audio_url) ||
|
||||
normalizeOptionalString(payload.data?.audio_url) ||
|
||||
normalizeOptionalString(payload?.audio_url) ||
|
||||
normalizeOptionalString(payload?.data?.audio_url) ||
|
||||
(isLikelyRemoteUrl(audioCandidate) ? audioCandidate : undefined);
|
||||
const inlineAudio = isLikelyRemoteUrl(audioCandidate) ? undefined : audioCandidate;
|
||||
const lyrics = decodePossibleText(payload.lyrics ?? payload.data?.lyrics ?? "");
|
||||
const responseLyrics = decodePossibleText(payload?.lyrics ?? payload?.data?.lyrics ?? "");
|
||||
|
||||
const track = audioUrl
|
||||
? await downloadTrackFromUrl({
|
||||
url: audioUrl,
|
||||
timeoutMs: req.timeoutMs,
|
||||
timeoutMs: resolveProviderOperationTimeoutMs({
|
||||
deadline,
|
||||
defaultTimeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
||||
}),
|
||||
fetchFn,
|
||||
})
|
||||
: inlineAudio
|
||||
@@ -225,22 +373,22 @@ function buildMinimaxMusicProvider(providerId: string): MusicGenerationProvider
|
||||
mimeType: "audio/mpeg",
|
||||
fileName: "track-1.mp3",
|
||||
}
|
||||
: null;
|
||||
: await readStreamingTrack(res, deadline);
|
||||
if (!track) {
|
||||
throw new Error("MiniMax music generation response missing audio output");
|
||||
}
|
||||
|
||||
return {
|
||||
tracks: [track],
|
||||
...(lyrics ? { lyrics: [lyrics] } : {}),
|
||||
...(responseLyrics ? { lyrics: [responseLyrics] } : {}),
|
||||
model,
|
||||
metadata: {
|
||||
...(normalizeOptionalString(payload.task_id)
|
||||
? { taskId: normalizeOptionalString(payload.task_id) }
|
||||
...(normalizeOptionalString(payload?.task_id)
|
||||
? { taskId: normalizeOptionalString(payload?.task_id) }
|
||||
: {}),
|
||||
...(audioUrl ? { audioUrl } : {}),
|
||||
instrumental: req.instrumental === true,
|
||||
...(lyrics ? { requestedLyrics: true } : {}),
|
||||
...(requestedLyrics ? { requestedLyrics: true } : {}),
|
||||
},
|
||||
};
|
||||
} finally {
|
||||
|
||||
Reference in New Issue
Block a user