From f0a7a85e7ad6162e6811d08a16106b8a988a2a38 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 24 Apr 2026 00:03:59 +0100 Subject: [PATCH] feat(agents): add generation tool timeouts --- CHANGELOG.md | 1 + docs/tools/image-generation.md | 4 +++ docs/tools/music-generation.md | 1 + docs/tools/tts.md | 2 ++ docs/tools/video-generation.md | 1 + .../google/image-generation-provider.ts | 2 +- extensions/speech-core/src/tts.ts | 5 +++- src/agents/tools/image-generate-tool.ts | 10 ++++++++ src/agents/tools/media-tool-shared.ts | 21 +++++++++++++++- src/agents/tools/music-generate-tool.ts | 14 +++++++++++ src/agents/tools/tts-tool.test.ts | 20 +++++++++++++++ src/agents/tools/tts-tool.ts | 25 ++++++++++++++++++- src/agents/tools/video-generate-tool.ts | 14 +++++++++++ src/image-generation/runtime-types.ts | 2 ++ src/image-generation/runtime.test.ts | 6 ++++- src/image-generation/runtime.ts | 1 + src/music-generation/runtime-types.ts | 2 ++ src/music-generation/runtime.test.ts | 6 ++++- src/music-generation/runtime.ts | 1 + src/plugin-sdk/tts-runtime.types.ts | 1 + src/video-generation/runtime-types.ts | 2 ++ src/video-generation/runtime.test.ts | 6 ++++- src/video-generation/runtime.ts | 1 + 23 files changed, 141 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68a79f1aefc..54ad4816e7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it. - Agents/subagents: add optional forked context for native `sessions_spawn` runs so agents can let a child inherit the requester transcript when needed, while keeping clean isolated sessions as the default; includes prompt guidance, context-engine hook metadata, docs, and QA coverage. - Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin. - Providers/OpenAI: add forward-compatible `gpt-5.5` and `gpt-5.5-pro` support for OpenAI API keys, OpenAI Codex OAuth, and the Codex CLI default model. diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index 28e3e597dbd..614b5673f60 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -96,6 +96,10 @@ Resolution hint. Number of images to generate (1–4). + +Optional provider request timeout in milliseconds. + + Output filename hint. diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md index 01a139b5e61..b6c0ef8a386 100644 --- a/docs/tools/music-generation.md +++ b/docs/tools/music-generation.md @@ -125,6 +125,7 @@ Direct generation example: | `image` | string | Single reference image path or URL | | `images` | string[] | Multiple reference images (up to 10) | | `durationSeconds` | number | Target duration in seconds when the provider supports duration hints | +| `timeoutMs` | number | Optional provider request timeout in milliseconds | | `format` | string | Output format hint (`mp3` or `wav`) when the provider supports it | | `filename` | string | Output filename hint | diff --git a/docs/tools/tts.md b/docs/tools/tts.md index e9757b234e5..158b4e5fa45 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -507,6 +507,8 @@ Notes: The `tts` tool converts text to speech and returns an audio attachment for reply delivery. When the channel is Feishu, Matrix, Telegram, or WhatsApp, the audio is delivered as a voice message rather than a file attachment. +It accepts optional `channel` and `timeoutMs` fields; `timeoutMs` is a +per-call provider request timeout in milliseconds. ## Gateway RPC diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index 4a8ef993b47..76188296a6f 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -170,6 +170,7 @@ dimensions). Providers that do not declare it surface the value via | `action` | string | `"generate"` (default), `"status"`, or `"list"` | | `model` | string | Provider/model override (e.g. `runway/gen4.5`) | | `filename` | string | Output filename hint | +| `timeoutMs` | number | Optional provider request timeout in milliseconds | | `providerOptions` | object | Provider-specific options as a JSON object (e.g. `{"seed": 42, "draft": true}`). Providers that declare a typed schema validate the keys and types; unknown keys or mismatches skip the candidate during fallback. Providers without a declared schema receive the options as-is. Run `video_generate action=list` to see what each provider accepts | Not all providers support all parameters. OpenClaw already normalizes duration to the closest provider-supported value, and it also remaps translated geometry hints such as size-to-aspect-ratio when a fallback provider exposes a different control surface. Truly unsupported overrides are ignored on a best-effort basis and reported as warnings in the tool result. Hard capability limits (such as too many reference inputs) fail before submission. diff --git a/extensions/google/image-generation-provider.ts b/extensions/google/image-generation-provider.ts index 3ee81a92574..4925727f892 100644 --- a/extensions/google/image-generation-provider.ts +++ b/extensions/google/image-generation-provider.ts @@ -165,7 +165,7 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider { : {}), }, }, - timeoutMs: 60_000, + timeoutMs: req.timeoutMs ?? 60_000, fetchFn: fetch, pinDns: false, allowPrivateNetwork, diff --git a/extensions/speech-core/src/tts.ts b/extensions/speech-core/src/tts.ts index 4804bcf489d..4eccca12e19 100644 --- a/extensions/speech-core/src/tts.ts +++ b/extensions/speech-core/src/tts.ts @@ -753,6 +753,7 @@ export async function textToSpeech(params: { channel?: string; overrides?: TtsDirectiveOverrides; disableFallback?: boolean; + timeoutMs?: number; }): Promise { const synthesis = await synthesizeSpeech(params); if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { @@ -791,6 +792,7 @@ export async function synthesizeSpeech(params: { channel?: string; overrides?: TtsDirectiveOverrides; disableFallback?: boolean; + timeoutMs?: number; }): Promise { const setup = resolveTtsRequestSetup({ text: params.text, @@ -804,6 +806,7 @@ export async function synthesizeSpeech(params: { } const { config, providers } = setup; + const timeoutMs = params.timeoutMs ?? config.timeoutMs; const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file"; const errors: string[] = []; @@ -840,7 +843,7 @@ export async function synthesizeSpeech(params: { providerConfig: resolvedProvider.providerConfig, target, providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id], - timeoutMs: config.timeoutMs, + timeoutMs, }); const latencyMs = Date.now() - providerStart; attempts.push({ diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index 2bf3b946de6..8d402d63258 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -25,6 +25,7 @@ import { buildMediaReferenceDetails, isCapabilityProviderConfigured, normalizeMediaReferenceInputs, + readGenerationTimeoutMs, resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, @@ -108,6 +109,12 @@ const ImageGenerateToolSchema = Type.Object({ maximum: MAX_COUNT, }), ), + timeoutMs: Type.Optional( + Type.Number({ + description: "Optional provider request timeout in milliseconds.", + minimum: 1, + }), + ), }); function getImageGenerationProviderAuthEnvVars(providerId: string): string[] { @@ -490,6 +497,7 @@ export function createImageGenerateTool(options?: { const size = readStringParam(params, "size"); const aspectRatio = normalizeAspectRatio(readStringParam(params, "aspectRatio")); const explicitResolution = normalizeResolution(readStringParam(params, "resolution")); + const timeoutMs = readGenerationTimeoutMs(params); const selectedProvider = resolveSelectedImageGenerationProvider({ config: effectiveCfg, imageGenerationModelConfig, @@ -535,6 +543,7 @@ export function createImageGenerateTool(options?: { resolution, count, inputImages, + timeoutMs, }); const ignoredOverrides = result.ignoredOverrides ?? []; const displayProvider = sanitizeInlineDirectiveText(result.provider); @@ -617,6 +626,7 @@ export function createImageGenerateTool(options?: { ? { aspectRatio: normalizedAspectRatio ?? aspectRatio } : {}), ...(filename ? { filename } : {}), + ...(timeoutMs !== undefined ? { timeoutMs } : {}), attempts: result.attempts, ...(result.normalization ? { normalization: result.normalization } : {}), metadata: result.metadata, diff --git a/src/agents/tools/media-tool-shared.ts b/src/agents/tools/media-tool-shared.ts index 62b41b00e89..c1d2fc26433 100644 --- a/src/agents/tools/media-tool-shared.ts +++ b/src/agents/tools/media-tool-shared.ts @@ -9,7 +9,12 @@ import { } from "../../shared/string-coerce.js"; import { normalizeModelRef } from "../model-selection.js"; import { normalizeProviderId } from "../provider-id.js"; -import { ToolInputError, readStringArrayParam, readStringParam } from "./common.js"; +import { + ToolInputError, + readNumberParam, + readStringArrayParam, + readStringParam, +} from "./common.js"; import type { ImageModelConfig } from "./image-tool.helpers.js"; import { buildToolModelConfigFromCandidates, @@ -78,6 +83,20 @@ export function applyMusicGenerationModelConfigDefaults( return applyAgentDefaultModelConfig(cfg, "musicGenerationModel", musicGenerationModelConfig); } +export function readGenerationTimeoutMs(args: Record): number | undefined { + const timeoutMs = readNumberParam(args, "timeoutMs", { + integer: true, + strict: true, + }); + if (timeoutMs === undefined) { + return undefined; + } + if (timeoutMs <= 0) { + throw new ToolInputError("timeoutMs must be a positive integer in milliseconds."); + } + return timeoutMs; +} + function applyAgentDefaultModelConfig( cfg: OpenClawConfig | undefined, key: "imageModel" | "imageGenerationModel" | "videoGenerationModel" | "musicGenerationModel", diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index 53786300e5f..df6f49d7b3d 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -28,6 +28,7 @@ import { buildTaskRunDetails, normalizeMediaReferenceInputs, readBooleanToolParam, + readGenerationTimeoutMs, resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, @@ -98,6 +99,12 @@ const MusicGenerateToolSchema = Type.Object({ minimum: 1, }), ), + timeoutMs: Type.Optional( + Type.Number({ + description: "Optional provider request timeout in milliseconds.", + minimum: 1, + }), + ), format: Type.Optional( Type.String({ description: 'Optional output format hint: "mp3" or "wav" when the provider supports it.', @@ -336,6 +343,7 @@ async function executeMusicGenerationJob(params: { filename?: string; loadedReferenceImages: LoadedReferenceImage[]; taskHandle?: MusicGenerationTaskHandle | null; + timeoutMs?: number; }): Promise { if (params.taskHandle) { recordMusicGenerationTaskProgress({ @@ -353,6 +361,7 @@ async function executeMusicGenerationJob(params: { durationSeconds: params.durationSeconds, format: params.format, inputImages: params.loadedReferenceImages.map((entry) => entry.sourceImage), + timeoutMs: params.timeoutMs, }); if (params.taskHandle) { recordMusicGenerationTaskProgress({ @@ -437,6 +446,7 @@ async function executeMusicGenerationJob(params: { : {}), ...(!ignoredOverrideKeys.has("format") && params.format ? { format: params.format } : {}), ...(params.filename ? { filename: params.filename } : {}), + ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}), ...buildMediaReferenceDetails({ entries: params.loadedReferenceImages, singleKey: "image", @@ -520,6 +530,7 @@ export function createMusicGenerateTool(options?: { }); const format = normalizeOutputFormat(readStringParam(args, "format")); const filename = readStringParam(args, "filename"); + const timeoutMs = readGenerationTimeoutMs(args); const imageInputs = normalizeReferenceImageInputs(args); const selectedProvider = resolveSelectedMusicGenerationProvider({ config: effectiveCfg, @@ -564,6 +575,7 @@ export function createMusicGenerateTool(options?: { filename, loadedReferenceImages, taskHandle, + timeoutMs, }); completeMusicGenerationTaskRun({ handle: taskHandle, @@ -627,6 +639,7 @@ export function createMusicGenerateTool(options?: { ...(typeof durationSeconds === "number" ? { durationSeconds } : {}), ...(format ? { format } : {}), ...(filename ? { filename } : {}), + ...(timeoutMs !== undefined ? { timeoutMs } : {}), }, }; } @@ -644,6 +657,7 @@ export function createMusicGenerateTool(options?: { filename, loadedReferenceImages, taskHandle, + timeoutMs, }); completeMusicGenerationTaskRun({ handle: taskHandle, diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts index 18786c29c57..e833d5eb6df 100644 --- a/src/agents/tools/tts-tool.test.ts +++ b/src/agents/tools/tts-tool.test.ts @@ -43,6 +43,26 @@ describe("createTtsTool", () => { expect(JSON.stringify(result.content)).not.toContain("MEDIA:"); }); + it("passes an optional timeout to speech generation", async () => { + textToSpeechSpy.mockResolvedValue({ + success: true, + audioPath: "/tmp/reply.opus", + provider: "test", + voiceCompatible: true, + }); + + const tool = createTtsTool(); + const result = await tool.execute("call-1", { text: "hello", timeoutMs: 12_345 }); + + expect(textToSpeechSpy).toHaveBeenCalledWith( + expect.objectContaining({ + text: "hello", + timeoutMs: 12_345, + }), + ); + expect(result.details).toMatchObject({ timeoutMs: 12_345 }); + }); + it("echoes longer utterances verbatim into the tool-result content", async () => { textToSpeechSpy.mockResolvedValue({ success: true, diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts index b50bef3a67d..a3db27121cb 100644 --- a/src/agents/tools/tts-tool.ts +++ b/src/agents/tools/tts-tool.ts @@ -5,15 +5,35 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { textToSpeech } from "../../tts/tts.js"; import type { GatewayMessageChannel } from "../../utils/message-channel.js"; import type { AnyAgentTool } from "./common.js"; -import { readStringParam } from "./common.js"; +import { ToolInputError, readNumberParam, readStringParam } from "./common.js"; const TtsToolSchema = Type.Object({ text: Type.String({ description: "Text to convert to speech." }), channel: Type.Optional( Type.String({ description: "Optional channel id to pick output format." }), ), + timeoutMs: Type.Optional( + Type.Number({ + description: "Optional provider request timeout in milliseconds.", + minimum: 1, + }), + ), }); +function readTtsTimeoutMs(args: Record): number | undefined { + const timeoutMs = readNumberParam(args, "timeoutMs", { + integer: true, + strict: true, + }); + if (timeoutMs === undefined) { + return undefined; + } + if (timeoutMs <= 0) { + throw new ToolInputError("timeoutMs must be a positive integer in milliseconds."); + } + return timeoutMs; +} + /** * Defuse reply-directive tokens inside spoken transcripts before they flow * through tool-result content. When verbose tool output is enabled, @@ -48,11 +68,13 @@ export function createTtsTool(opts?: { const params = args as Record; const text = readStringParam(params, "text", { required: true }); const channel = readStringParam(params, "channel"); + const timeoutMs = readTtsTimeoutMs(params); const cfg = opts?.config ?? loadConfig(); const result = await textToSpeech({ text, cfg, channel: channel ?? opts?.agentChannel, + timeoutMs, }); if (result.success && result.audioPath) { @@ -66,6 +88,7 @@ export function createTtsTool(opts?: { details: { audioPath: result.audioPath, provider: result.provider, + ...(timeoutMs !== undefined ? { timeoutMs } : {}), media: { mediaUrl: result.audioPath, trustedLocalMedia: true, diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 041f21f395e..95eb4bb0b81 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -32,6 +32,7 @@ import { buildTaskRunDetails, normalizeMediaReferenceInputs, readBooleanToolParam, + readGenerationTimeoutMs, resolveCapabilityModelConfigForTool, resolveGenerateAction, resolveMediaToolLocalRoots, @@ -205,6 +206,12 @@ const VideoGenerateToolSchema = Type.Object({ "keys each provider accepts.", }), ), + timeoutMs: Type.Optional( + Type.Number({ + description: "Optional provider request timeout in milliseconds.", + minimum: 1, + }), + ), }); export function resolveVideoGenerationModelConfigForTool(params: { @@ -562,6 +569,7 @@ async function executeVideoGenerationJob(params: { loadedReferenceAudios: LoadedReferenceAsset[]; taskHandle?: VideoGenerationTaskHandle | null; providerOptions?: Record; + timeoutMs?: number; }): Promise { if (params.taskHandle) { recordVideoGenerationTaskProgress({ @@ -584,6 +592,7 @@ async function executeVideoGenerationJob(params: { inputVideos: params.loadedReferenceVideos.map((entry) => entry.sourceAsset), inputAudios: params.loadedReferenceAudios.map((entry) => entry.sourceAsset), providerOptions: params.providerOptions, + timeoutMs: params.timeoutMs, }); if (params.taskHandle) { recordVideoGenerationTaskProgress({ @@ -747,6 +756,7 @@ async function executeVideoGenerationJob(params: { ? { watermark: params.watermark } : {}), ...(params.filename ? { filename: params.filename } : {}), + ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}), attempts: result.attempts, ...(result.normalization ? { normalization: result.normalization } : {}), metadata: result.metadata, @@ -825,6 +835,7 @@ export function createVideoGenerateTool(options?: { }); const audio = readBooleanToolParam(args, "audio"); const watermark = readBooleanToolParam(args, "watermark"); + const timeoutMs = readGenerationTimeoutMs(args); // providerOptions must be a plain object. Arrays are objects in JS, so // exclude them explicitly — a bogus call like `providerOptions: ["seed", 42]` // would otherwise be cast to `Record` with numeric-string @@ -960,6 +971,7 @@ export function createVideoGenerateTool(options?: { loadedReferenceAudios, taskHandle, providerOptions, + timeoutMs, }); completeVideoGenerationTaskRun({ handle: taskHandle, @@ -1032,6 +1044,7 @@ export function createVideoGenerateTool(options?: { ...(typeof audio === "boolean" ? { audio } : {}), ...(typeof watermark === "boolean" ? { watermark } : {}), ...(filename ? { filename } : {}), + ...(timeoutMs !== undefined ? { timeoutMs } : {}), }, }; } @@ -1054,6 +1067,7 @@ export function createVideoGenerateTool(options?: { loadedReferenceAudios, taskHandle, providerOptions, + timeoutMs, }); completeVideoGenerationTaskRun({ handle: taskHandle, diff --git a/src/image-generation/runtime-types.ts b/src/image-generation/runtime-types.ts index 32fc8284e84..6eb3474e67a 100644 --- a/src/image-generation/runtime-types.ts +++ b/src/image-generation/runtime-types.ts @@ -21,6 +21,8 @@ export type GenerateImageParams = { aspectRatio?: string; resolution?: ImageGenerationResolution; inputImages?: ImageGenerationSourceImage[]; + /** Optional per-request provider timeout in milliseconds. */ + timeoutMs?: number; }; export type GenerateImageRuntimeResult = { diff --git a/src/image-generation/runtime.test.ts b/src/image-generation/runtime.test.ts index 06044caa820..0d6e6698f58 100644 --- a/src/image-generation/runtime.test.ts +++ b/src/image-generation/runtime.test.ts @@ -26,6 +26,7 @@ describe("image-generation runtime", () => { it("generates images through the active image-generation provider", async () => { const authStore = { version: 1, profiles: {} } as const; let seenAuthStore: unknown; + let seenTimeoutMs: number | undefined; mocks.resolveAgentModelPrimaryValue.mockReturnValue("image-plugin/img-v1"); const provider: ImageGenerationProvider = { id: "image-plugin", @@ -33,8 +34,9 @@ describe("image-generation runtime", () => { generate: {}, edit: { enabled: false }, }, - async generateImage(req: { authStore?: unknown }) { + async generateImage(req: { authStore?: unknown; timeoutMs?: number }) { seenAuthStore = req.authStore; + seenTimeoutMs = req.timeoutMs; return { images: [ { @@ -60,12 +62,14 @@ describe("image-generation runtime", () => { prompt: "draw a cat", agentDir: "/tmp/agent", authStore, + timeoutMs: 12_345, }); expect(result.provider).toBe("image-plugin"); expect(result.model).toBe("img-v1"); expect(result.attempts).toEqual([]); expect(seenAuthStore).toEqual(authStore); + expect(seenTimeoutMs).toBe(12_345); expect(result.images).toEqual([ { buffer: Buffer.from("png-bytes"), diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts index b52a3de2e85..365d77c0c0c 100644 --- a/src/image-generation/runtime.ts +++ b/src/image-generation/runtime.ts @@ -85,6 +85,7 @@ export async function generateImage( aspectRatio: sanitized.aspectRatio, resolution: sanitized.resolution, inputImages: params.inputImages, + ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}), }); if (!Array.isArray(result.images) || result.images.length === 0) { throw new Error("Image generation provider returned no images."); diff --git a/src/music-generation/runtime-types.ts b/src/music-generation/runtime-types.ts index ce8cc97aa0d..8cd770d6875 100644 --- a/src/music-generation/runtime-types.ts +++ b/src/music-generation/runtime-types.ts @@ -21,6 +21,8 @@ export type GenerateMusicParams = { durationSeconds?: number; format?: MusicGenerationOutputFormat; inputImages?: MusicGenerationSourceImage[]; + /** Optional per-request provider timeout in milliseconds. */ + timeoutMs?: number; }; export type GenerateMusicRuntimeResult = { diff --git a/src/music-generation/runtime.test.ts b/src/music-generation/runtime.test.ts index 61423ebca4e..6c9248cef86 100644 --- a/src/music-generation/runtime.test.ts +++ b/src/music-generation/runtime.test.ts @@ -26,12 +26,14 @@ describe("music-generation runtime", () => { it("generates tracks through the active music-generation provider", async () => { const authStore = { version: 1, profiles: {} } as const; let seenAuthStore: unknown; + let seenTimeoutMs: number | undefined; mocks.resolveAgentModelPrimaryValue.mockReturnValue("music-plugin/track-v1"); const provider: MusicGenerationProvider = { id: "music-plugin", capabilities: {}, - async generateMusic(req: { authStore?: unknown }) { + async generateMusic(req: { authStore?: unknown; timeoutMs?: number }) { seenAuthStore = req.authStore; + seenTimeoutMs = req.timeoutMs; return { tracks: [ { @@ -57,6 +59,7 @@ describe("music-generation runtime", () => { prompt: "play a synth line", agentDir: "/tmp/agent", authStore, + timeoutMs: 12_345, }); expect(result.provider).toBe("music-plugin"); @@ -64,6 +67,7 @@ describe("music-generation runtime", () => { expect(result.attempts).toEqual([]); expect(result.ignoredOverrides).toEqual([]); expect(seenAuthStore).toEqual(authStore); + expect(seenTimeoutMs).toBe(12_345); expect(result.tracks).toEqual([ { buffer: Buffer.from("mp3-bytes"), diff --git a/src/music-generation/runtime.ts b/src/music-generation/runtime.ts index b900acb28ed..07700802ca3 100644 --- a/src/music-generation/runtime.ts +++ b/src/music-generation/runtime.ts @@ -82,6 +82,7 @@ export async function generateMusic( durationSeconds: sanitized.durationSeconds, format: sanitized.format, inputImages: params.inputImages, + ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}), }); if (!Array.isArray(result.tracks) || result.tracks.length === 0) { throw new Error("Music generation provider returned no tracks."); diff --git a/src/plugin-sdk/tts-runtime.types.ts b/src/plugin-sdk/tts-runtime.types.ts index f57e043a5ef..0e43dba237a 100644 --- a/src/plugin-sdk/tts-runtime.types.ts +++ b/src/plugin-sdk/tts-runtime.types.ts @@ -69,6 +69,7 @@ export type TtsRequestParams = { channel?: string; overrides?: TtsDirectiveOverrides; disableFallback?: boolean; + timeoutMs?: number; }; export type TtsTelephonyRequestParams = { diff --git a/src/video-generation/runtime-types.ts b/src/video-generation/runtime-types.ts index 50602bd1fed..b632d841c9e 100644 --- a/src/video-generation/runtime-types.ts +++ b/src/video-generation/runtime-types.ts @@ -27,6 +27,8 @@ export type GenerateVideoParams = { inputAudios?: VideoGenerationSourceAsset[]; /** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */ providerOptions?: Record; + /** Optional per-request provider timeout in milliseconds. */ + timeoutMs?: number; }; export type GenerateVideoRuntimeResult = { diff --git a/src/video-generation/runtime.test.ts b/src/video-generation/runtime.test.ts index 40205e9c877..61366bbb46b 100644 --- a/src/video-generation/runtime.test.ts +++ b/src/video-generation/runtime.test.ts @@ -43,12 +43,14 @@ describe("video-generation runtime", () => { it("generates videos through the active video-generation provider", async () => { const authStore = { version: 1, profiles: {} } as const; let seenAuthStore: unknown; + let seenTimeoutMs: number | undefined; mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1"); const provider: VideoGenerationProvider = { id: "video-plugin", capabilities: {}, - async generateVideo(req: { authStore?: unknown }) { + async generateVideo(req: { authStore?: unknown; timeoutMs?: number }) { seenAuthStore = req.authStore; + seenTimeoutMs = req.timeoutMs; return { videos: [ { @@ -74,6 +76,7 @@ describe("video-generation runtime", () => { prompt: "animate a cat", agentDir: "/tmp/agent", authStore, + timeoutMs: 12_345, }); expect(result.provider).toBe("video-plugin"); @@ -81,6 +84,7 @@ describe("video-generation runtime", () => { expect(result.attempts).toEqual([]); expect(result.ignoredOverrides).toEqual([]); expect(seenAuthStore).toEqual(authStore); + expect(seenTimeoutMs).toBe(12_345); expect(result.videos).toEqual([ { buffer: Buffer.from("mp4-bytes"), diff --git a/src/video-generation/runtime.ts b/src/video-generation/runtime.ts index ce5a93ddf6c..fd6468746a4 100644 --- a/src/video-generation/runtime.ts +++ b/src/video-generation/runtime.ts @@ -260,6 +260,7 @@ export async function generateVideo( inputVideos: params.inputVideos, inputAudios: params.inputAudios, providerOptions: params.providerOptions, + ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}), }); if (!Array.isArray(result.videos) || result.videos.length === 0) { throw new Error("Video generation provider returned no videos.");