diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68a79f1aefc..54ad4816e7e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
+- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
- Agents/subagents: add optional forked context for native `sessions_spawn` runs so agents can let a child inherit the requester transcript when needed, while keeping clean isolated sessions as the default; includes prompt guidance, context-engine hook metadata, docs, and QA coverage.
- Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin.
- Providers/OpenAI: add forward-compatible `gpt-5.5` and `gpt-5.5-pro` support for OpenAI API keys, OpenAI Codex OAuth, and the Codex CLI default model.
diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md
index 28e3e597dbd..614b5673f60 100644
--- a/docs/tools/image-generation.md
+++ b/docs/tools/image-generation.md
@@ -96,6 +96,10 @@ Resolution hint.
Number of images to generate (1–4).
+
+Optional provider request timeout in milliseconds.
+
+
Output filename hint.
diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md
index 01a139b5e61..b6c0ef8a386 100644
--- a/docs/tools/music-generation.md
+++ b/docs/tools/music-generation.md
@@ -125,6 +125,7 @@ Direct generation example:
| `image` | string | Single reference image path or URL |
| `images` | string[] | Multiple reference images (up to 10) |
| `durationSeconds` | number | Target duration in seconds when the provider supports duration hints |
+| `timeoutMs` | number | Optional provider request timeout in milliseconds |
| `format` | string | Output format hint (`mp3` or `wav`) when the provider supports it |
| `filename` | string | Output filename hint |
diff --git a/docs/tools/tts.md b/docs/tools/tts.md
index e9757b234e5..158b4e5fa45 100644
--- a/docs/tools/tts.md
+++ b/docs/tools/tts.md
@@ -507,6 +507,8 @@ Notes:
The `tts` tool converts text to speech and returns an audio attachment for
reply delivery. When the channel is Feishu, Matrix, Telegram, or WhatsApp,
the audio is delivered as a voice message rather than a file attachment.
+It accepts optional `channel` and `timeoutMs` fields; `timeoutMs` is a
+per-call provider request timeout in milliseconds.
## Gateway RPC
diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md
index 4a8ef993b47..76188296a6f 100644
--- a/docs/tools/video-generation.md
+++ b/docs/tools/video-generation.md
@@ -170,6 +170,7 @@ dimensions). Providers that do not declare it surface the value via
| `action` | string | `"generate"` (default), `"status"`, or `"list"` |
| `model` | string | Provider/model override (e.g. `runway/gen4.5`) |
| `filename` | string | Output filename hint |
+| `timeoutMs` | number | Optional provider request timeout in milliseconds |
| `providerOptions` | object | Provider-specific options as a JSON object (e.g. `{"seed": 42, "draft": true}`). Providers that declare a typed schema validate the keys and types; unknown keys or mismatches skip the candidate during fallback. Providers without a declared schema receive the options as-is. Run `video_generate action=list` to see what each provider accepts |
Not all providers support all parameters. OpenClaw already normalizes duration to the closest provider-supported value, and it also remaps translated geometry hints such as size-to-aspect-ratio when a fallback provider exposes a different control surface. Truly unsupported overrides are ignored on a best-effort basis and reported as warnings in the tool result. Hard capability limits (such as too many reference inputs) fail before submission.
diff --git a/extensions/google/image-generation-provider.ts b/extensions/google/image-generation-provider.ts
index 3ee81a92574..4925727f892 100644
--- a/extensions/google/image-generation-provider.ts
+++ b/extensions/google/image-generation-provider.ts
@@ -165,7 +165,7 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider {
: {}),
},
},
- timeoutMs: 60_000,
+ timeoutMs: req.timeoutMs ?? 60_000,
fetchFn: fetch,
pinDns: false,
allowPrivateNetwork,
diff --git a/extensions/speech-core/src/tts.ts b/extensions/speech-core/src/tts.ts
index 4804bcf489d..4eccca12e19 100644
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@@ -753,6 +753,7 @@ export async function textToSpeech(params: {
channel?: string;
overrides?: TtsDirectiveOverrides;
disableFallback?: boolean;
+ timeoutMs?: number;
}): Promise {
const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
@@ -791,6 +792,7 @@ export async function synthesizeSpeech(params: {
channel?: string;
overrides?: TtsDirectiveOverrides;
disableFallback?: boolean;
+ timeoutMs?: number;
}): Promise {
const setup = resolveTtsRequestSetup({
text: params.text,
@@ -804,6 +806,7 @@ export async function synthesizeSpeech(params: {
}
const { config, providers } = setup;
+ const timeoutMs = params.timeoutMs ?? config.timeoutMs;
const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";
const errors: string[] = [];
@@ -840,7 +843,7 @@ export async function synthesizeSpeech(params: {
providerConfig: resolvedProvider.providerConfig,
target,
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
- timeoutMs: config.timeoutMs,
+ timeoutMs,
});
const latencyMs = Date.now() - providerStart;
attempts.push({
diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts
index 2bf3b946de6..8d402d63258 100644
--- a/src/agents/tools/image-generate-tool.ts
+++ b/src/agents/tools/image-generate-tool.ts
@@ -25,6 +25,7 @@ import {
buildMediaReferenceDetails,
isCapabilityProviderConfigured,
normalizeMediaReferenceInputs,
+ readGenerationTimeoutMs,
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
@@ -108,6 +109,12 @@ const ImageGenerateToolSchema = Type.Object({
maximum: MAX_COUNT,
}),
),
+ timeoutMs: Type.Optional(
+ Type.Number({
+ description: "Optional provider request timeout in milliseconds.",
+ minimum: 1,
+ }),
+ ),
});
function getImageGenerationProviderAuthEnvVars(providerId: string): string[] {
@@ -490,6 +497,7 @@ export function createImageGenerateTool(options?: {
const size = readStringParam(params, "size");
const aspectRatio = normalizeAspectRatio(readStringParam(params, "aspectRatio"));
const explicitResolution = normalizeResolution(readStringParam(params, "resolution"));
+ const timeoutMs = readGenerationTimeoutMs(params);
const selectedProvider = resolveSelectedImageGenerationProvider({
config: effectiveCfg,
imageGenerationModelConfig,
@@ -535,6 +543,7 @@ export function createImageGenerateTool(options?: {
resolution,
count,
inputImages,
+ timeoutMs,
});
const ignoredOverrides = result.ignoredOverrides ?? [];
const displayProvider = sanitizeInlineDirectiveText(result.provider);
@@ -617,6 +626,7 @@ export function createImageGenerateTool(options?: {
? { aspectRatio: normalizedAspectRatio ?? aspectRatio }
: {}),
...(filename ? { filename } : {}),
+ ...(timeoutMs !== undefined ? { timeoutMs } : {}),
attempts: result.attempts,
...(result.normalization ? { normalization: result.normalization } : {}),
metadata: result.metadata,
diff --git a/src/agents/tools/media-tool-shared.ts b/src/agents/tools/media-tool-shared.ts
index 62b41b00e89..c1d2fc26433 100644
--- a/src/agents/tools/media-tool-shared.ts
+++ b/src/agents/tools/media-tool-shared.ts
@@ -9,7 +9,12 @@ import {
} from "../../shared/string-coerce.js";
import { normalizeModelRef } from "../model-selection.js";
import { normalizeProviderId } from "../provider-id.js";
-import { ToolInputError, readStringArrayParam, readStringParam } from "./common.js";
+import {
+ ToolInputError,
+ readNumberParam,
+ readStringArrayParam,
+ readStringParam,
+} from "./common.js";
import type { ImageModelConfig } from "./image-tool.helpers.js";
import {
buildToolModelConfigFromCandidates,
@@ -78,6 +83,20 @@ export function applyMusicGenerationModelConfigDefaults(
return applyAgentDefaultModelConfig(cfg, "musicGenerationModel", musicGenerationModelConfig);
}
+/**
+ * Reads the optional per-call `timeoutMs` argument shared by the media
+ * generation tools.
+ *
+ * @param args - Raw tool-call arguments.
+ * @returns The timeout in milliseconds, capped at the largest delay a
+ *   JavaScript timer can represent, or `undefined` when `readNumberParam`
+ *   yields no value for `timeoutMs`.
+ * @throws ToolInputError when the parsed value is zero or negative.
+ */
+export function readGenerationTimeoutMs(args: Record<string, unknown>): number | undefined {
+  // Timer delays above 2^31 - 1 ms overflow: Node's setTimeout treats them as
+  // 1 ms, so an oversized "extension" would abort the request almost at once
+  // if a provider hands the value to a timer. Cap instead of rejecting so
+  // callers that ask for "as long as possible" still succeed.
+  const maxTimerDelayMs = 2_147_483_647;
+  const timeoutMs = readNumberParam(args, "timeoutMs", {
+    integer: true,
+    strict: true,
+  });
+  if (timeoutMs === undefined) {
+    return undefined;
+  }
+  if (timeoutMs <= 0) {
+    throw new ToolInputError("timeoutMs must be a positive integer in milliseconds.");
+  }
+  return Math.min(timeoutMs, maxTimerDelayMs);
+}
+
function applyAgentDefaultModelConfig(
cfg: OpenClawConfig | undefined,
key: "imageModel" | "imageGenerationModel" | "videoGenerationModel" | "musicGenerationModel",
diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts
index 53786300e5f..df6f49d7b3d 100644
--- a/src/agents/tools/music-generate-tool.ts
+++ b/src/agents/tools/music-generate-tool.ts
@@ -28,6 +28,7 @@ import {
buildTaskRunDetails,
normalizeMediaReferenceInputs,
readBooleanToolParam,
+ readGenerationTimeoutMs,
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
@@ -98,6 +99,12 @@ const MusicGenerateToolSchema = Type.Object({
minimum: 1,
}),
),
+ timeoutMs: Type.Optional(
+ Type.Number({
+ description: "Optional provider request timeout in milliseconds.",
+ minimum: 1,
+ }),
+ ),
format: Type.Optional(
Type.String({
description: 'Optional output format hint: "mp3" or "wav" when the provider supports it.',
@@ -336,6 +343,7 @@ async function executeMusicGenerationJob(params: {
filename?: string;
loadedReferenceImages: LoadedReferenceImage[];
taskHandle?: MusicGenerationTaskHandle | null;
+ timeoutMs?: number;
}): Promise {
if (params.taskHandle) {
recordMusicGenerationTaskProgress({
@@ -353,6 +361,7 @@ async function executeMusicGenerationJob(params: {
durationSeconds: params.durationSeconds,
format: params.format,
inputImages: params.loadedReferenceImages.map((entry) => entry.sourceImage),
+ timeoutMs: params.timeoutMs,
});
if (params.taskHandle) {
recordMusicGenerationTaskProgress({
@@ -437,6 +446,7 @@ async function executeMusicGenerationJob(params: {
: {}),
...(!ignoredOverrideKeys.has("format") && params.format ? { format: params.format } : {}),
...(params.filename ? { filename: params.filename } : {}),
+ ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
...buildMediaReferenceDetails({
entries: params.loadedReferenceImages,
singleKey: "image",
@@ -520,6 +530,7 @@ export function createMusicGenerateTool(options?: {
});
const format = normalizeOutputFormat(readStringParam(args, "format"));
const filename = readStringParam(args, "filename");
+ const timeoutMs = readGenerationTimeoutMs(args);
const imageInputs = normalizeReferenceImageInputs(args);
const selectedProvider = resolveSelectedMusicGenerationProvider({
config: effectiveCfg,
@@ -564,6 +575,7 @@ export function createMusicGenerateTool(options?: {
filename,
loadedReferenceImages,
taskHandle,
+ timeoutMs,
});
completeMusicGenerationTaskRun({
handle: taskHandle,
@@ -627,6 +639,7 @@ export function createMusicGenerateTool(options?: {
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
...(format ? { format } : {}),
...(filename ? { filename } : {}),
+ ...(timeoutMs !== undefined ? { timeoutMs } : {}),
},
};
}
@@ -644,6 +657,7 @@ export function createMusicGenerateTool(options?: {
filename,
loadedReferenceImages,
taskHandle,
+ timeoutMs,
});
completeMusicGenerationTaskRun({
handle: taskHandle,
diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts
index 18786c29c57..e833d5eb6df 100644
--- a/src/agents/tools/tts-tool.test.ts
+++ b/src/agents/tools/tts-tool.test.ts
@@ -43,6 +43,26 @@ describe("createTtsTool", () => {
expect(JSON.stringify(result.content)).not.toContain("MEDIA:");
});
+  it("passes an optional timeout to speech generation", async () => {
+    // The per-call timeout must reach textToSpeech unchanged and be echoed
+    // back in the tool result details.
+    const requestedTimeoutMs = 12_345;
+    const spokenText = "hello";
+    textToSpeechSpy.mockResolvedValue({
+      success: true,
+      audioPath: "/tmp/reply.opus",
+      provider: "test",
+      voiceCompatible: true,
+    });
+
+    const { details } = await createTtsTool().execute("call-1", {
+      text: spokenText,
+      timeoutMs: requestedTimeoutMs,
+    });
+
+    const expectedRequest = expect.objectContaining({
+      text: spokenText,
+      timeoutMs: requestedTimeoutMs,
+    });
+    expect(textToSpeechSpy).toHaveBeenCalledWith(expectedRequest);
+    expect(details).toMatchObject({ timeoutMs: requestedTimeoutMs });
+  });
+
it("echoes longer utterances verbatim into the tool-result content", async () => {
textToSpeechSpy.mockResolvedValue({
success: true,
diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts
index b50bef3a67d..a3db27121cb 100644
--- a/src/agents/tools/tts-tool.ts
+++ b/src/agents/tools/tts-tool.ts
@@ -5,15 +5,35 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { textToSpeech } from "../../tts/tts.js";
import type { GatewayMessageChannel } from "../../utils/message-channel.js";
import type { AnyAgentTool } from "./common.js";
-import { readStringParam } from "./common.js";
+import { ToolInputError, readNumberParam, readStringParam } from "./common.js";
const TtsToolSchema = Type.Object({
text: Type.String({ description: "Text to convert to speech." }),
channel: Type.Optional(
Type.String({ description: "Optional channel id to pick output format." }),
),
+ timeoutMs: Type.Optional( // Optional per-call field; the tool reads it via readTtsTimeoutMs.
+ Type.Number({
+ description: "Optional provider request timeout in milliseconds.",
+ minimum: 1, // Schema-level floor; readTtsTimeoutMs also rejects values <= 0 at runtime.
+ }),
+ ),
});
+/**
+ * Reads the optional per-call `timeoutMs` argument of the `tts` tool.
+ *
+ * @param args - Raw tool-call arguments.
+ * @returns The timeout in milliseconds, capped at the largest delay a
+ *   JavaScript timer can represent, or `undefined` when `readNumberParam`
+ *   yields no value for `timeoutMs`.
+ * @throws ToolInputError when the parsed value is zero or negative.
+ */
+function readTtsTimeoutMs(args: Record<string, unknown>): number | undefined {
+  // Timer delays above 2^31 - 1 ms overflow: Node's setTimeout treats them as
+  // 1 ms, so an oversized timeout would cancel synthesis almost at once if the
+  // speech provider hands the value to a timer. Cap instead of rejecting.
+  const maxTimerDelayMs = 2_147_483_647;
+  const timeoutMs = readNumberParam(args, "timeoutMs", {
+    integer: true,
+    strict: true,
+  });
+  if (timeoutMs === undefined) {
+    return undefined;
+  }
+  if (timeoutMs <= 0) {
+    throw new ToolInputError("timeoutMs must be a positive integer in milliseconds.");
+  }
+  return Math.min(timeoutMs, maxTimerDelayMs);
+}
+
/**
* Defuse reply-directive tokens inside spoken transcripts before they flow
* through tool-result content. When verbose tool output is enabled,
@@ -48,11 +68,13 @@ export function createTtsTool(opts?: {
const params = args as Record;
const text = readStringParam(params, "text", { required: true });
const channel = readStringParam(params, "channel");
+ const timeoutMs = readTtsTimeoutMs(params);
const cfg = opts?.config ?? loadConfig();
const result = await textToSpeech({
text,
cfg,
channel: channel ?? opts?.agentChannel,
+ timeoutMs,
});
if (result.success && result.audioPath) {
@@ -66,6 +88,7 @@ export function createTtsTool(opts?: {
details: {
audioPath: result.audioPath,
provider: result.provider,
+ ...(timeoutMs !== undefined ? { timeoutMs } : {}),
media: {
mediaUrl: result.audioPath,
trustedLocalMedia: true,
diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts
index 041f21f395e..95eb4bb0b81 100644
--- a/src/agents/tools/video-generate-tool.ts
+++ b/src/agents/tools/video-generate-tool.ts
@@ -32,6 +32,7 @@ import {
buildTaskRunDetails,
normalizeMediaReferenceInputs,
readBooleanToolParam,
+ readGenerationTimeoutMs,
resolveCapabilityModelConfigForTool,
resolveGenerateAction,
resolveMediaToolLocalRoots,
@@ -205,6 +206,12 @@ const VideoGenerateToolSchema = Type.Object({
"keys each provider accepts.",
}),
),
+ timeoutMs: Type.Optional(
+ Type.Number({
+ description: "Optional provider request timeout in milliseconds.",
+ minimum: 1,
+ }),
+ ),
});
export function resolveVideoGenerationModelConfigForTool(params: {
@@ -562,6 +569,7 @@ async function executeVideoGenerationJob(params: {
loadedReferenceAudios: LoadedReferenceAsset[];
taskHandle?: VideoGenerationTaskHandle | null;
providerOptions?: Record;
+ timeoutMs?: number;
}): Promise {
if (params.taskHandle) {
recordVideoGenerationTaskProgress({
@@ -584,6 +592,7 @@ async function executeVideoGenerationJob(params: {
inputVideos: params.loadedReferenceVideos.map((entry) => entry.sourceAsset),
inputAudios: params.loadedReferenceAudios.map((entry) => entry.sourceAsset),
providerOptions: params.providerOptions,
+ timeoutMs: params.timeoutMs,
});
if (params.taskHandle) {
recordVideoGenerationTaskProgress({
@@ -747,6 +756,7 @@ async function executeVideoGenerationJob(params: {
? { watermark: params.watermark }
: {}),
...(params.filename ? { filename: params.filename } : {}),
+ ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
attempts: result.attempts,
...(result.normalization ? { normalization: result.normalization } : {}),
metadata: result.metadata,
@@ -825,6 +835,7 @@ export function createVideoGenerateTool(options?: {
});
const audio = readBooleanToolParam(args, "audio");
const watermark = readBooleanToolParam(args, "watermark");
+ const timeoutMs = readGenerationTimeoutMs(args);
// providerOptions must be a plain object. Arrays are objects in JS, so
// exclude them explicitly — a bogus call like `providerOptions: ["seed", 42]`
// would otherwise be cast to `Record` with numeric-string
@@ -960,6 +971,7 @@ export function createVideoGenerateTool(options?: {
loadedReferenceAudios,
taskHandle,
providerOptions,
+ timeoutMs,
});
completeVideoGenerationTaskRun({
handle: taskHandle,
@@ -1032,6 +1044,7 @@ export function createVideoGenerateTool(options?: {
...(typeof audio === "boolean" ? { audio } : {}),
...(typeof watermark === "boolean" ? { watermark } : {}),
...(filename ? { filename } : {}),
+ ...(timeoutMs !== undefined ? { timeoutMs } : {}),
},
};
}
@@ -1054,6 +1067,7 @@ export function createVideoGenerateTool(options?: {
loadedReferenceAudios,
taskHandle,
providerOptions,
+ timeoutMs,
});
completeVideoGenerationTaskRun({
handle: taskHandle,
diff --git a/src/image-generation/runtime-types.ts b/src/image-generation/runtime-types.ts
index 32fc8284e84..6eb3474e67a 100644
--- a/src/image-generation/runtime-types.ts
+++ b/src/image-generation/runtime-types.ts
@@ -21,6 +21,8 @@ export type GenerateImageParams = {
aspectRatio?: string;
resolution?: ImageGenerationResolution;
inputImages?: ImageGenerationSourceImage[];
+ /** Optional per-request provider timeout in milliseconds. */
+ timeoutMs?: number;
};
export type GenerateImageRuntimeResult = {
diff --git a/src/image-generation/runtime.test.ts b/src/image-generation/runtime.test.ts
index 06044caa820..0d6e6698f58 100644
--- a/src/image-generation/runtime.test.ts
+++ b/src/image-generation/runtime.test.ts
@@ -26,6 +26,7 @@ describe("image-generation runtime", () => {
it("generates images through the active image-generation provider", async () => {
const authStore = { version: 1, profiles: {} } as const;
let seenAuthStore: unknown;
+ let seenTimeoutMs: number | undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("image-plugin/img-v1");
const provider: ImageGenerationProvider = {
id: "image-plugin",
@@ -33,8 +34,9 @@ describe("image-generation runtime", () => {
generate: {},
edit: { enabled: false },
},
- async generateImage(req: { authStore?: unknown }) {
+ async generateImage(req: { authStore?: unknown; timeoutMs?: number }) {
seenAuthStore = req.authStore;
+ seenTimeoutMs = req.timeoutMs;
return {
images: [
{
@@ -60,12 +62,14 @@ describe("image-generation runtime", () => {
prompt: "draw a cat",
agentDir: "/tmp/agent",
authStore,
+ timeoutMs: 12_345,
});
expect(result.provider).toBe("image-plugin");
expect(result.model).toBe("img-v1");
expect(result.attempts).toEqual([]);
expect(seenAuthStore).toEqual(authStore);
+ expect(seenTimeoutMs).toBe(12_345);
expect(result.images).toEqual([
{
buffer: Buffer.from("png-bytes"),
diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts
index b52a3de2e85..365d77c0c0c 100644
--- a/src/image-generation/runtime.ts
+++ b/src/image-generation/runtime.ts
@@ -85,6 +85,7 @@ export async function generateImage(
aspectRatio: sanitized.aspectRatio,
resolution: sanitized.resolution,
inputImages: params.inputImages,
+ ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
});
if (!Array.isArray(result.images) || result.images.length === 0) {
throw new Error("Image generation provider returned no images.");
diff --git a/src/music-generation/runtime-types.ts b/src/music-generation/runtime-types.ts
index ce8cc97aa0d..8cd770d6875 100644
--- a/src/music-generation/runtime-types.ts
+++ b/src/music-generation/runtime-types.ts
@@ -21,6 +21,8 @@ export type GenerateMusicParams = {
durationSeconds?: number;
format?: MusicGenerationOutputFormat;
inputImages?: MusicGenerationSourceImage[];
+ /** Optional per-request provider timeout in milliseconds. */
+ timeoutMs?: number;
};
export type GenerateMusicRuntimeResult = {
diff --git a/src/music-generation/runtime.test.ts b/src/music-generation/runtime.test.ts
index 61423ebca4e..6c9248cef86 100644
--- a/src/music-generation/runtime.test.ts
+++ b/src/music-generation/runtime.test.ts
@@ -26,12 +26,14 @@ describe("music-generation runtime", () => {
it("generates tracks through the active music-generation provider", async () => {
const authStore = { version: 1, profiles: {} } as const;
let seenAuthStore: unknown;
+ let seenTimeoutMs: number | undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("music-plugin/track-v1");
const provider: MusicGenerationProvider = {
id: "music-plugin",
capabilities: {},
- async generateMusic(req: { authStore?: unknown }) {
+ async generateMusic(req: { authStore?: unknown; timeoutMs?: number }) {
seenAuthStore = req.authStore;
+ seenTimeoutMs = req.timeoutMs;
return {
tracks: [
{
@@ -57,6 +59,7 @@ describe("music-generation runtime", () => {
prompt: "play a synth line",
agentDir: "/tmp/agent",
authStore,
+ timeoutMs: 12_345,
});
expect(result.provider).toBe("music-plugin");
@@ -64,6 +67,7 @@ describe("music-generation runtime", () => {
expect(result.attempts).toEqual([]);
expect(result.ignoredOverrides).toEqual([]);
expect(seenAuthStore).toEqual(authStore);
+ expect(seenTimeoutMs).toBe(12_345);
expect(result.tracks).toEqual([
{
buffer: Buffer.from("mp3-bytes"),
diff --git a/src/music-generation/runtime.ts b/src/music-generation/runtime.ts
index b900acb28ed..07700802ca3 100644
--- a/src/music-generation/runtime.ts
+++ b/src/music-generation/runtime.ts
@@ -82,6 +82,7 @@ export async function generateMusic(
durationSeconds: sanitized.durationSeconds,
format: sanitized.format,
inputImages: params.inputImages,
+ ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
});
if (!Array.isArray(result.tracks) || result.tracks.length === 0) {
throw new Error("Music generation provider returned no tracks.");
diff --git a/src/plugin-sdk/tts-runtime.types.ts b/src/plugin-sdk/tts-runtime.types.ts
index f57e043a5ef..0e43dba237a 100644
--- a/src/plugin-sdk/tts-runtime.types.ts
+++ b/src/plugin-sdk/tts-runtime.types.ts
@@ -69,6 +69,7 @@ export type TtsRequestParams = {
channel?: string;
overrides?: TtsDirectiveOverrides;
disableFallback?: boolean;
+ timeoutMs?: number;
};
export type TtsTelephonyRequestParams = {
diff --git a/src/video-generation/runtime-types.ts b/src/video-generation/runtime-types.ts
index 50602bd1fed..b632d841c9e 100644
--- a/src/video-generation/runtime-types.ts
+++ b/src/video-generation/runtime-types.ts
@@ -27,6 +27,8 @@ export type GenerateVideoParams = {
inputAudios?: VideoGenerationSourceAsset[];
/** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */
providerOptions?: Record;
+ /** Optional per-request provider timeout in milliseconds. */
+ timeoutMs?: number;
};
export type GenerateVideoRuntimeResult = {
diff --git a/src/video-generation/runtime.test.ts b/src/video-generation/runtime.test.ts
index 40205e9c877..61366bbb46b 100644
--- a/src/video-generation/runtime.test.ts
+++ b/src/video-generation/runtime.test.ts
@@ -43,12 +43,14 @@ describe("video-generation runtime", () => {
it("generates videos through the active video-generation provider", async () => {
const authStore = { version: 1, profiles: {} } as const;
let seenAuthStore: unknown;
+ let seenTimeoutMs: number | undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
const provider: VideoGenerationProvider = {
id: "video-plugin",
capabilities: {},
- async generateVideo(req: { authStore?: unknown }) {
+ async generateVideo(req: { authStore?: unknown; timeoutMs?: number }) {
seenAuthStore = req.authStore;
+ seenTimeoutMs = req.timeoutMs;
return {
videos: [
{
@@ -74,6 +76,7 @@ describe("video-generation runtime", () => {
prompt: "animate a cat",
agentDir: "/tmp/agent",
authStore,
+ timeoutMs: 12_345,
});
expect(result.provider).toBe("video-plugin");
@@ -81,6 +84,7 @@ describe("video-generation runtime", () => {
expect(result.attempts).toEqual([]);
expect(result.ignoredOverrides).toEqual([]);
expect(seenAuthStore).toEqual(authStore);
+ expect(seenTimeoutMs).toBe(12_345);
expect(result.videos).toEqual([
{
buffer: Buffer.from("mp4-bytes"),
diff --git a/src/video-generation/runtime.ts b/src/video-generation/runtime.ts
index ce5a93ddf6c..fd6468746a4 100644
--- a/src/video-generation/runtime.ts
+++ b/src/video-generation/runtime.ts
@@ -260,6 +260,7 @@ export async function generateVideo(
inputVideos: params.inputVideos,
inputAudios: params.inputAudios,
providerOptions: params.providerOptions,
+ ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
});
if (!Array.isArray(result.videos) || result.videos.length === 0) {
throw new Error("Video generation provider returned no videos.");