feat(agents): add generation tool timeouts

2026-05-06 09:50:42 +00:00 · 2026-04-24 00:03:59 +01:00
parent bd49117a50
commit f0a7a85e7a
23 changed files with 141 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai

 ### Changes

+- Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
 - Agents/subagents: add optional forked context for native `sessions_spawn` runs so agents can let a child inherit the requester transcript when needed, while keeping clean isolated sessions as the default; includes prompt guidance, context-engine hook metadata, docs, and QA coverage.
 - Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin.
 - Providers/OpenAI: add forward-compatible `gpt-5.5` and `gpt-5.5-pro` support for OpenAI API keys, OpenAI Codex OAuth, and the Codex CLI default model.
--- a/docs/tools/image-generation.md
+++ b/docs/tools/image-generation.md
@@ -96,6 +96,10 @@ Resolution hint.
 Number of images to generate (1–4).
 </ParamField>

+<ParamField path="timeoutMs" type="number">
+Optional provider request timeout in milliseconds.
+</ParamField>
+
 <ParamField path="filename" type="string">
 Output filename hint.
 </ParamField>
--- a/docs/tools/music-generation.md
+++ b/docs/tools/music-generation.md
@@ -125,6 +125,7 @@ Direct generation example:
 | `image`           | string   | Single reference image path or URL                                                                |
 | `images`          | string[] | Multiple reference images (up to 10)                                                              |
 | `durationSeconds` | number   | Target duration in seconds when the provider supports duration hints                              |
+| `timeoutMs`       | number   | Optional provider request timeout in milliseconds                                                 |
 | `format`          | string   | Output format hint (`mp3` or `wav`) when the provider supports it                                 |
 | `filename`        | string   | Output filename hint                                                                              |

--- a/docs/tools/tts.md
+++ b/docs/tools/tts.md
@@ -507,6 +507,8 @@ Notes:
 The `tts` tool converts text to speech and returns an audio attachment for
 reply delivery. When the channel is Feishu, Matrix, Telegram, or WhatsApp,
 the audio is delivered as a voice message rather than a file attachment.
+It accepts optional `channel` and `timeoutMs` fields; `timeoutMs` is a
+per-call provider request timeout in milliseconds.

 ## Gateway RPC

--- a/docs/tools/video-generation.md
+++ b/docs/tools/video-generation.md
@@ -170,6 +170,7 @@ dimensions). Providers that do not declare it surface the value via
 | `action`          | string | `"generate"` (default), `"status"`, or `"list"`                                                                                                                                                                                                                                                                                                      |
 | `model`           | string | Provider/model override (e.g. `runway/gen4.5`)                                                                                                                                                                                                                                                                                                       |
 | `filename`        | string | Output filename hint                                                                                                                                                                                                                                                                                                                                 |
+| `timeoutMs`       | number | Optional provider request timeout in milliseconds                                                                                                                                                                                                                                                                                                    |
 | `providerOptions` | object | Provider-specific options as a JSON object (e.g. `{"seed": 42, "draft": true}`). Providers that declare a typed schema validate the keys and types; unknown keys or mismatches skip the candidate during fallback. Providers without a declared schema receive the options as-is. Run `video_generate action=list` to see what each provider accepts |

 Not all providers support all parameters. OpenClaw already normalizes duration to the closest provider-supported value, and it also remaps translated geometry hints such as size-to-aspect-ratio when a fallback provider exposes a different control surface. Truly unsupported overrides are ignored on a best-effort basis and reported as warnings in the tool result. Hard capability limits (such as too many reference inputs) fail before submission.
--- a/extensions/google/image-generation-provider.ts
+++ b/extensions/google/image-generation-provider.ts
@@ -165,7 +165,7 @@ export function buildGoogleImageGenerationProvider(): ImageGenerationProvider {
              : {}),
          },
        },
-        timeoutMs: 60_000,
+        timeoutMs: req.timeoutMs ?? 60_000,
        fetchFn: fetch,
        pinDns: false,
        allowPrivateNetwork,
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@@ -753,6 +753,7 @@ export async function textToSpeech(params: {
  channel?: string;
  overrides?: TtsDirectiveOverrides;
  disableFallback?: boolean;
+  timeoutMs?: number;
 }): Promise<TtsResult> {
  const synthesis = await synthesizeSpeech(params);
  if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
@@ -791,6 +792,7 @@ export async function synthesizeSpeech(params: {
  channel?: string;
  overrides?: TtsDirectiveOverrides;
  disableFallback?: boolean;
+  timeoutMs?: number;
 }): Promise<TtsSynthesisResult> {
  const setup = resolveTtsRequestSetup({
    text: params.text,
@@ -804,6 +806,7 @@ export async function synthesizeSpeech(params: {
  }

  const { config, providers } = setup;
+  const timeoutMs = params.timeoutMs ?? config.timeoutMs;
  const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";

  const errors: string[] = [];
@@ -840,7 +843,7 @@ export async function synthesizeSpeech(params: {
        providerConfig: resolvedProvider.providerConfig,
        target,
        providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
-        timeoutMs: config.timeoutMs,
+        timeoutMs,
      });
      const latencyMs = Date.now() - providerStart;
      attempts.push({
--- a/src/agents/tools/image-generate-tool.ts
+++ b/src/agents/tools/image-generate-tool.ts
@@ -25,6 +25,7 @@ import {
  buildMediaReferenceDetails,
  isCapabilityProviderConfigured,
  normalizeMediaReferenceInputs,
+  readGenerationTimeoutMs,
  resolveCapabilityModelConfigForTool,
  resolveGenerateAction,
  resolveMediaToolLocalRoots,
@@ -108,6 +109,12 @@ const ImageGenerateToolSchema = Type.Object({
      maximum: MAX_COUNT,
    }),
  ),
+  timeoutMs: Type.Optional(
+    Type.Number({
+      description: "Optional provider request timeout in milliseconds.",
+      minimum: 1,
+    }),
+  ),
 });

 function getImageGenerationProviderAuthEnvVars(providerId: string): string[] {
@@ -490,6 +497,7 @@ export function createImageGenerateTool(options?: {
      const size = readStringParam(params, "size");
      const aspectRatio = normalizeAspectRatio(readStringParam(params, "aspectRatio"));
      const explicitResolution = normalizeResolution(readStringParam(params, "resolution"));
+      const timeoutMs = readGenerationTimeoutMs(params);
      const selectedProvider = resolveSelectedImageGenerationProvider({
        config: effectiveCfg,
        imageGenerationModelConfig,
@@ -535,6 +543,7 @@ export function createImageGenerateTool(options?: {
        resolution,
        count,
        inputImages,
+        timeoutMs,
      });
      const ignoredOverrides = result.ignoredOverrides ?? [];
      const displayProvider = sanitizeInlineDirectiveText(result.provider);
@@ -617,6 +626,7 @@ export function createImageGenerateTool(options?: {
            ? { aspectRatio: normalizedAspectRatio ?? aspectRatio }
            : {}),
          ...(filename ? { filename } : {}),
+          ...(timeoutMs !== undefined ? { timeoutMs } : {}),
          attempts: result.attempts,
          ...(result.normalization ? { normalization: result.normalization } : {}),
          metadata: result.metadata,
--- a/src/agents/tools/media-tool-shared.ts
+++ b/src/agents/tools/media-tool-shared.ts
@@ -9,7 +9,12 @@ import {
 } from "../../shared/string-coerce.js";
 import { normalizeModelRef } from "../model-selection.js";
 import { normalizeProviderId } from "../provider-id.js";
-import { ToolInputError, readStringArrayParam, readStringParam } from "./common.js";
+import {
+  ToolInputError,
+  readNumberParam,
+  readStringArrayParam,
+  readStringParam,
+} from "./common.js";
 import type { ImageModelConfig } from "./image-tool.helpers.js";
 import {
  buildToolModelConfigFromCandidates,
@@ -78,6 +83,20 @@ export function applyMusicGenerationModelConfigDefaults(
  return applyAgentDefaultModelConfig(cfg, "musicGenerationModel", musicGenerationModelConfig);
 }

+/**
+ * Reads the optional per-call `timeoutMs` tool argument via `readNumberParam`
+ * (with `integer` and `strict` set) and rejects non-positive values.
+ * Returns `undefined` when `readNumberParam` yields no value.
+ * @throws ToolInputError when the parsed value is zero or negative.
+ */
+export function readGenerationTimeoutMs(args: Record<string, unknown>): number | undefined {
+  const parsed = readNumberParam(args, "timeoutMs", { integer: true, strict: true });
+  if (parsed !== undefined && parsed <= 0) {
+    throw new ToolInputError("timeoutMs must be a positive integer in milliseconds.");
+  }
+  return parsed;
+}
+
 function applyAgentDefaultModelConfig(
  cfg: OpenClawConfig | undefined,
  key: "imageModel" | "imageGenerationModel" | "videoGenerationModel" | "musicGenerationModel",
--- a/src/agents/tools/music-generate-tool.ts
+++ b/src/agents/tools/music-generate-tool.ts
@@ -28,6 +28,7 @@ import {
  buildTaskRunDetails,
  normalizeMediaReferenceInputs,
  readBooleanToolParam,
+  readGenerationTimeoutMs,
  resolveCapabilityModelConfigForTool,
  resolveGenerateAction,
  resolveMediaToolLocalRoots,
@@ -98,6 +99,12 @@ const MusicGenerateToolSchema = Type.Object({
      minimum: 1,
    }),
  ),
+  timeoutMs: Type.Optional(
+    Type.Number({
+      description: "Optional provider request timeout in milliseconds.",
+      minimum: 1,
+    }),
+  ),
  format: Type.Optional(
    Type.String({
      description: 'Optional output format hint: "mp3" or "wav" when the provider supports it.',
@@ -336,6 +343,7 @@ async function executeMusicGenerationJob(params: {
  filename?: string;
  loadedReferenceImages: LoadedReferenceImage[];
  taskHandle?: MusicGenerationTaskHandle | null;
+  timeoutMs?: number;
 }): Promise<ExecutedMusicGeneration> {
  if (params.taskHandle) {
    recordMusicGenerationTaskProgress({
@@ -353,6 +361,7 @@ async function executeMusicGenerationJob(params: {
    durationSeconds: params.durationSeconds,
    format: params.format,
    inputImages: params.loadedReferenceImages.map((entry) => entry.sourceImage),
+    timeoutMs: params.timeoutMs,
  });
  if (params.taskHandle) {
    recordMusicGenerationTaskProgress({
@@ -437,6 +446,7 @@ async function executeMusicGenerationJob(params: {
        : {}),
      ...(!ignoredOverrideKeys.has("format") && params.format ? { format: params.format } : {}),
      ...(params.filename ? { filename: params.filename } : {}),
+      ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
      ...buildMediaReferenceDetails({
        entries: params.loadedReferenceImages,
        singleKey: "image",
@@ -520,6 +530,7 @@ export function createMusicGenerateTool(options?: {
      });
      const format = normalizeOutputFormat(readStringParam(args, "format"));
      const filename = readStringParam(args, "filename");
+      const timeoutMs = readGenerationTimeoutMs(args);
      const imageInputs = normalizeReferenceImageInputs(args);
      const selectedProvider = resolveSelectedMusicGenerationProvider({
        config: effectiveCfg,
@@ -564,6 +575,7 @@ export function createMusicGenerateTool(options?: {
              filename,
              loadedReferenceImages,
              taskHandle,
+              timeoutMs,
            });
            completeMusicGenerationTaskRun({
              handle: taskHandle,
@@ -627,6 +639,7 @@ export function createMusicGenerateTool(options?: {
            ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
            ...(format ? { format } : {}),
            ...(filename ? { filename } : {}),
+            ...(timeoutMs !== undefined ? { timeoutMs } : {}),
          },
        };
      }
@@ -644,6 +657,7 @@ export function createMusicGenerateTool(options?: {
          filename,
          loadedReferenceImages,
          taskHandle,
+          timeoutMs,
        });
        completeMusicGenerationTaskRun({
          handle: taskHandle,
--- a/src/agents/tools/tts-tool.test.ts
+++ b/src/agents/tools/tts-tool.test.ts
@@ -43,6 +43,26 @@ describe("createTtsTool", () => {
    expect(JSON.stringify(result.content)).not.toContain("MEDIA:");
  });

+  it("passes an optional timeout to speech generation", async () => {
+    // Stub a successful synthesis result for the mocked textToSpeech call.
+    const timeoutMs = 12_345;
+    textToSpeechSpy.mockResolvedValue({
+      success: true,
+      audioPath: "/tmp/reply.opus",
+      provider: "test",
+      voiceCompatible: true,
+    });
+
+    const outcome = await createTtsTool().execute("call-1", { text: "hello", timeoutMs });
+
+    // The per-call timeout must be forwarded to textToSpeech unchanged and
+    // echoed back in the tool result details.
+    expect(textToSpeechSpy).toHaveBeenCalledWith(
+      expect.objectContaining({ text: "hello", timeoutMs }),
+    );
+    expect(outcome.details).toMatchObject({ timeoutMs });
+  });
+
  it("echoes longer utterances verbatim into the tool-result content", async () => {
    textToSpeechSpy.mockResolvedValue({
      success: true,
--- a/src/agents/tools/tts-tool.ts
+++ b/src/agents/tools/tts-tool.ts
@@ -5,15 +5,35 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js";
 import { textToSpeech } from "../../tts/tts.js";
 import type { GatewayMessageChannel } from "../../utils/message-channel.js";
 import type { AnyAgentTool } from "./common.js";
-import { readStringParam } from "./common.js";
+import { ToolInputError, readNumberParam, readStringParam } from "./common.js";

 const TtsToolSchema = Type.Object({
  text: Type.String({ description: "Text to convert to speech." }),
  channel: Type.Optional(
    Type.String({ description: "Optional channel id to pick output format." }),
  ),
+  timeoutMs: Type.Optional(
+    Type.Number({
+      description: "Optional provider request timeout in milliseconds.",
+      minimum: 1,
+    }),
+  ),
 });

+/**
+ * Parses the optional `timeoutMs` argument of a `tts` tool call.
+ * Delegates to `readNumberParam` with `integer` and `strict` enabled;
+ * when that helper returns `undefined`, so does this function.
+ * @throws ToolInputError if the value read is less than or equal to zero.
+ */
+function readTtsTimeoutMs(args: Record<string, unknown>): number | undefined {
+  const requested = readNumberParam(args, "timeoutMs", { integer: true, strict: true });
+  if (requested !== undefined && requested <= 0) {
+    throw new ToolInputError("timeoutMs must be a positive integer in milliseconds.");
+  }
+  return requested;
+}
+
 /**
 * Defuse reply-directive tokens inside spoken transcripts before they flow
 * through tool-result content. When verbose tool output is enabled,
@@ -48,11 +68,13 @@ export function createTtsTool(opts?: {
      const params = args as Record<string, unknown>;
      const text = readStringParam(params, "text", { required: true });
      const channel = readStringParam(params, "channel");
+      const timeoutMs = readTtsTimeoutMs(params);
      const cfg = opts?.config ?? loadConfig();
      const result = await textToSpeech({
        text,
        cfg,
        channel: channel ?? opts?.agentChannel,
+        timeoutMs,
      });

      if (result.success && result.audioPath) {
@@ -66,6 +88,7 @@ export function createTtsTool(opts?: {
          details: {
            audioPath: result.audioPath,
            provider: result.provider,
+            ...(timeoutMs !== undefined ? { timeoutMs } : {}),
            media: {
              mediaUrl: result.audioPath,
              trustedLocalMedia: true,
--- a/src/agents/tools/video-generate-tool.ts
+++ b/src/agents/tools/video-generate-tool.ts
@@ -32,6 +32,7 @@ import {
  buildTaskRunDetails,
  normalizeMediaReferenceInputs,
  readBooleanToolParam,
+  readGenerationTimeoutMs,
  resolveCapabilityModelConfigForTool,
  resolveGenerateAction,
  resolveMediaToolLocalRoots,
@@ -205,6 +206,12 @@ const VideoGenerateToolSchema = Type.Object({
        "keys each provider accepts.",
    }),
  ),
+  timeoutMs: Type.Optional(
+    Type.Number({
+      description: "Optional provider request timeout in milliseconds.",
+      minimum: 1,
+    }),
+  ),
 });

 export function resolveVideoGenerationModelConfigForTool(params: {
@@ -562,6 +569,7 @@ async function executeVideoGenerationJob(params: {
  loadedReferenceAudios: LoadedReferenceAsset[];
  taskHandle?: VideoGenerationTaskHandle | null;
  providerOptions?: Record<string, unknown>;
+  timeoutMs?: number;
 }): Promise<ExecutedVideoGeneration> {
  if (params.taskHandle) {
    recordVideoGenerationTaskProgress({
@@ -584,6 +592,7 @@ async function executeVideoGenerationJob(params: {
    inputVideos: params.loadedReferenceVideos.map((entry) => entry.sourceAsset),
    inputAudios: params.loadedReferenceAudios.map((entry) => entry.sourceAsset),
    providerOptions: params.providerOptions,
+    timeoutMs: params.timeoutMs,
  });
  if (params.taskHandle) {
    recordVideoGenerationTaskProgress({
@@ -747,6 +756,7 @@ async function executeVideoGenerationJob(params: {
        ? { watermark: params.watermark }
        : {}),
      ...(params.filename ? { filename: params.filename } : {}),
+      ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
      attempts: result.attempts,
      ...(result.normalization ? { normalization: result.normalization } : {}),
      metadata: result.metadata,
@@ -825,6 +835,7 @@ export function createVideoGenerateTool(options?: {
      });
      const audio = readBooleanToolParam(args, "audio");
      const watermark = readBooleanToolParam(args, "watermark");
+      const timeoutMs = readGenerationTimeoutMs(args);
      // providerOptions must be a plain object. Arrays are objects in JS, so
      // exclude them explicitly — a bogus call like `providerOptions: ["seed", 42]`
      // would otherwise be cast to `Record<string, unknown>` with numeric-string
@@ -960,6 +971,7 @@ export function createVideoGenerateTool(options?: {
              loadedReferenceAudios,
              taskHandle,
              providerOptions,
+              timeoutMs,
            });
            completeVideoGenerationTaskRun({
              handle: taskHandle,
@@ -1032,6 +1044,7 @@ export function createVideoGenerateTool(options?: {
            ...(typeof audio === "boolean" ? { audio } : {}),
            ...(typeof watermark === "boolean" ? { watermark } : {}),
            ...(filename ? { filename } : {}),
+            ...(timeoutMs !== undefined ? { timeoutMs } : {}),
          },
        };
      }
@@ -1054,6 +1067,7 @@ export function createVideoGenerateTool(options?: {
          loadedReferenceAudios,
          taskHandle,
          providerOptions,
+          timeoutMs,
        });
        completeVideoGenerationTaskRun({
          handle: taskHandle,
--- a/src/image-generation/runtime-types.ts
+++ b/src/image-generation/runtime-types.ts
@@ -21,6 +21,8 @@ export type GenerateImageParams = {
  aspectRatio?: string;
  resolution?: ImageGenerationResolution;
  inputImages?: ImageGenerationSourceImage[];
+  /** Optional per-request provider timeout in milliseconds. */
+  timeoutMs?: number;
 };

 export type GenerateImageRuntimeResult = {
--- a/src/image-generation/runtime.test.ts
+++ b/src/image-generation/runtime.test.ts
@@ -26,6 +26,7 @@ describe("image-generation runtime", () => {
  it("generates images through the active image-generation provider", async () => {
    const authStore = { version: 1, profiles: {} } as const;
    let seenAuthStore: unknown;
+    let seenTimeoutMs: number | undefined;
    mocks.resolveAgentModelPrimaryValue.mockReturnValue("image-plugin/img-v1");
    const provider: ImageGenerationProvider = {
      id: "image-plugin",
@@ -33,8 +34,9 @@ describe("image-generation runtime", () => {
        generate: {},
        edit: { enabled: false },
      },
-      async generateImage(req: { authStore?: unknown }) {
+      async generateImage(req: { authStore?: unknown; timeoutMs?: number }) {
        seenAuthStore = req.authStore;
+        seenTimeoutMs = req.timeoutMs;
        return {
          images: [
            {
@@ -60,12 +62,14 @@ describe("image-generation runtime", () => {
      prompt: "draw a cat",
      agentDir: "/tmp/agent",
      authStore,
+      timeoutMs: 12_345,
    });

    expect(result.provider).toBe("image-plugin");
    expect(result.model).toBe("img-v1");
    expect(result.attempts).toEqual([]);
    expect(seenAuthStore).toEqual(authStore);
+    expect(seenTimeoutMs).toBe(12_345);
    expect(result.images).toEqual([
      {
        buffer: Buffer.from("png-bytes"),
--- a/src/image-generation/runtime.ts
+++ b/src/image-generation/runtime.ts
@@ -85,6 +85,7 @@ export async function generateImage(
        aspectRatio: sanitized.aspectRatio,
        resolution: sanitized.resolution,
        inputImages: params.inputImages,
+        ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
      });
      if (!Array.isArray(result.images) || result.images.length === 0) {
        throw new Error("Image generation provider returned no images.");
--- a/src/music-generation/runtime-types.ts
+++ b/src/music-generation/runtime-types.ts
@@ -21,6 +21,8 @@ export type GenerateMusicParams = {
  durationSeconds?: number;
  format?: MusicGenerationOutputFormat;
  inputImages?: MusicGenerationSourceImage[];
+  /** Optional per-request provider timeout in milliseconds. */
+  timeoutMs?: number;
 };

 export type GenerateMusicRuntimeResult = {
--- a/src/music-generation/runtime.test.ts
+++ b/src/music-generation/runtime.test.ts
@@ -26,12 +26,14 @@ describe("music-generation runtime", () => {
  it("generates tracks through the active music-generation provider", async () => {
    const authStore = { version: 1, profiles: {} } as const;
    let seenAuthStore: unknown;
+    let seenTimeoutMs: number | undefined;
    mocks.resolveAgentModelPrimaryValue.mockReturnValue("music-plugin/track-v1");
    const provider: MusicGenerationProvider = {
      id: "music-plugin",
      capabilities: {},
-      async generateMusic(req: { authStore?: unknown }) {
+      async generateMusic(req: { authStore?: unknown; timeoutMs?: number }) {
        seenAuthStore = req.authStore;
+        seenTimeoutMs = req.timeoutMs;
        return {
          tracks: [
            {
@@ -57,6 +59,7 @@ describe("music-generation runtime", () => {
      prompt: "play a synth line",
      agentDir: "/tmp/agent",
      authStore,
+      timeoutMs: 12_345,
    });

    expect(result.provider).toBe("music-plugin");
@@ -64,6 +67,7 @@ describe("music-generation runtime", () => {
    expect(result.attempts).toEqual([]);
    expect(result.ignoredOverrides).toEqual([]);
    expect(seenAuthStore).toEqual(authStore);
+    expect(seenTimeoutMs).toBe(12_345);
    expect(result.tracks).toEqual([
      {
        buffer: Buffer.from("mp3-bytes"),
--- a/src/music-generation/runtime.ts
+++ b/src/music-generation/runtime.ts
@@ -82,6 +82,7 @@ export async function generateMusic(
        durationSeconds: sanitized.durationSeconds,
        format: sanitized.format,
        inputImages: params.inputImages,
+        ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
      });
      if (!Array.isArray(result.tracks) || result.tracks.length === 0) {
        throw new Error("Music generation provider returned no tracks.");
--- a/src/plugin-sdk/tts-runtime.types.ts
+++ b/src/plugin-sdk/tts-runtime.types.ts
@@ -69,6 +69,7 @@ export type TtsRequestParams = {
  channel?: string;
  overrides?: TtsDirectiveOverrides;
  disableFallback?: boolean;
+  timeoutMs?: number;
 };

 export type TtsTelephonyRequestParams = {
--- a/src/video-generation/runtime-types.ts
+++ b/src/video-generation/runtime-types.ts
@@ -27,6 +27,8 @@ export type GenerateVideoParams = {
  inputAudios?: VideoGenerationSourceAsset[];
  /** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */
  providerOptions?: Record<string, unknown>;
+  /** Optional per-request provider timeout in milliseconds. */
+  timeoutMs?: number;
 };

 export type GenerateVideoRuntimeResult = {
--- a/src/video-generation/runtime.test.ts
+++ b/src/video-generation/runtime.test.ts
@@ -43,12 +43,14 @@ describe("video-generation runtime", () => {
  it("generates videos through the active video-generation provider", async () => {
    const authStore = { version: 1, profiles: {} } as const;
    let seenAuthStore: unknown;
+    let seenTimeoutMs: number | undefined;
    mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
    const provider: VideoGenerationProvider = {
      id: "video-plugin",
      capabilities: {},
-      async generateVideo(req: { authStore?: unknown }) {
+      async generateVideo(req: { authStore?: unknown; timeoutMs?: number }) {
        seenAuthStore = req.authStore;
+        seenTimeoutMs = req.timeoutMs;
        return {
          videos: [
            {
@@ -74,6 +76,7 @@ describe("video-generation runtime", () => {
      prompt: "animate a cat",
      agentDir: "/tmp/agent",
      authStore,
+      timeoutMs: 12_345,
    });

    expect(result.provider).toBe("video-plugin");
@@ -81,6 +84,7 @@ describe("video-generation runtime", () => {
    expect(result.attempts).toEqual([]);
    expect(result.ignoredOverrides).toEqual([]);
    expect(seenAuthStore).toEqual(authStore);
+    expect(seenTimeoutMs).toBe(12_345);
    expect(result.videos).toEqual([
      {
        buffer: Buffer.from("mp4-bytes"),
--- a/src/video-generation/runtime.ts
+++ b/src/video-generation/runtime.ts
@@ -260,6 +260,7 @@ export async function generateVideo(
        inputVideos: params.inputVideos,
        inputAudios: params.inputAudios,
        providerOptions: params.providerOptions,
+        ...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
      });
      if (!Array.isArray(result.videos) || result.videos.length === 0) {
        throw new Error("Video generation provider returned no videos.");