diff --git a/src/media-understanding/image.test.ts b/src/media-understanding/image.test.ts
index c2aa7187b9e..f4ba23bf27f 100644
--- a/src/media-understanding/image.test.ts
+++ b/src/media-understanding/image.test.ts
@@ -88,6 +88,7 @@ const { describeImageWithModel } = await import("./image.js");
 
 describe("describeImageWithModel", () => {
   afterEach(() => {
+    vi.useRealTimers();
     vi.unstubAllGlobals();
     vi.restoreAllMocks();
   });
@@ -520,6 +521,38 @@ describe("describeImageWithModel", () => {
     },
   );
 
+  it("rejects when a generic image completion ignores the abort signal", async () => {
+    vi.useFakeTimers();
+    discoverModelsMock.mockReturnValue({
+      find: vi.fn(() => ({
+        api: "openai-responses",
+        provider: "openai",
+        id: "gpt-5.4-mini",
+        input: ["text", "image"],
+        baseUrl: "https://api.openai.com/v1",
+      })),
+    });
+    completeMock.mockImplementation(() => new Promise(() => {}));
+
+    const result = describeImageWithModel({
+      cfg: {},
+      agentDir: "/tmp/openclaw-agent",
+      provider: "openai",
+      model: "gpt-5.4-mini",
+      buffer: Buffer.from("png-bytes"),
+      fileName: "image.png",
+      mime: "image/png",
+      prompt: "Describe the image.",
+      timeoutMs: 25,
+    });
+
+    const assertion = expect(result).rejects.toThrow("image description timed out after 25ms");
+    await vi.advanceTimersByTimeAsync(25);
+    await assertion;
+    const [, , options] = completeMock.mock.calls[0] ?? [];
+    expect(options?.signal?.aborted).toBe(true);
+  });
+
   it("normalizes deprecated google flash ids before lookup and keeps profile auth selection", async () => {
     const findMock = vi.fn((provider: string, modelId: string) => {
       expect(provider).toBe("google");
diff --git a/src/media-understanding/image.ts b/src/media-understanding/image.ts
index 572ceba650a..3e2a89536b0 100644
--- a/src/media-understanding/image.ts
+++ b/src/media-understanding/image.ts
@@ -311,6 +311,49 @@ async function resolveMinimaxVlmFallbackRuntime(params: {
   };
 }
 
+/**
+ * Returns the milliseconds left of `timeoutMs` measured from `startedAtMs`,
+ * floored and clamped to at least 1, or `undefined` when `timeoutMs` is not a
+ * positive finite number (no deadline).
+ */
+function resolveImageDescriptionTimeoutMs(timeoutMs: number | undefined, startedAtMs: number) {
+  if (typeof timeoutMs !== "number" || !Number.isFinite(timeoutMs) || timeoutMs <= 0) {
+    return undefined;
+  }
+  return Math.max(1, Math.floor(timeoutMs - (Date.now() - startedAtMs)));
+}
+
+/**
+ * Races `task` against a `timeoutMs` deadline. On expiry the controller is
+ * aborted and the returned promise rejects, even if `task` never settles.
+ * With an undefined `timeoutMs` the task is awaited as-is.
+ */
+async function withImageDescriptionTimeout<T>(params: {
+  task: Promise<T>;
+  timeoutMs: number | undefined;
+  controller: AbortController;
+}): Promise<T> {
+  if (params.timeoutMs === undefined) {
+    return await params.task;
+  }
+  let timeout: NodeJS.Timeout | undefined;
+  try {
+    return await Promise.race([
+      params.task,
+      new Promise<never>((_, reject) => {
+        timeout = setTimeout(() => {
+          params.controller.abort();
+          reject(new Error(`image description timed out after ${params.timeoutMs}ms`));
+        }, params.timeoutMs);
+      }),
+    ]);
+  } finally {
+    if (timeout) {
+      clearTimeout(timeout);
+    }
+  }
+}
+
 async function describeImagesWithModelInternal(
   params: ImagesDescriptionRequest,
   options: { onPayload?: ProviderStreamOptions["onPayload"] } = {},
@@ -358,50 +401,45 @@ async function describeImagesWithModelInternal(
   const context = buildImageContext(prompt, params.images, {
     promptInUserContent: shouldPlaceImagePromptInUserContent(model),
   });
+  const startedAtMs = Date.now();
   const controller = new AbortController();
-  const timeout =
-    typeof params.timeoutMs === "number" &&
-    Number.isFinite(params.timeoutMs) &&
-    params.timeoutMs > 0
-      ? setTimeout(() => controller.abort(), params.timeoutMs)
-      : undefined;
 
   const maxTokens = resolveImageToolMaxTokens(model.maxTokens, params.maxTokens ?? 512);
   const completeImage = async (onPayload?: ProviderStreamOptions["onPayload"]) => {
     const payloadHandler = composeImageDescriptionPayloadHandlers(onPayload, options.onPayload);
-    return await complete(model, context, {
-      apiKey,
-      maxTokens,
-      signal: controller.signal,
-      ...(payloadHandler ? { onPayload: payloadHandler } : {}),
+    return await withImageDescriptionTimeout({
+      controller,
+      timeoutMs: resolveImageDescriptionTimeoutMs(params.timeoutMs, startedAtMs),
+      task: complete(model, context, {
+        apiKey,
+        maxTokens,
+        signal: controller.signal,
+        ...(payloadHandler ? { onPayload: payloadHandler } : {}),
+      }),
     });
   };
 
+  const message = await completeImage();
   try {
-    const message = await completeImage();
-    try {
-      const text = coerceImageAssistantText({
-        message,
-        provider: model.provider,
-        model: model.id,
-      });
-      return { text, model: model.id };
-    } catch (err) {
-      if (!isImageModelNoTextError(err) || !hasImageReasoningOnlyResponse(message)) {
-        throw err;
-      }
-    }
-
-    const retryMessage = await completeImage(disableReasoningForImageRetryPayload);
     const text = coerceImageAssistantText({
-      message: retryMessage,
+      message,
       provider: model.provider,
       model: model.id,
     });
     return { text, model: model.id };
-  } finally {
-    clearTimeout(timeout);
+  } catch (err) {
+    if (!isImageModelNoTextError(err) || !hasImageReasoningOnlyResponse(message)) {
+      throw err;
+    }
   }
+
+  const retryMessage = await completeImage(disableReasoningForImageRetryPayload);
+  const text = coerceImageAssistantText({
+    message: retryMessage,
+    provider: model.provider,
+    model: model.id,
+  });
+  return { text, model: model.id };
 }
 
 export async function describeImagesWithModel(