fix(ollama): avoid timing out active model pulls

This commit is contained in:
Peter Steinberger
2026-04-27 03:40:26 +01:00
parent 8b27c489f5
commit 414fd41a1f
2 changed files with 122 additions and 22 deletions

View File

@@ -398,28 +398,77 @@ describe("ollama setup", () => {
describe("ensureOllamaModelPulled", () => {
// Verifies a fresh pull: /api/tags reports the model missing, so a /api/pull
// request is issued with a live (non-aborted) AbortSignal, and that signal is
// still not aborted after the 30s response timeout window once headers arrived.
// NOTE: the diff rendering had duplicated the pre/post-change lines of this
// test; this is the deduplicated post-change version.
it("pulls model when not available locally", async () => {
  vi.useFakeTimers();
  try {
    const progress = { update: vi.fn(), stop: vi.fn() };
    const prompter = {
      progress: vi.fn(() => progress),
    } as unknown as WizardPrompter;
    const fetchMock = createOllamaFetchMock({
      tags: ["llama3:8b"],
      pullResponse: new Response('{"status":"success"}\n', { status: 200 }),
    });
    vi.stubGlobal("fetch", fetchMock);
    await ensureOllamaModelPulled({
      config: createDefaultOllamaConfig("ollama/gemma4"),
      model: "ollama/gemma4",
      prompter,
    });
    // First call lists tags, second call performs the pull.
    expect(fetchMock).toHaveBeenCalledTimes(2);
    expect(fetchMock.mock.calls[1][0]).toContain("/api/pull");
    const pullInit = fetchMock.mock.calls[1][1];
    expect(pullInit?.signal).toBeInstanceOf(AbortSignal);
    expect(pullInit?.signal?.aborted).toBe(false);
    // Advancing past the response timeout must NOT abort an active pull —
    // the timeout is cleared once response headers are received.
    await vi.advanceTimersByTimeAsync(30_000);
    expect(pullInit?.signal?.aborted).toBe(false);
  } finally {
    vi.useRealTimers();
  }
});
// Verifies the stream-idle watchdog: the /api/pull response returns a
// ReadableStream that never emits a chunk, so the pull must fail once the
// 300s idle timeout elapses, surfacing a "stalled" message via the progress
// spinner and a download-failure error from ensureOllamaModelPulled.
it("fails stalled model pull streams after an idle timeout", async () => {
vi.useFakeTimers();
try {
const progress = { update: vi.fn(), stop: vi.fn() };
const prompter = {
progress: vi.fn(() => progress),
} as unknown as WizardPrompter;
// tags -> empty list (forces a pull); pull -> open stream that never yields.
const fetchMock = vi.fn(async (input: string | URL | Request) => {
const url = requestUrl(input);
if (url.endsWith("/api/tags")) {
return jsonResponse({ models: [] });
}
if (url.endsWith("/api/pull")) {
return new Response(new ReadableStream<Uint8Array>(), { status: 200 });
}
throw new Error(`Unexpected fetch: ${url}`);
});
vi.stubGlobal("fetch", fetchMock);
// Capture the rejection instead of letting it become an unhandled rejection
// while fake timers are advanced below.
const pullPromise = ensureOllamaModelPulled({
config: createDefaultOllamaConfig("ollama/gemma4"),
model: "ollama/gemma4",
prompter,
}).catch((err: unknown) => err);
// Pump microtasks/zero-length timers until the pull request has been issued
// (bounded at 50 iterations to avoid spinning forever if it never fires).
for (let attempts = 0; attempts < 50 && fetchMock.mock.calls.length < 2; attempts += 1) {
await vi.advanceTimersByTimeAsync(0);
await Promise.resolve();
}
expect(fetchMock.mock.calls[1]?.[0]).toContain("/api/pull");
// Jump past the 300_000ms stream idle timeout to trigger the watchdog.
await vi.advanceTimersByTimeAsync(300_000);
await expect(pullPromise).resolves.toEqual(
expect.objectContaining({ message: "Failed to download selected Ollama model" }),
);
expect(progress.stop).toHaveBeenCalledWith(expect.stringContaining("Ollama pull stalled"));
} finally {
vi.useRealTimers();
}
});
it("skips pull when model is already available", async () => {

View File

@@ -42,7 +42,8 @@ const OLLAMA_SUGGESTED_MODELS_LOCAL = [OLLAMA_DEFAULT_MODEL];
const OLLAMA_SUGGESTED_MODELS_CLOUD = ["kimi-k2.5:cloud", "minimax-m2.7:cloud", "glm-5.1:cloud"];
const OLLAMA_CONTEXT_ENRICH_LIMIT = 200;
const OLLAMA_CLOUD_MAX_DISCOVERED_MODELS = 500;
const OLLAMA_PULL_REQUEST_TIMEOUT_MS = 30_000;
const OLLAMA_PULL_RESPONSE_TIMEOUT_MS = 30_000;
const OLLAMA_PULL_STREAM_IDLE_TIMEOUT_MS = 300_000;
type OllamaSetupOptions = {
customBaseUrl?: string;
@@ -158,6 +159,48 @@ type OllamaPullChunk = {
type OllamaPullResult = { ok: true } | { ok: false; message: string };
/**
 * Reads one chunk from an Ollama pull stream, rejecting if no data arrives
 * within OLLAMA_PULL_STREAM_IDLE_TIMEOUT_MS. On timeout the reader is
 * cancelled (best-effort) so the underlying stream is released; a read that
 * settles after the timeout has fired is ignored, since the promise has
 * already been rejected.
 */
async function readOllamaPullChunkWithIdleTimeout(
  reader: ReadableStreamDefaultReader<Uint8Array>,
): Promise<ReadableStreamReadResult<Uint8Array>> {
  return await new Promise((resolve, reject) => {
    let settledByTimeout = false;
    const watchdog = setTimeout(() => {
      settledByTimeout = true;
      // Best-effort cancel; swallow any cancellation error.
      void reader.cancel().catch(() => undefined);
      reject(
        new Error(
          `Ollama pull stalled: no data received for ${Math.round(OLLAMA_PULL_STREAM_IDLE_TIMEOUT_MS / 1000)}s`,
        ),
      );
    }, OLLAMA_PULL_STREAM_IDLE_TIMEOUT_MS);
    void reader.read().then(
      (result) => {
        clearTimeout(watchdog);
        // Drop the chunk if the watchdog already rejected this promise.
        if (!settledByTimeout) {
          resolve(result);
        }
      },
      (err: unknown) => {
        clearTimeout(watchdog);
        if (!settledByTimeout) {
          reject(err);
        }
      },
    );
  });
}
async function pullOllamaModelCore(params: {
baseUrl: string;
modelName: string;
@@ -165,6 +208,11 @@ async function pullOllamaModelCore(params: {
}): Promise<OllamaPullResult> {
const baseUrl = resolveOllamaApiBase(params.baseUrl);
const modelName = normalizeOllamaModelName(params.modelName) ?? params.modelName.trim();
const responseController = new AbortController();
const responseTimeout = setTimeout(
responseController.abort.bind(responseController),
OLLAMA_PULL_RESPONSE_TIMEOUT_MS,
);
try {
const { response, release } = await fetchWithSsrFGuard({
url: `${baseUrl}/api/pull`,
@@ -173,10 +221,11 @@ async function pullOllamaModelCore(params: {
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name: modelName }),
},
timeoutMs: OLLAMA_PULL_REQUEST_TIMEOUT_MS,
signal: responseController.signal,
policy: buildOllamaBaseUrlSsrFPolicy(baseUrl),
auditContext: "ollama-setup.pull",
});
clearTimeout(responseTimeout);
try {
if (!response.ok) {
return { ok: false, message: `Failed to download ${modelName} (HTTP ${response.status})` };
@@ -225,7 +274,7 @@ async function pullOllamaModelCore(params: {
};
for (;;) {
const { done, value } = await reader.read();
const { done, value } = await readOllamaPullChunkWithIdleTimeout(reader);
if (done) {
break;
}
@@ -255,6 +304,8 @@ async function pullOllamaModelCore(params: {
} catch (err) {
const reason = formatErrorMessage(err);
return { ok: false, message: `Failed to download ${modelName}: ${reason}` };
} finally {
clearTimeout(responseTimeout);
}
}