diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f395a1b80a..441bdee8beb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai - Agents/Qwen: preserve exact custom `modelstudio` provider configs with foreign `api` owners so explicit OpenAI-compatible Model Studio endpoints no longer get normalized into the bundled Qwen plugin path. Fixes #64483. Thanks @FiredMosquito831. - MCP/bundle-mcp: normalize CLI-native `type: "http"` MCP server entries to OpenClaw `transport: "streamable-http"` on save, repair existing configs with doctor, and keep embedded Pi from falling back to legacy SSE GET-first startup for those servers. Fixes #72757. Thanks @Studioscale. - Media-understanding/audio: migrate deprecated `{input}` placeholders in legacy `audio.transcription.command` configs to `{{MediaPath}}`, so custom audio transcribers no longer receive the literal placeholder after doctor repair. Fixes #72760. Thanks @krisfanue3-hash. +- Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt. - Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden. - Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia. - Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. 
Thanks @garethdaine. diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md index 8e36625ece5..842f9a081b7 100644 --- a/docs/gateway/local-models.md +++ b/docs/gateway/local-models.md @@ -11,6 +11,10 @@ Local is doable, but OpenClaw expects large context + strong defenses against pr If you want the lowest-friction local setup, start with [LM Studio](/providers/lmstudio) or [Ollama](/providers/ollama) and `openclaw onboard`. This page is the opinionated guide for higher-end local stacks and custom OpenAI-compatible local servers. + +**WSL2 + Ollama + NVIDIA/CUDA users:** The official Ollama Linux installer enables a systemd service with `Restart=always`. On WSL2 GPU setups, autostart can reload the last model during boot and pin host memory. If your WSL2 VM repeatedly restarts after enabling Ollama, see [WSL2 crash loop](/providers/ollama#wsl2-crash-loop-repeated-reboots). + + ## Recommended: LM Studio + large local model (Responses API) Best current local stack. Load a large model in LM Studio (for example, a full-size Qwen, DeepSeek, or Llama build), enable the local server (default `http://127.0.0.1:1234`), and use Responses API to keep reasoning separate from final text. diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index 802154382d3..c189c976eb4 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -897,6 +897,41 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s ## Troubleshooting + +### WSL2 crash loop (repeated reboots) + + On WSL2 with NVIDIA/CUDA, the official Ollama Linux installer creates an `ollama.service` systemd unit with `Restart=always`. If that service autostarts and loads a GPU-backed model during WSL2 boot, Ollama can pin host memory while the model loads. Hyper-V memory reclaim cannot always reclaim those pinned pages, so Windows can terminate the WSL2 VM, systemd starts Ollama again, and the loop repeats. 
+ + Common evidence: + + - repeated WSL2 reboots or terminations from the Windows side + - high CPU in `app.slice` or `ollama.service` shortly after WSL2 startup + - SIGTERM from systemd rather than a Linux OOM-killer event + + OpenClaw logs a startup warning when it detects WSL2, `ollama.service` enabled with `Restart=always`, and visible CUDA markers. + + Mitigation: + + ```bash + sudo systemctl disable ollama + ``` + + Add this to `%USERPROFILE%\.wslconfig` on the Windows side, then run `wsl --shutdown`: + + ```ini + [experimental] + autoMemoryReclaim=disabled + ``` + + Set a shorter keep-alive in the Ollama service environment, or start Ollama manually only when you need it: + + ```bash + export OLLAMA_KEEP_ALIVE=5m + ollama serve + ``` + + See [ollama/ollama#11317](https://github.com/ollama/ollama/issues/11317). + + + Make sure Ollama is running and that you set `OLLAMA_API_KEY` (or an auth profile), and that you did **not** define an explicit `models.providers.ollama` entry: diff --git a/extensions/ollama/index.ts b/extensions/ollama/index.ts index c997f989196..3f405ae8857 100644 --- a/extensions/ollama/index.ts +++ b/extensions/ollama/index.ts @@ -39,6 +39,7 @@ import { resolveConfiguredOllamaProviderConfig, } from "./src/stream.js"; import { createOllamaWebSearchProvider } from "./src/web-search-provider.js"; +import { checkWsl2CrashLoopRisk } from "./src/wsl2-crash-loop-check.js"; function usesOllamaOpenAICompatTransport(model: { api?: unknown; @@ -60,6 +61,9 @@ export default definePluginEntry({ name: "Ollama Provider", description: "Bundled Ollama provider plugin", register(api: OpenClawPluginApi) { + if (api.registrationMode === "full") { + void checkWsl2CrashLoopRisk(api.logger); + } api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter); api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider); const startupPluginConfig = (api.pluginConfig ?? 
{}) as OllamaPluginConfig; diff --git a/extensions/ollama/src/wsl2-crash-loop-check.test.ts b/extensions/ollama/src/wsl2-crash-loop-check.test.ts new file mode 100644 index 00000000000..78ebc7b6562 --- /dev/null +++ b/extensions/ollama/src/wsl2-crash-loop-check.test.ts @@ -0,0 +1,157 @@ +import { promisify } from "node:util"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const { isWSL2SyncMock } = vi.hoisted(() => ({ + isWSL2SyncMock: vi.fn(() => false), +})); + +vi.mock("openclaw/plugin-sdk/runtime-env", () => ({ + isWSL2Sync: isWSL2SyncMock, +})); + +vi.mock("node:fs/promises", () => ({ + access: vi.fn(), +})); + +vi.mock("node:child_process", async () => { + const { promisify: realPromisify } = await import("node:util"); + const mockExecFile = vi.fn(); + const execFilePromise = vi.fn(); + (mockExecFile as unknown as Record<symbol, unknown>)[realPromisify.custom] = execFilePromise; + return { execFile: mockExecFile }; +}); + +import { execFile } from "node:child_process"; +import { access } from "node:fs/promises"; +import { + checkWsl2CrashLoopRisk, + hasWslCuda, + isOllamaEnabledWithRestartAlways, + parseSystemctlShowProperties, +} from "./wsl2-crash-loop-check.js"; + +const accessMock = vi.mocked(access); +const execFileMock = execFile as unknown as ReturnType<typeof vi.fn> & { + [key: symbol]: ReturnType<typeof vi.fn>; +}; +const execFilePromiseMock = vi.mocked(execFileMock[promisify.custom]); + +function createLogger() { + return { + debug: vi.fn(), + error: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + }; +} + +function mockSystemctl(stdout: string): void { + execFilePromiseMock.mockResolvedValue({ stdout, stderr: "" }); +} + +describe("wsl2 crash-loop check", () => { + beforeEach(() => { + vi.clearAllMocks(); + isWSL2SyncMock.mockReturnValue(false); + }); + + it("parses systemctl show properties", () => { + expect( + parseSystemctlShowProperties("UnitFileState=enabled\nRestart=always\nIgnoredLine\n"), + ).toEqual( + new Map([ + ["UnitFileState", "enabled"], + ["Restart", 
"always"], + ]), + ); + }); + + it("detects enabled Restart=always ollama service", async () => { + mockSystemctl("UnitFileState=enabled\nRestart=always\n"); + + await expect(isOllamaEnabledWithRestartAlways()).resolves.toBe(true); + + expect(execFilePromiseMock).toHaveBeenCalledWith( + "systemctl", + ["show", "ollama.service", "--property=UnitFileState,Restart", "--no-pager"], + { timeout: 5000 }, + ); + }); + + it("does not treat enabled-runtime as persistent autostart", async () => { + mockSystemctl("UnitFileState=enabled-runtime\nRestart=always\n"); + + await expect(isOllamaEnabledWithRestartAlways()).resolves.toBe(false); + }); + + it("requires Restart=always", async () => { + mockSystemctl("UnitFileState=enabled\nRestart=on-failure\n"); + + await expect(isOllamaEnabledWithRestartAlways()).resolves.toBe(false); + }); + + it("returns false when systemctl is unavailable", async () => { + execFilePromiseMock.mockRejectedValue(new Error("systemd unavailable")); + + await expect(isOllamaEnabledWithRestartAlways()).resolves.toBe(false); + }); + + it("detects CUDA from the first available WSL marker", async () => { + accessMock.mockResolvedValueOnce(undefined); + + await expect(hasWslCuda()).resolves.toBe(true); + expect(accessMock).toHaveBeenCalledWith("/dev/dxg"); + }); + + it("checks the remaining CUDA markers before returning false", async () => { + accessMock.mockRejectedValue(new Error("missing")); + + await expect(hasWslCuda()).resolves.toBe(false); + expect(accessMock).toHaveBeenCalledTimes(4); + }); + + it("warns for WSL2 plus Ollama autostart plus CUDA", async () => { + isWSL2SyncMock.mockReturnValue(true); + mockSystemctl("UnitFileState=enabled\nRestart=always\n"); + accessMock.mockResolvedValueOnce(undefined); + const logger = createLogger(); + + await checkWsl2CrashLoopRisk(logger); + + expect(logger.warn).toHaveBeenCalledTimes(1); + const message = String(logger.warn.mock.calls[0]?.[0]); + expect(message).toContain("WSL2 crash-loop risk"); + 
expect(message).toContain("sudo systemctl disable ollama"); + expect(message).toContain("autoMemoryReclaim=disabled"); + expect(message).toContain("OLLAMA_KEEP_ALIVE=5m"); + }); + + it("does not probe systemd outside WSL2", async () => { + const logger = createLogger(); + + await checkWsl2CrashLoopRisk(logger); + + expect(execFilePromiseMock).not.toHaveBeenCalled(); + expect(logger.warn).not.toHaveBeenCalled(); + }); + + it("does not warn when CUDA is not visible", async () => { + isWSL2SyncMock.mockReturnValue(true); + mockSystemctl("UnitFileState=enabled\nRestart=always\n"); + accessMock.mockRejectedValue(new Error("missing")); + const logger = createLogger(); + + await checkWsl2CrashLoopRisk(logger); + + expect(logger.warn).not.toHaveBeenCalled(); + }); + + it("never throws from advisory checks", async () => { + isWSL2SyncMock.mockReturnValue(true); + execFilePromiseMock.mockRejectedValue(new Error("boom")); + const logger = createLogger(); + + await expect(checkWsl2CrashLoopRisk(logger)).resolves.toBeUndefined(); + expect(logger.warn).not.toHaveBeenCalled(); + }); +}); diff --git a/extensions/ollama/src/wsl2-crash-loop-check.ts b/extensions/ollama/src/wsl2-crash-loop-check.ts new file mode 100644 index 00000000000..809dd8b75b2 --- /dev/null +++ b/extensions/ollama/src/wsl2-crash-loop-check.ts @@ -0,0 +1,84 @@ +import { execFile } from "node:child_process"; +import { access } from "node:fs/promises"; +import { promisify } from "node:util"; +import type { PluginLogger } from "openclaw/plugin-sdk/plugin-entry"; +import { isWSL2Sync } from "openclaw/plugin-sdk/runtime-env"; + +const execFileAsync = promisify(execFile); +const SYSTEMCTL_TIMEOUT_MS = 5_000; +const WSL_CUDA_MARKERS = [ + "/dev/dxg", + "/usr/lib/wsl/lib/nvidia-smi", + "/usr/lib/wsl/lib/libcuda.so.1", + "/usr/local/cuda", +]; + +export function parseSystemctlShowProperties(stdout: string): Map<string, string> { + const properties = new Map<string, string>(); + for (const line of stdout.split(/\r?\n/u)) { + const separator = 
line.indexOf("="); + if (separator <= 0) { + continue; + } + properties.set(line.slice(0, separator), line.slice(separator + 1)); + } + return properties; +} + +export async function isOllamaEnabledWithRestartAlways(): Promise<boolean> { + try { + const { stdout } = await execFileAsync( + "systemctl", + ["show", "ollama.service", "--property=UnitFileState,Restart", "--no-pager"], + { timeout: SYSTEMCTL_TIMEOUT_MS }, + ); + const properties = parseSystemctlShowProperties(stdout); + return properties.get("UnitFileState") === "enabled" && properties.get("Restart") === "always"; + } catch { + return false; + } +} + +export async function hasWslCuda(): Promise<boolean> { + for (const marker of WSL_CUDA_MARKERS) { + try { + await access(marker); + return true; + } catch { + // Try the next cheap marker. + } + } + return false; +} + +export async function checkWsl2CrashLoopRisk(logger: PluginLogger): Promise<void> { + try { + if (!isWSL2Sync()) { + return; + } + if (!(await isOllamaEnabledWithRestartAlways())) { + return; + } + if (!(await hasWslCuda())) { + return; + } + + logger.warn( + [ + "[ollama] WSL2 crash-loop risk: ollama.service is enabled with Restart=always and CUDA is visible.", + "On WSL2, GPU-backed Ollama can pin host memory while loading a model.", + "Hyper-V memory reclaim cannot always reclaim those pinned pages, so Windows can terminate and restart the WSL2 VM.", + "", + "Common evidence: repeated WSL2 reboots, high CPU in app.slice at startup, and SIGTERM from systemd rather than the Linux OOM killer.", + "See: https://github.com/ollama/ollama/issues/11317", + "", + "Mitigation:", + " 1. Disable autostart: sudo systemctl disable ollama", + " 2. Add [experimental] autoMemoryReclaim=disabled to %USERPROFILE%\\.wslconfig on Windows, then run wsl --shutdown", + " 3. Set OLLAMA_KEEP_ALIVE=5m in the Ollama service environment or start ollama serve manually when needed", + ].join("\n"), + ); + } catch { + // Advisory only: never break provider registration or model discovery. + } +}