diff --git a/CHANGELOG.md b/CHANGELOG.md index e59064b70e8..7b2167692b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai - Telegram/forum topics: persist learned topic names to the Telegram session sidecar store so agent context can keep using human topic names after a restart instead of relearning from future service metadata. (#66107) Thanks @obviyus. - Doctor/systemd: keep `openclaw doctor --repair` and service reinstall from re-embedding dotenv-backed secrets in user systemd units, while preserving newer inline overrides over stale state-dir `.env` values. (#66249) Thanks @tmimmanuel. - Doctor/plugins: cache external `preferOver` catalog lookups within each plugin auto-enable pass so large `agents.list` configs no longer peg CPU and repeatedly reread plugin catalogs during doctor/plugins resolution. (#66246) Thanks @yfge. +- Agents/local models: clarify low-context preflight hints for self-hosted models, point config-backed caps at the relevant OpenClaw setting, and stop suggesting larger models when `agents.defaults.contextTokens` is the real limit. (#66236) Thanks @ImLukeF. ## 2026.4.14-beta.1 diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md index d7bd15bfe85..208330d70a2 100644 --- a/docs/gateway/local-models.md +++ b/docs/gateway/local-models.md @@ -174,6 +174,7 @@ Compatibility notes for stricter OpenAI-compatible backends: - Gateway can reach the proxy? `curl http://127.0.0.1:1234/v1/models`. - LM Studio model unloaded? Reload; cold start is a common “hanging” cause. +- OpenClaw warns when the detected context window is below **32k** and blocks below **16k**. If you hit that preflight, raise the server/model context limit or choose a larger model. - Context errors? Lower `contextWindow` or raise your server limit. - OpenAI-compatible server returns `messages[].content ... expected a string`? Add `compat.requiresStringContent: true` on that model entry. 
diff --git a/extensions/qqbot/src/utils/platform.test.ts b/extensions/qqbot/src/utils/platform.test.ts index 246683d6fa5..208d6a507cd 100644 --- a/extensions/qqbot/src/utils/platform.test.ts +++ b/extensions/qqbot/src/utils/platform.test.ts @@ -93,7 +93,7 @@ describe("qqbot local media path remapping", () => { it("allows structured payload files inside the QQ Bot media directory", () => { const { mediaFile } = createQqbotMediaFile("allowed.png"); - expect(resolveQQBotPayloadLocalFilePath(mediaFile)).toBe(mediaFile); + expect(resolveQQBotPayloadLocalFilePath(mediaFile)).toBe(fs.realpathSync(mediaFile)); }); it("blocks structured payload files inside the QQ Bot data directory", () => { @@ -127,6 +127,6 @@ describe("qqbot local media path remapping", () => { "legacy.png", ); - expect(resolveQQBotPayloadLocalFilePath(missingWorkspacePath)).toBe(mediaFile); + expect(resolveQQBotPayloadLocalFilePath(missingWorkspacePath)).toBe(fs.realpathSync(mediaFile)); }); }); diff --git a/extensions/telegram/src/topic-name-cache.test.ts b/extensions/telegram/src/topic-name-cache.test.ts index c892bcf00db..d814125e77b 100644 --- a/extensions/telegram/src/topic-name-cache.test.ts +++ b/extensions/telegram/src/topic-name-cache.test.ts @@ -2,7 +2,7 @@ import syncFs from "node:fs"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { beforeEach, describe, expect, it, vi } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { clearTopicNameCache, getTopicEntry, @@ -14,10 +14,15 @@ import { describe("topic-name-cache", () => { beforeEach(() => { + vi.useRealTimers(); clearTopicNameCache(); resetTopicNameCacheForTest(); }); + afterEach(() => { + vi.useRealTimers(); + }); + it("stores and retrieves a topic name", () => { updateTopicName(-100123, 42, { name: "Deployments" }); expect(getTopicName(-100123, 42)).toBe("Deployments"); @@ -63,9 +68,11 @@ describe("topic-name-cache", () => { 
expect(topicNameCacheSize()).toBe(0); }); - it("updates timestamps on write", () => { + it("updates timestamps on write", async () => { + vi.useFakeTimers(); updateTopicName(-100123, 42, { name: "A" }); const t1 = getTopicEntry(-100123, 42)?.updatedAt ?? 0; + await vi.advanceTimersByTimeAsync(10); updateTopicName(-100123, 42, { name: "B" }); const t2 = getTopicEntry(-100123, 42)?.updatedAt ?? 0; expect(t2).toBeGreaterThan(t1); @@ -85,8 +92,10 @@ describe("topic-name-cache", () => { expect(getTopicName(-100000, 2048)).toBe("Topic 2048"); }); - it("refreshes recency on read so active topics survive eviction", () => { + it("refreshes recency on read so active topics survive eviction", async () => { + vi.useFakeTimers(); updateTopicName(-100000, 1, { name: "Active" }); + await vi.advanceTimersByTimeAsync(10); for (let i = 2; i <= 2048; i++) { updateTopicName(-100000, i, { name: `Topic ${i}` }); } diff --git a/src/agents/context-window-guard.test.ts b/src/agents/context-window-guard.test.ts index 58872bdee2b..1c481e745e6 100644 --- a/src/agents/context-window-guard.test.ts +++ b/src/agents/context-window-guard.test.ts @@ -4,6 +4,8 @@ import { CONTEXT_WINDOW_HARD_MIN_TOKENS, CONTEXT_WINDOW_WARN_BELOW_TOKENS, evaluateContextWindowGuard, + formatContextWindowBlockMessage, + formatContextWindowWarningMessage, resolveContextWindowInfo, } from "./context-window-guard.js"; @@ -222,4 +224,87 @@ describe("context-window-guard", () => { expect(CONTEXT_WINDOW_HARD_MIN_TOKENS).toBe(16_000); expect(CONTEXT_WINDOW_WARN_BELOW_TOKENS).toBe(32_000); }); + + it("adds a local-model hint to warning messages for localhost endpoints", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 24_000, source: "model" }, + }); + + expect( + formatContextWindowWarningMessage({ + provider: "lmstudio", + modelId: "qwen3", + guard, + runtimeBaseUrl: "http://127.0.0.1:1234/v1", + }), + ).toContain("local/self-hosted runs work best at 32000+ tokens"); + }); + + it("does not add 
local-model hints for generic custom endpoints", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 24_000, source: "model" }, + }); + + expect( + formatContextWindowWarningMessage({ + provider: "custom", + modelId: "hosted-proxy-model", + guard, + runtimeBaseUrl: "https://models.example.com/v1", + }), + ).toBe("low context window: custom/hosted-proxy-model ctx=24000 (warn<32000) source=model"); + }); + + it("adds a local-model hint to block messages for localhost endpoints", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 8_000, source: "model" }, + }); + + expect( + formatContextWindowBlockMessage({ + guard, + runtimeBaseUrl: "http://127.0.0.1:11434/v1", + }), + ).toContain("This looks like a local model endpoint."); + }); + + it("points config-backed block remediation at agents.defaults.contextTokens", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 8_000, source: "agentContextTokens" }, + }); + + const message = formatContextWindowBlockMessage({ + guard, + runtimeBaseUrl: "http://127.0.0.1:11434/v1", + }); + + expect(message).toContain("OpenClaw is capped by agents.defaults.contextTokens."); + expect(message).not.toContain("choose a larger model"); + }); + + it("points model config block remediation at contextWindow/contextTokens", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 8_000, source: "modelsConfig" }, + }); + + expect( + formatContextWindowBlockMessage({ + guard, + runtimeBaseUrl: "http://127.0.0.1:11434/v1", + }), + ).toContain("Raise contextWindow/contextTokens or choose a larger model."); + }); + + it("keeps block messages concise for public providers", () => { + const guard = evaluateContextWindowGuard({ + info: { tokens: 8_000, source: "model" }, + }); + + expect( + formatContextWindowBlockMessage({ + guard, + runtimeBaseUrl: "https://api.openai.com/v1", + }), + ).toBe(`Model context window too small (8000 tokens; source=model). 
Minimum is 16000.`); + }); }); diff --git a/src/agents/context-window-guard.ts b/src/agents/context-window-guard.ts index 3221b7e758c..25b0b7a088c 100644 --- a/src/agents/context-window-guard.ts +++ b/src/agents/context-window-guard.ts @@ -1,4 +1,5 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { resolveProviderEndpoint } from "./provider-attribution.js"; import { findNormalizedProviderValue } from "./provider-id.js"; export const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000; @@ -61,6 +62,77 @@ export type ContextWindowGuardResult = ContextWindowInfo & { shouldBlock: boolean; }; +export type ContextWindowGuardHint = { + endpointClass: ReturnType<typeof resolveProviderEndpoint>["endpointClass"]; + likelySelfHosted: boolean; +}; + +export function resolveContextWindowGuardHint(params: { + runtimeBaseUrl?: string | null; +}): ContextWindowGuardHint { + const endpoint = resolveProviderEndpoint(params.runtimeBaseUrl ?? undefined); + return { + endpointClass: endpoint.endpointClass, + likelySelfHosted: endpoint.endpointClass === "local", + }; +} + +export function formatContextWindowWarningMessage(params: { + provider: string; + modelId: string; + guard: ContextWindowGuardResult; + runtimeBaseUrl?: string | null; +}): string { + const base = `low context window: ${params.provider}/${params.modelId} ctx=${params.guard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${params.guard.source}`; + const hint = resolveContextWindowGuardHint({ runtimeBaseUrl: params.runtimeBaseUrl }); + if (!hint.likelySelfHosted) { + return base; + } + if (params.guard.source === "agentContextTokens") { + return ( + `${base}; OpenClaw is capped by agents.defaults.contextTokens, so raise that cap ` + + `if you want to use more of the model context window` + ); + } + if (params.guard.source === "modelsConfig") { + return ( + `${base}; OpenClaw is using the configured model context limit for this model, ` + + `so raise contextWindow/contextTokens if it is set too low` + ); + } + return ( + 
`${base}; local/self-hosted runs work best at ` + + `${CONTEXT_WINDOW_WARN_BELOW_TOKENS}+ tokens and may show weaker tool use or more compaction until the server/model context limit is raised` + ); +} + +export function formatContextWindowBlockMessage(params: { + guard: ContextWindowGuardResult; + runtimeBaseUrl?: string | null; +}): string { + const base = + `Model context window too small (${params.guard.tokens} tokens; ` + + `source=${params.guard.source}). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`; + const hint = resolveContextWindowGuardHint({ runtimeBaseUrl: params.runtimeBaseUrl }); + if (!hint.likelySelfHosted) { + return base; + } + if (params.guard.source === "agentContextTokens") { + return `${base} OpenClaw is capped by agents.defaults.contextTokens. Raise that cap.`; + } + if (params.guard.source === "modelsConfig") { + return ( + `${base} OpenClaw is using the configured model context limit for this model. ` + + `Raise contextWindow/contextTokens or choose a larger model.` + ); + } + return ( + `${base} This looks like a local model endpoint. ` + + `Raise the server/model context limit or choose a larger model. 
` + + `OpenClaw local/self-hosted runs work best at ${CONTEXT_WINDOW_WARN_BELOW_TOKENS}+ tokens.` + ); +} + export function evaluateContextWindowGuard(params: { info: ContextWindowInfo; warnBelowTokens?: number; diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts index 999b9c2c007..bffb4903059 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts @@ -169,6 +169,14 @@ export const mockedResolveContextWindowInfo = vi.fn(() => ({ tokens: 200000, source: "model", })); +export const mockedFormatContextWindowWarningMessage = vi.fn( + (params: { provider: string; modelId: string; guard: { tokens: number; source: string } }) => + `low context window: ${params.provider}/${params.modelId} ctx=${params.guard.tokens} source=${params.guard.source}`, +); +export const mockedFormatContextWindowBlockMessage = vi.fn( + (params: { guard: { tokens: number; source: string } }) => + `Model context window too small (${params.guard.tokens} tokens; source=${params.guard.source}). 
Minimum is 1000.`, +); export const mockedGetApiKeyForModel = vi.fn( async ({ profileId }: { profileId?: string } = {}) => ({ apiKey: "test-key", @@ -300,6 +308,16 @@ export function resetRunOverflowCompactionHarnessMocks(): void { tokens: 200000, source: "model", }); + mockedFormatContextWindowWarningMessage.mockReset(); + mockedFormatContextWindowWarningMessage.mockImplementation( + (params: { provider: string; modelId: string; guard: { tokens: number; source: string } }) => + `low context window: ${params.provider}/${params.modelId} ctx=${params.guard.tokens} source=${params.guard.source}`, + ); + mockedFormatContextWindowBlockMessage.mockReset(); + mockedFormatContextWindowBlockMessage.mockImplementation( + (params: { guard: { tokens: number; source: string } }) => + `Model context window too small (${params.guard.tokens} tokens; source=${params.guard.source}). Minimum is 1000.`, + ); mockedGetApiKeyForModel.mockReset(); mockedGetApiKeyForModel.mockImplementation( async ({ profileId }: { profileId?: string } = {}) => ({ @@ -443,6 +461,8 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ CONTEXT_WINDOW_HARD_MIN_TOKENS: 1000, CONTEXT_WINDOW_WARN_BELOW_TOKENS: 5000, evaluateContextWindowGuard: mockedEvaluateContextWindowGuard, + formatContextWindowBlockMessage: mockedFormatContextWindowBlockMessage, + formatContextWindowWarningMessage: mockedFormatContextWindowWarningMessage, resolveContextWindowInfo: mockedResolveContextWindowInfo, })); diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index 70e88c817b1..8c90e7c7108 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -98,7 +98,9 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { ...overflowBaseRunParams, runId: "run-small-context", }), - ).rejects.toThrow("Model context window too small 
(800 tokens). Minimum is 1000."); + ).rejects.toThrow( + "Model context window too small (800 tokens; source=model). Minimum is 1000.", + ); expect(mockedRunEmbeddedAttempt).not.toHaveBeenCalled(); }); diff --git a/src/agents/pi-embedded-runner/run/setup.ts b/src/agents/pi-embedded-runner/run/setup.ts index c1c33d448b8..6a52bbf7cb8 100644 --- a/src/agents/pi-embedded-runner/run/setup.ts +++ b/src/agents/pi-embedded-runner/run/setup.ts @@ -5,6 +5,8 @@ import { CONTEXT_WINDOW_HARD_MIN_TOKENS, CONTEXT_WINDOW_WARN_BELOW_TOKENS, evaluateContextWindowGuard, + formatContextWindowBlockMessage, + formatContextWindowWarningMessage, resolveContextWindowInfo, type ContextWindowInfo, } from "../../context-window-guard.js"; @@ -126,19 +128,33 @@ export function resolveEffectiveRuntimeModel(params: { warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS, hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS, }); + const runtimeBaseUrl = + typeof (params.runtimeModel as { baseUrl?: unknown }).baseUrl === "string" + ? (params.runtimeModel as { baseUrl: string }).baseUrl + : undefined; if (ctxGuard.shouldWarn) { log.warn( - `low context window: ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`, + formatContextWindowWarningMessage({ + provider: params.provider, + modelId: params.modelId, + guard: ctxGuard, + runtimeBaseUrl, + }), ); } if (ctxGuard.shouldBlock) { + const message = formatContextWindowBlockMessage({ + guard: ctxGuard, + runtimeBaseUrl, + }); log.error( - `blocked model (context window too small): ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`, - ); - throw new FailoverError( - `Model context window too small (${ctxGuard.tokens} tokens). 
Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`, - { reason: "unknown", provider: params.provider, model: params.modelId }, + `blocked model (context window too small): ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}; ${message}`, ); + throw new FailoverError(message, { + reason: "unknown", + provider: params.provider, + model: params.modelId, + }); } return { diff --git a/src/gateway/server.hooks.test.ts b/src/gateway/server.hooks.test.ts index 89e1936bd25..09926576040 100644 --- a/src/gateway/server.hooks.test.ts +++ b/src/gateway/server.hooks.test.ts @@ -97,7 +97,7 @@ async function expectFirstHookDelivery( const first = await postAgentHookWithIdempotency(port, idempotencyKey, headers); const firstBody = (await first.json()) as { runId?: string }; expect(firstBody.runId).toBeTruthy(); - await waitForSystemEvent(); + await waitForSystemEvent(5_000); drainSystemEvents(resolveMainKey()); return firstBody; } diff --git a/src/gateway/test-helpers.server.ts b/src/gateway/test-helpers.server.ts index 65a776489c0..90084ba9ee2 100644 --- a/src/gateway/test-helpers.server.ts +++ b/src/gateway/test-helpers.server.ts @@ -472,7 +472,7 @@ export function installGatewayTestHooks(options?: { scope?: "test" | "suite" }) if (activeSuiteHookScopeCount === 0) { await cleanupGatewayTestHome({ restoreEnv: true }); } - }); + }, 300_000); return; } diff --git a/src/media-understanding/attachments.cache.ts b/src/media-understanding/attachments.cache.ts index 4e0f06d5da0..c5664a7ab82 100644 --- a/src/media-understanding/attachments.cache.ts +++ b/src/media-understanding/attachments.cache.ts @@ -154,7 +154,7 @@ export class MediaAttachmentCache { try { const fetchImpl = (input: RequestInfo | URL, init?: RequestInit) => - fetchWithTimeout(resolveRequestUrl(input), init ?? {}, params.timeoutMs, fetch); + fetchWithTimeout(resolveRequestUrl(input), init ?? 
{}, params.timeoutMs, globalThis.fetch); const fetched = await fetchRemoteMedia({ url, fetchImpl, maxBytes: params.maxBytes }); entry.buffer = fetched.buffer; entry.bufferMime = diff --git a/src/media-understanding/media-understanding-url-fallback.test.ts b/src/media-understanding/media-understanding-url-fallback.test.ts index 350deb57eed..566d5af1087 100644 --- a/src/media-understanding/media-understanding-url-fallback.test.ts +++ b/src/media-understanding/media-understanding-url-fallback.test.ts @@ -2,15 +2,22 @@ import fs from "node:fs/promises"; import path from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import { withTempDir } from "../test-helpers/temp-dir.js"; -import { withFetchPreconnect } from "../test-utils/fetch-mock.js"; import { MediaAttachmentCache } from "./attachments.js"; -const originalFetch = globalThis.fetch; +const fetchRemoteMediaMock = vi.hoisted(() => vi.fn()); + +vi.mock("../media/fetch.js", async () => { + const actual = await vi.importActual("../media/fetch.js"); + return { + ...actual, + fetchRemoteMedia: fetchRemoteMediaMock, + }; +}); describe("media understanding attachment URL fallback", () => { afterEach(() => { - globalThis.fetch = originalFetch; vi.restoreAllMocks(); + fetchRemoteMediaMock.mockReset(); }); it("getPath falls back to URL fetch when local path is blocked", async () => { @@ -28,17 +35,12 @@ describe("media understanding attachment URL fallback", () => { }, ); const originalRealpath = fs.realpath.bind(fs); - const fetchSpy = vi.fn( - async () => - new Response(Buffer.from("fallback-buffer"), { - status: 200, - headers: { - "content-type": "image/jpeg", - }, - }), - ); + fetchRemoteMediaMock.mockResolvedValue({ + buffer: Buffer.from("fallback-buffer"), + contentType: "image/jpeg", + fileName: "fallback.jpg", + }); - globalThis.fetch = withFetchPreconnect(fetchSpy); vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { if (String(candidatePath) === attachmentPath) { throw 
new Error("EACCES"); @@ -54,8 +56,10 @@ describe("media understanding attachment URL fallback", () => { // getPath should fall through to getBuffer URL fetch, write a temp file, // and return a path to that temp file instead of throwing. expect(result.path).toBeTruthy(); - expect(fetchSpy).toHaveBeenCalledTimes(1); - expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything()); + expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); + expect(fetchRemoteMediaMock).toHaveBeenCalledWith( + expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), + ); // Clean up the temp file if (result.cleanup) { await result.cleanup(); @@ -78,17 +82,12 @@ describe("media understanding attachment URL fallback", () => { }, ); const originalRealpath = fs.realpath.bind(fs); - const fetchSpy = vi.fn( - async () => - new Response(Buffer.from("fallback-buffer"), { - status: 200, - headers: { - "content-type": "image/jpeg", - }, - }), - ); + fetchRemoteMediaMock.mockResolvedValue({ + buffer: Buffer.from("fallback-buffer"), + contentType: "image/jpeg", + fileName: "fallback.jpg", + }); - globalThis.fetch = withFetchPreconnect(fetchSpy); vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { if (String(candidatePath) === attachmentPath) { throw new Error("EACCES"); @@ -102,8 +101,10 @@ describe("media understanding attachment URL fallback", () => { timeoutMs: 1000, }); expect(result.buffer.toString()).toBe("fallback-buffer"); - expect(fetchSpy).toHaveBeenCalledTimes(1); - expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything()); + expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1); + expect(fetchRemoteMediaMock).toHaveBeenCalledWith( + expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }), + ); }); }); });