mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 12:00:44 +00:00
Agents: clarify local model context preflight (#66236)
Merged via squash.
Prepared head SHA: 11bfaf15f6
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
This commit is contained in:
@@ -4,6 +4,8 @@ import {
|
||||
CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||||
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||||
evaluateContextWindowGuard,
|
||||
formatContextWindowBlockMessage,
|
||||
formatContextWindowWarningMessage,
|
||||
resolveContextWindowInfo,
|
||||
} from "./context-window-guard.js";
|
||||
|
||||
@@ -222,4 +224,87 @@ describe("context-window-guard", () => {
|
||||
expect(CONTEXT_WINDOW_HARD_MIN_TOKENS).toBe(16_000);
|
||||
expect(CONTEXT_WINDOW_WARN_BELOW_TOKENS).toBe(32_000);
|
||||
});
|
||||
|
||||
// 24_000 tokens sits between the 16_000 hard minimum and the 32_000 warn
// threshold asserted earlier in this suite; the loopback base URL is the
// input expected to trigger the local/self-hosted hint.
it("adds a local-model hint to warning messages for localhost endpoints", () => {
  const lowContextGuard = evaluateContextWindowGuard({
    info: { tokens: 24_000, source: "model" },
  });
  const warning = formatContextWindowWarningMessage({
    provider: "lmstudio",
    modelId: "qwen3",
    guard: lowContextGuard,
    runtimeBaseUrl: "http://127.0.0.1:1234/v1",
  });

  expect(warning).toContain("local/self-hosted runs work best at 32000+ tokens");
});
|
||||
|
||||
// A non-loopback HTTPS endpoint must yield exactly the base warning text,
// with no remediation suffix appended.
it("does not add local-model hints for generic custom endpoints", () => {
  const lowContextGuard = evaluateContextWindowGuard({
    info: { tokens: 24_000, source: "model" },
  });
  const warning = formatContextWindowWarningMessage({
    provider: "custom",
    modelId: "hosted-proxy-model",
    guard: lowContextGuard,
    runtimeBaseUrl: "https://models.example.com/v1",
  });

  expect(warning).toBe(
    "low context window: custom/hosted-proxy-model ctx=24000 (warn<32000) source=model",
  );
});
|
||||
|
||||
// 8_000 tokens is below the 16_000 hard minimum asserted earlier in this suite.
it("adds a local-model hint to block messages for localhost endpoints", () => {
  const tooSmallGuard = evaluateContextWindowGuard({
    info: { tokens: 8_000, source: "model" },
  });
  const blockMessage = formatContextWindowBlockMessage({
    guard: tooSmallGuard,
    runtimeBaseUrl: "http://127.0.0.1:11434/v1",
  });

  expect(blockMessage).toContain("This looks like a local model endpoint.");
});
|
||||
|
||||
// When the limit comes from agents.defaults.contextTokens, the remediation
// must name that setting and must not suggest switching models.
it("points config-backed block remediation at agents.defaults.contextTokens", () => {
  const cappedGuard = evaluateContextWindowGuard({
    info: { tokens: 8_000, source: "agentContextTokens" },
  });
  const blockMessage = formatContextWindowBlockMessage({
    guard: cappedGuard,
    runtimeBaseUrl: "http://127.0.0.1:11434/v1",
  });

  expect(blockMessage).toContain("OpenClaw is capped by agents.defaults.contextTokens.");
  expect(blockMessage).not.toContain("choose a larger model");
});
|
||||
|
||||
// When the limit comes from the models config, the remediation should point
// at the per-model contextWindow/contextTokens settings.
it("points model config block remediation at contextWindow/contextTokens", () => {
  const configuredGuard = evaluateContextWindowGuard({
    info: { tokens: 8_000, source: "modelsConfig" },
  });
  const blockMessage = formatContextWindowBlockMessage({
    guard: configuredGuard,
    runtimeBaseUrl: "http://127.0.0.1:11434/v1",
  });

  expect(blockMessage).toContain("Raise contextWindow/contextTokens or choose a larger model.");
});
|
||||
|
||||
// A public provider endpoint must produce only the base block message,
// with no local-endpoint remediation sentences.
it("keeps block messages concise for public providers", () => {
  const tooSmallGuard = evaluateContextWindowGuard({
    info: { tokens: 8_000, source: "model" },
  });
  const blockMessage = formatContextWindowBlockMessage({
    guard: tooSmallGuard,
    runtimeBaseUrl: "https://api.openai.com/v1",
  });

  expect(blockMessage).toBe(
    "Model context window too small (8000 tokens; source=model). Minimum is 16000.",
  );
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import { resolveProviderEndpoint } from "./provider-attribution.js";
|
||||
import { findNormalizedProviderValue } from "./provider-id.js";
|
||||
|
||||
export const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000;
|
||||
@@ -61,6 +62,77 @@ export type ContextWindowGuardResult = ContextWindowInfo & {
|
||||
shouldBlock: boolean;
|
||||
};
|
||||
|
||||
/**
 * Endpoint-derived context used when deciding whether a context-window
 * message should carry extra remediation advice.
 */
export type ContextWindowGuardHint = {
  /** Endpoint classification, typed exactly as `resolveProviderEndpoint` reports it. */
  endpointClass: ReturnType<typeof resolveProviderEndpoint>["endpointClass"];
  /** Whether the endpoint is believed to be a local/self-hosted server. */
  likelySelfHosted: boolean;
};
|
||||
|
||||
/**
 * Classifies the runtime endpoint so message formatters can decide whether
 * to attach local/self-hosted guidance.
 *
 * @param params.runtimeBaseUrl - Base URL of the runtime endpoint. A `null`
 *   or missing value is forwarded to `resolveProviderEndpoint` as `undefined`.
 * @returns The endpoint class, plus `likelySelfHosted`, which is true only
 *   when that class is `"local"`.
 */
export function resolveContextWindowGuardHint(params: {
  runtimeBaseUrl?: string | null;
}): ContextWindowGuardHint {
  const baseUrl = params.runtimeBaseUrl ?? undefined;
  const { endpointClass } = resolveProviderEndpoint(baseUrl);
  const likelySelfHosted = endpointClass === "local";
  return { endpointClass, likelySelfHosted };
}
|
||||
|
||||
/**
 * Builds the log line for a model whose context window is below the warn
 * threshold.
 *
 * The base line is always produced. A remediation suffix is appended only
 * when the runtime endpoint is classified as likely self-hosted, and the
 * suffix depends on where the token limit came from (`guard.source`).
 *
 * @param params.provider - Provider id shown in the message.
 * @param params.modelId - Model id shown in the message.
 * @param params.guard - Guard result supplying `tokens` and `source`.
 * @param params.runtimeBaseUrl - Endpoint base URL used for classification.
 * @returns The warning text, with or without a remediation suffix.
 */
export function formatContextWindowWarningMessage(params: {
  provider: string;
  modelId: string;
  guard: ContextWindowGuardResult;
  runtimeBaseUrl?: string | null;
}): string {
  const { provider, modelId, guard } = params;
  const summary = `low context window: ${provider}/${modelId} ctx=${guard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${guard.source}`;

  const { likelySelfHosted } = resolveContextWindowGuardHint({
    runtimeBaseUrl: params.runtimeBaseUrl,
  });
  // Non-local endpoints get the bare summary with no advice attached.
  if (!likelySelfHosted) {
    return summary;
  }

  switch (guard.source) {
    case "agentContextTokens":
      // The limit is an OpenClaw-side cap, so point at that setting.
      return `${summary}; OpenClaw is capped by agents.defaults.contextTokens, so raise that cap if you want to use more of the model context window`;
    case "modelsConfig":
      // The limit comes from the per-model configuration.
      return `${summary}; OpenClaw is using the configured model context limit for this model, so raise contextWindow/contextTokens if it is set too low`;
    default:
      // Any other source: advise raising the limit on the server/model side.
      return `${summary}; local/self-hosted runs work best at ${CONTEXT_WINDOW_WARN_BELOW_TOKENS}+ tokens and may show weaker tool use or more compaction until the server/model context limit is raised`;
  }
}
|
||||
|
||||
/**
 * Builds the message for a model whose context window is below the hard
 * minimum.
 *
 * The base sentence pair is always produced. Remediation sentences are
 * appended only when the runtime endpoint is classified as likely
 * self-hosted, and they depend on where the token limit came from
 * (`guard.source`).
 *
 * @param params.guard - Guard result supplying `tokens` and `source`.
 * @param params.runtimeBaseUrl - Endpoint base URL used for classification.
 * @returns The block message, with or without remediation sentences.
 */
export function formatContextWindowBlockMessage(params: {
  guard: ContextWindowGuardResult;
  runtimeBaseUrl?: string | null;
}): string {
  const { guard } = params;
  const summary = `Model context window too small (${guard.tokens} tokens; source=${guard.source}). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`;

  const { likelySelfHosted } = resolveContextWindowGuardHint({
    runtimeBaseUrl: params.runtimeBaseUrl,
  });
  // Non-local endpoints get the concise summary only.
  if (!likelySelfHosted) {
    return summary;
  }

  switch (guard.source) {
    case "agentContextTokens":
      // The limit is an OpenClaw-side cap; no model change is suggested.
      return `${summary} OpenClaw is capped by agents.defaults.contextTokens. Raise that cap.`;
    case "modelsConfig":
      // The limit comes from the per-model configuration.
      return `${summary} OpenClaw is using the configured model context limit for this model. Raise contextWindow/contextTokens or choose a larger model.`;
    default:
      // Any other source: the server/model itself needs a larger limit.
      return `${summary} This looks like a local model endpoint. Raise the server/model context limit or choose a larger model. OpenClaw local/self-hosted runs work best at ${CONTEXT_WINDOW_WARN_BELOW_TOKENS}+ tokens.`;
  }
}
|
||||
|
||||
export function evaluateContextWindowGuard(params: {
|
||||
info: ContextWindowInfo;
|
||||
warnBelowTokens?: number;
|
||||
|
||||
@@ -169,6 +169,14 @@ export const mockedResolveContextWindowInfo = vi.fn(() => ({
|
||||
tokens: 200000,
|
||||
source: "model",
|
||||
}));
|
||||
/**
 * Test double for `formatContextWindowWarningMessage`. It returns a
 * simplified warning line built from provider, model id, token count and
 * source, without the warn threshold.
 */
export const mockedFormatContextWindowWarningMessage = vi.fn(
  ({
    provider,
    modelId,
    guard,
  }: {
    provider: string;
    modelId: string;
    guard: { tokens: number; source: string };
  }) => `low context window: ${provider}/${modelId} ctx=${guard.tokens} source=${guard.source}`,
);
|
||||
/**
 * Test double for `formatContextWindowBlockMessage`. The minimum in the text
 * is fixed at 1000 rather than read from a constant.
 */
export const mockedFormatContextWindowBlockMessage = vi.fn(
  ({ guard }: { guard: { tokens: number; source: string } }) =>
    `Model context window too small (${guard.tokens} tokens; source=${guard.source}). Minimum is 1000.`,
);
|
||||
export const mockedGetApiKeyForModel = vi.fn(
|
||||
async ({ profileId }: { profileId?: string } = {}) => ({
|
||||
apiKey: "test-key",
|
||||
@@ -300,6 +308,16 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
|
||||
tokens: 200000,
|
||||
source: "model",
|
||||
});
|
||||
mockedFormatContextWindowWarningMessage.mockReset();
|
||||
mockedFormatContextWindowWarningMessage.mockImplementation(
|
||||
(params: { provider: string; modelId: string; guard: { tokens: number; source: string } }) =>
|
||||
`low context window: ${params.provider}/${params.modelId} ctx=${params.guard.tokens} source=${params.guard.source}`,
|
||||
);
|
||||
mockedFormatContextWindowBlockMessage.mockReset();
|
||||
mockedFormatContextWindowBlockMessage.mockImplementation(
|
||||
(params: { guard: { tokens: number; source: string } }) =>
|
||||
`Model context window too small (${params.guard.tokens} tokens; source=${params.guard.source}). Minimum is 1000.`,
|
||||
);
|
||||
mockedGetApiKeyForModel.mockReset();
|
||||
mockedGetApiKeyForModel.mockImplementation(
|
||||
async ({ profileId }: { profileId?: string } = {}) => ({
|
||||
@@ -443,6 +461,8 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
|
||||
CONTEXT_WINDOW_HARD_MIN_TOKENS: 1000,
|
||||
CONTEXT_WINDOW_WARN_BELOW_TOKENS: 5000,
|
||||
evaluateContextWindowGuard: mockedEvaluateContextWindowGuard,
|
||||
formatContextWindowBlockMessage: mockedFormatContextWindowBlockMessage,
|
||||
formatContextWindowWarningMessage: mockedFormatContextWindowWarningMessage,
|
||||
resolveContextWindowInfo: mockedResolveContextWindowInfo,
|
||||
}));
|
||||
|
||||
|
||||
@@ -98,7 +98,9 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
...overflowBaseRunParams,
|
||||
runId: "run-small-context",
|
||||
}),
|
||||
).rejects.toThrow("Model context window too small (800 tokens). Minimum is 1000.");
|
||||
).rejects.toThrow(
|
||||
"Model context window too small (800 tokens; source=model). Minimum is 1000.",
|
||||
);
|
||||
|
||||
expect(mockedRunEmbeddedAttempt).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||||
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||||
evaluateContextWindowGuard,
|
||||
formatContextWindowBlockMessage,
|
||||
formatContextWindowWarningMessage,
|
||||
resolveContextWindowInfo,
|
||||
type ContextWindowInfo,
|
||||
} from "../../context-window-guard.js";
|
||||
@@ -126,19 +128,33 @@ export function resolveEffectiveRuntimeModel(params: {
|
||||
warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||||
hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||||
});
|
||||
const runtimeBaseUrl =
|
||||
typeof (params.runtimeModel as { baseUrl?: unknown }).baseUrl === "string"
|
||||
? (params.runtimeModel as { baseUrl: string }).baseUrl
|
||||
: undefined;
|
||||
if (ctxGuard.shouldWarn) {
|
||||
log.warn(
|
||||
`low context window: ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
|
||||
formatContextWindowWarningMessage({
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
guard: ctxGuard,
|
||||
runtimeBaseUrl,
|
||||
}),
|
||||
);
|
||||
}
|
||||
if (ctxGuard.shouldBlock) {
|
||||
const message = formatContextWindowBlockMessage({
|
||||
guard: ctxGuard,
|
||||
runtimeBaseUrl,
|
||||
});
|
||||
log.error(
|
||||
`blocked model (context window too small): ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
|
||||
);
|
||||
throw new FailoverError(
|
||||
`Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
|
||||
{ reason: "unknown", provider: params.provider, model: params.modelId },
|
||||
`blocked model (context window too small): ${params.provider}/${params.modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}; ${message}`,
|
||||
);
|
||||
throw new FailoverError(message, {
|
||||
reason: "unknown",
|
||||
provider: params.provider,
|
||||
model: params.modelId,
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@@ -97,7 +97,7 @@ async function expectFirstHookDelivery(
|
||||
const first = await postAgentHookWithIdempotency(port, idempotencyKey, headers);
|
||||
const firstBody = (await first.json()) as { runId?: string };
|
||||
expect(firstBody.runId).toBeTruthy();
|
||||
await waitForSystemEvent();
|
||||
await waitForSystemEvent(5_000);
|
||||
drainSystemEvents(resolveMainKey());
|
||||
return firstBody;
|
||||
}
|
||||
|
||||
@@ -472,7 +472,7 @@ export function installGatewayTestHooks(options?: { scope?: "test" | "suite" })
|
||||
if (activeSuiteHookScopeCount === 0) {
|
||||
await cleanupGatewayTestHome({ restoreEnv: true });
|
||||
}
|
||||
});
|
||||
}, 300_000);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ export class MediaAttachmentCache {
|
||||
|
||||
try {
|
||||
const fetchImpl = (input: RequestInfo | URL, init?: RequestInit) =>
|
||||
fetchWithTimeout(resolveRequestUrl(input), init ?? {}, params.timeoutMs, fetch);
|
||||
fetchWithTimeout(resolveRequestUrl(input), init ?? {}, params.timeoutMs, globalThis.fetch);
|
||||
const fetched = await fetchRemoteMedia({ url, fetchImpl, maxBytes: params.maxBytes });
|
||||
entry.buffer = fetched.buffer;
|
||||
entry.bufferMime =
|
||||
|
||||
@@ -2,15 +2,22 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { withTempDir } from "../test-helpers/temp-dir.js";
|
||||
import { withFetchPreconnect } from "../test-utils/fetch-mock.js";
|
||||
import { MediaAttachmentCache } from "./attachments.js";
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
const fetchRemoteMediaMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("../media/fetch.js", async () => {
|
||||
const actual = await vi.importActual<typeof import("../media/fetch.js")>("../media/fetch.js");
|
||||
return {
|
||||
...actual,
|
||||
fetchRemoteMedia: fetchRemoteMediaMock,
|
||||
};
|
||||
});
|
||||
|
||||
describe("media understanding attachment URL fallback", () => {
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
fetchRemoteMediaMock.mockReset();
|
||||
});
|
||||
|
||||
it("getPath falls back to URL fetch when local path is blocked", async () => {
|
||||
@@ -28,17 +35,12 @@ describe("media understanding attachment URL fallback", () => {
|
||||
},
|
||||
);
|
||||
const originalRealpath = fs.realpath.bind(fs);
|
||||
const fetchSpy = vi.fn(
|
||||
async () =>
|
||||
new Response(Buffer.from("fallback-buffer"), {
|
||||
status: 200,
|
||||
headers: {
|
||||
"content-type": "image/jpeg",
|
||||
},
|
||||
}),
|
||||
);
|
||||
fetchRemoteMediaMock.mockResolvedValue({
|
||||
buffer: Buffer.from("fallback-buffer"),
|
||||
contentType: "image/jpeg",
|
||||
fileName: "fallback.jpg",
|
||||
});
|
||||
|
||||
globalThis.fetch = withFetchPreconnect(fetchSpy);
|
||||
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
|
||||
if (String(candidatePath) === attachmentPath) {
|
||||
throw new Error("EACCES");
|
||||
@@ -54,8 +56,10 @@ describe("media understanding attachment URL fallback", () => {
|
||||
// getPath should fall through to getBuffer URL fetch, write a temp file,
|
||||
// and return a path to that temp file instead of throwing.
|
||||
expect(result.path).toBeTruthy();
|
||||
expect(fetchSpy).toHaveBeenCalledTimes(1);
|
||||
expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything());
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
// Clean up the temp file
|
||||
if (result.cleanup) {
|
||||
await result.cleanup();
|
||||
@@ -78,17 +82,12 @@ describe("media understanding attachment URL fallback", () => {
|
||||
},
|
||||
);
|
||||
const originalRealpath = fs.realpath.bind(fs);
|
||||
const fetchSpy = vi.fn(
|
||||
async () =>
|
||||
new Response(Buffer.from("fallback-buffer"), {
|
||||
status: 200,
|
||||
headers: {
|
||||
"content-type": "image/jpeg",
|
||||
},
|
||||
}),
|
||||
);
|
||||
fetchRemoteMediaMock.mockResolvedValue({
|
||||
buffer: Buffer.from("fallback-buffer"),
|
||||
contentType: "image/jpeg",
|
||||
fileName: "fallback.jpg",
|
||||
});
|
||||
|
||||
globalThis.fetch = withFetchPreconnect(fetchSpy);
|
||||
vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => {
|
||||
if (String(candidatePath) === attachmentPath) {
|
||||
throw new Error("EACCES");
|
||||
@@ -102,8 +101,10 @@ describe("media understanding attachment URL fallback", () => {
|
||||
timeoutMs: 1000,
|
||||
});
|
||||
expect(result.buffer.toString()).toBe("fallback-buffer");
|
||||
expect(fetchSpy).toHaveBeenCalledTimes(1);
|
||||
expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything());
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledTimes(1);
|
||||
expect(fetchRemoteMediaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ url: fallbackUrl, maxBytes: 1024 }),
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user