From 686751f639dce005bba243874d01bd0bbdb01efd Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 28 May 2026 21:17:40 +0100 Subject: [PATCH] test(agents): add small model live profile (#87638) --- docs/help/testing-live.md | 11 +++- src/agents/live-model-filter.ts | 75 ++++++++++++++++++++++--- src/agents/model-compat.test.ts | 66 ++++++++++++++++++++++ src/agents/models.profiles.live.test.ts | 46 ++++++++++++--- 4 files changed, 182 insertions(+), 16 deletions(-) diff --git a/docs/help/testing-live.md b/docs/help/testing-live.md index ea71a1c611c..5043cc1e835 100644 --- a/docs/help/testing-live.md +++ b/docs/help/testing-live.md @@ -71,12 +71,13 @@ Live tests are split into two layers so we can isolate failures: - Run a small completion per model (and targeted regressions where needed) - How to enable: - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly) -- Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke +- Set `OPENCLAW_LIVE_MODELS=modern`, `small`, or `all` (alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke - How to select models: - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.2 + Codex, Gemini 3, DeepSeek V4, GLM 4.7, MiniMax M2.7, Grok 4.3) + - `OPENCLAW_LIVE_MODELS=small` to run the constrained small-model allowlist (Qwen 8B/9B local-compatible routes, OpenRouter Qwen/GLM, and Z.AI GLM) - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,..."` (comma allowlist) - - Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap. + - Modern/all and small sweeps default to their curated caps; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive selected-profile sweep or a positive number for a smaller cap. - Exhaustive sweeps use `OPENCLAW_LIVE_TEST_TIMEOUT_MS` for the whole direct-model test timeout. Default: 60 minutes. - Direct-model probes run with 20-way parallelism by default; set `OPENCLAW_LIVE_MODEL_CONCURRENCY` to override. - How to select providers: @@ -339,6 +340,12 @@ Narrow, explicit allowlists are fastest and least flaky: - Single model, direct (no gateway): - `OPENCLAW_LIVE_MODELS="openai/gpt-5.5" pnpm test:live src/agents/models.profiles.live.test.ts` +- Small-model direct profile: + - `OPENCLAW_LIVE_MODELS=small pnpm test:live src/agents/models.profiles.live.test.ts` + +- Ollama Cloud API smoke: + - `OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA=1 OPENCLAW_LIVE_OLLAMA_BASE_URL=https://ollama.com OPENCLAW_LIVE_OLLAMA_MODEL=glm-5.1:cloud OPENCLAW_LIVE_OLLAMA_WEB_SEARCH=0 pnpm test:live -- extensions/ollama/ollama.live.test.ts` + - Single model, gateway smoke: - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index 073e926fba4..27bbf694af7 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -30,7 +30,18 @@ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [ "minimax-portal/minimax-m2.7", ] as const; +const SMALL_LIVE_MODEL_PRIORITY = [ + "lmstudio/qwen/qwen3.5-9b", + "vllm/qwen/qwen3-8b", + "sglang/qwen/qwen3-8b", + "openrouter/qwen/qwen3.5-9b", + "openrouter/z-ai/glm-5.1", + "openrouter/z-ai/glm-5", + "zai/glm-5.1", +] as const; + export const DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT = HIGH_SIGNAL_LIVE_MODEL_PRIORITY.length; +export const DEFAULT_SMALL_LIVE_MODEL_LIMIT = SMALL_LIVE_MODEL_PRIORITY.length; const DEFAULT_HIGH_SIGNAL_LIVE_EXCLUDED_PROVIDERS = new Set(["codex", "codex-cli", "openai-codex"]); const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([ "fireworks", @@ -42,6 +53,9 @@ const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX = new Map( HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]), ); +const SMALL_LIVE_MODEL_PRIORITY_INDEX = new Map( + SMALL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]), +); const HIGH_SIGNAL_LIVE_MODEL_IDS_BY_PROVIDER = new Map>(); for (const key of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) { const separatorIndex = key.indexOf("/"); @@ -200,12 +214,29 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean { } export function isPrioritizedHighSignalLiveModelRef(ref: ModelRef): boolean { - const key = toCanonicalHighSignalLiveModelKey(ref); - return key !== null && HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX.has(key); + return hasPrioritizedLiveModelRef(HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX, ref); +} + +export function isSmallLiveModelRef(ref: ModelRef): boolean { + return hasPrioritizedLiveModelRef(SMALL_LIVE_MODEL_PRIORITY_INDEX, ref); +} + +export function isPrioritizedSmallLiveModelRef(ref: ModelRef): boolean { + return isSmallLiveModelRef(ref); } export function listPrioritizedHighSignalLiveModelRefs(): Array<{ provider: string; id: string }> { - return HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key) => { + return listPrioritizedLiveModelRefs(HIGH_SIGNAL_LIVE_MODEL_PRIORITY); +} + +export function listPrioritizedSmallLiveModelRefs(): Array<{ provider: string; id: string }> { + return listPrioritizedLiveModelRefs(SMALL_LIVE_MODEL_PRIORITY); +} + +function listPrioritizedLiveModelRefs( + priority: readonly string[], +): Array<{ provider: string; id: string }> { + return priority.map((key) => { const separatorIndex = key.indexOf("/"); return { provider: key.slice(0, separatorIndex), @@ -258,7 +289,7 @@ export function shouldExcludeProviderFromDefaultHighSignalLiveSweep(params: { return true; } -function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null { +function toCanonicalLiveModelKey(ref: ModelRef): string | null { const provider = normalizeProviderId(ref.provider ?? ""); const rawId = normalizeLowercaseStringOrEmpty(ref.id); if (!provider || !rawId) { @@ -267,6 +298,11 @@ function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null { return `${provider}/${rawId}`; } +function hasPrioritizedLiveModelRef(index: ReadonlyMap, ref: ModelRef): boolean { + const key = toCanonicalLiveModelKey(ref); + return key !== null && index.has(key); +} + function capByProviderSpread( items: T[], maxItems: number, @@ -315,6 +351,31 @@ export function selectHighSignalLiveItems( maxItems: number, refOf: (item: T) => ModelRef, providerOf: (item: T) => string, +): T[] { + return selectPrioritizedLiveItems( + items, + maxItems, + refOf, + providerOf, + HIGH_SIGNAL_LIVE_MODEL_PRIORITY, + ); +} + +export function selectSmallLiveItems( + items: T[], + maxItems: number, + refOf: (item: T) => ModelRef, + providerOf: (item: T) => string, +): T[] { + return selectPrioritizedLiveItems(items, maxItems, refOf, providerOf, SMALL_LIVE_MODEL_PRIORITY); +} + +function selectPrioritizedLiveItems( + items: T[], + maxItems: number, + refOf: (item: T) => ModelRef, + providerOf: (item: T) => string, + priority: readonly string[], ): T[] { if (maxItems <= 0 || items.length <= maxItems) { return items; @@ -322,12 +383,12 @@ export function selectHighSignalLiveItems( const remaining = [...items]; const selected: T[] = []; - for (const preferredKey of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) { + for (const preferredKey of priority) { if (selected.length >= maxItems) { break; } const preferredIndex = remaining.findIndex( - (item) => toCanonicalHighSignalLiveModelKey(refOf(item)) === preferredKey, + (item) => toCanonicalLiveModelKey(refOf(item)) === preferredKey, ); if (preferredIndex < 0) { continue; @@ -362,7 +423,7 @@ export function resolveHighSignalLiveModelLimit(params: { } export function getHighSignalLiveModelPriorityIndex(ref: ModelRef): number | null { - const key = toCanonicalHighSignalLiveModelKey(ref); + const key = toCanonicalLiveModelKey(ref); if (!key) { return null; } diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index e1ebd3c451e..f9c513782bf 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -14,12 +14,17 @@ vi.mock("../plugins/provider-runtime.js", () => { import { normalizeModelCompat } from "../plugins/provider-model-compat.js"; import { DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT, + DEFAULT_SMALL_LIVE_MODEL_LIMIT, isHighSignalLiveModelRef, isModernModelRef, isPrioritizedHighSignalLiveModelRef, + isPrioritizedSmallLiveModelRef, + isSmallLiveModelRef, listPrioritizedHighSignalLiveModelRefs, + listPrioritizedSmallLiveModelRefs, resolveHighSignalLiveModelLimit, selectHighSignalLiveItems, + selectSmallLiveItems, } from "./live-model-filter.js"; const baseModel = (): Model => @@ -678,6 +683,33 @@ describe("isPrioritizedHighSignalLiveModelRef", () => { }); }); +describe("isSmallLiveModelRef", () => { + it("matches the small-model live matrix without requiring provider modern hooks", () => { + expect(isSmallLiveModelRef({ provider: "lmstudio", id: "Qwen/Qwen3.5-9B" })).toBe(true); + expect(isSmallLiveModelRef({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(true); + expect(isSmallLiveModelRef({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true); + expect(isSmallLiveModelRef({ provider: "openai", id: "gpt-5.5" })).toBe(false); + expect(providerRuntimeMocks.resolveProviderModernModelRef).not.toHaveBeenCalled(); + }); +}); + +describe("isPrioritizedSmallLiveModelRef", () => { + it("lists priority refs as provider/id pairs", () => { + expect(isPrioritizedSmallLiveModelRef({ provider: "lmstudio", id: "qwen/qwen3.5-9b" })).toBe( + true, + ); + expect(listPrioritizedSmallLiveModelRefs()).toStrictEqual([ + { provider: "lmstudio", id: "qwen/qwen3.5-9b" }, + { provider: "vllm", id: "qwen/qwen3-8b" }, + { provider: "sglang", id: "qwen/qwen3-8b" }, + { provider: "openrouter", id: "qwen/qwen3.5-9b" }, + { provider: "openrouter", id: "z-ai/glm-5.1" }, + { provider: "openrouter", id: "z-ai/glm-5" }, + { provider: "zai", id: "glm-5.1" }, + ]); + }); +}); + describe("selectHighSignalLiveItems", () => { it("prefers curated Google replacements before fallback provider spread", () => { const items = [ @@ -748,6 +780,31 @@ describe("selectHighSignalLiveItems", () => { }); }); +describe("selectSmallLiveItems", () => { + it("prefers constrained local and hosted small-model routes before fallback spread", () => { + const items = [ + { provider: "openrouter", id: "z-ai/glm-5" }, + { provider: "openai", id: "gpt-5.5" }, + { provider: "vllm", id: "qwen/qwen3-8b" }, + { provider: "lmstudio", id: "qwen/qwen3.5-9b" }, + { provider: "openrouter", id: "qwen/qwen3.5-9b" }, + ]; + + expect( + selectSmallLiveItems( + items, + 3, + (item) => item, + (item) => item.provider, + ), + ).toEqual([ + { provider: "lmstudio", id: "qwen/qwen3.5-9b" }, + { provider: "vllm", id: "qwen/qwen3-8b" }, + { provider: "openrouter", id: "qwen/qwen3.5-9b" }, + ]); + }); +}); + describe("resolveHighSignalLiveModelLimit", () => { it("defaults modern live sweeps to the curated high-signal cap", () => { expect( @@ -757,6 +814,15 @@ describe("resolveHighSignalLiveModelLimit", () => { ).toBe(DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT); }); + it("can default small live sweeps to the curated small-model cap", () => { + expect( + resolveHighSignalLiveModelLimit({ + useExplicitModels: false, + defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT, + }), + ).toBe(DEFAULT_SMALL_LIVE_MODEL_LIMIT); + }); + it("leaves explicit model lists uncapped unless a cap is provided", () => { expect( resolveHighSignalLiveModelLimit({ diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index 026a73d3560..11702721fb4 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -3,6 +3,7 @@ import { type Api, completeSimple, type Model } from "openclaw/plugin-sdk/llm"; import { Type } from "typebox"; import { describe, expect, it } from "vitest"; import { getRuntimeConfig } from "../config/config.js"; +import type { OpenClawConfig } from "../config/types.openclaw.js"; import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js"; import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js"; import { @@ -17,10 +18,15 @@ import { collectAnthropicApiKeys } from "./live-auth-keys.js"; import { appendPrioritizedDynamicLiveModels } from "./live-model-dynamic-candidates.js"; import { isModelNotFoundErrorMessage } from "./live-model-errors.js"; import { + DEFAULT_SMALL_LIVE_MODEL_LIMIT, isHighSignalLiveModelRef, isPrioritizedHighSignalLiveModelRef, + isPrioritizedSmallLiveModelRef, + isSmallLiveModelRef, + listPrioritizedSmallLiveModelRefs, resolveHighSignalLiveModelLimit, selectHighSignalLiveItems, + selectSmallLiveItems, shouldExcludeProviderFromDefaultHighSignalLiveSweep, } from "./live-model-filter.js"; import { @@ -54,6 +60,7 @@ import { import { getApiKeyForModel, requireApiKey } from "./model-auth.js"; import { shouldSuppressBuiltInModel } from "./model-suppression.js"; import { ensureOpenClawModelsJson } from "./models-config.js"; +import { prepareModelForSimpleCompletion } from "./simple-completion-transport.js"; const LIVE = isLiveTestEnabled(); const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim()); @@ -76,6 +83,7 @@ const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs( ); const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV); const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV); +let activeLiveCompletionConfig: OpenClawConfig | undefined; const describeLive = LIVE ? describe : describe.skip; @@ -430,9 +438,13 @@ async function completeSimpleWithTimeout( hardTimer.unref?.(); }); try { + const completionModel = prepareModelForSimpleCompletion({ + model, + cfg: activeLiveCompletionConfig, + }); return await withLiveHeartbeat( Promise.race([ - completeSimple(model, context, { + completeSimple(completionModel, context, { ...options, signal: controller.signal, }), @@ -716,6 +728,7 @@ describeLive("live models (profile keys)", () => { Promise.resolve().then(() => getRuntimeConfig()), "[live-models] load config", ); + activeLiveCompletionConfig = cfg; logProgress("[live-models] preparing models.json"); await withLiveStageTimeout( ensureOpenClawModelsJson(cfg), @@ -724,7 +737,7 @@ describeLive("live models (profile keys)", () => { ); if (!DIRECT_ENABLED) { logProgress( - "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|; all=modern)", + "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|small|all|; all=modern)", ); return; } @@ -740,14 +753,19 @@ describeLive("live models (profile keys)", () => { const agentDir = resolveDefaultAgentDir(cfg); const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim(); const useModern = rawModels === "modern" || rawModels === "all"; - const useExplicit = Boolean(rawModels) && !useModern; + const useSmall = rawModels === "small"; + const useExplicit = Boolean(rawModels) && !useModern && !useSmall; const filter = useExplicit ? parseModelFilter(rawModels) : null; const useDefaultPriorityOnly = !filter && useModern && !providers; - const allowNotFoundSkip = useModern; + const useSmallPriorityOnly = !filter && useSmall && !providers; + const allowNotFoundSkip = useModern || useSmall; const models = await (async () => { if (useDefaultPriorityOnly) { logProgress("[live-models] loading configured prioritized model refs"); } + if (useSmallPriorityOnly) { + logProgress("[live-models] loading configured small model refs"); + } logProgress("[live-models] loading auth storage"); const authStorage = await withLiveStageTimeout( Promise.resolve().then(() => @@ -779,6 +797,7 @@ describeLive("live models (profile keys)", () => { agentDir, env: process.env, modelRegistry, + ...(useSmall ? { refs: listPrioritizedSmallLiveModelRefs() } : {}), }); if (augmented.added.length > 0) { logProgress( @@ -791,6 +810,7 @@ describeLive("live models (profile keys)", () => { const maxModels = resolveHighSignalLiveModelLimit({ rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS, useExplicitModels: useExplicit, + ...(useSmall ? { defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT } : {}), }); const targetMatcher = createLiveTargetMatcher({ providerFilter: providers, @@ -817,7 +837,17 @@ describeLive("live models (profile keys)", () => { if (!targetMatcher.matchesModel(model.provider, model.id)) { continue; } - if (!filter && useModern) { + if (!filter && useSmall) { + if ( + useSmallPriorityOnly && + !isPrioritizedSmallLiveModelRef({ provider: model.provider, id: model.id }) + ) { + continue; + } + if (!isSmallLiveModelRef({ provider: model.provider, id: model.id })) { + continue; + } + } else if (!filter && useModern) { if ( useDefaultPriorityOnly && !isPrioritizedHighSignalLiveModelRef({ provider: model.provider, id: model.id }) @@ -879,13 +909,15 @@ describeLive("live models (profile keys)", () => { return; } - const selectedCandidates = selectHighSignalLiveItems( + const selectCandidates = useSmall ? selectSmallLiveItems : selectHighSignalLiveItems; + const selectedCandidates = selectCandidates( candidates, maxModels > 0 ? maxModels : candidates.length, (entry) => ({ provider: entry.model.provider, id: entry.model.id }), (entry) => entry.model.provider, ); - logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`); + const selectionLabel = useExplicit ? "explicit" : useSmall ? "small" : "high-signal"; + logProgress(`[live-models] selection=${selectionLabel}`); if (selectedCandidates.length < candidates.length) { logProgress( `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,