test(agents): add small model live profile (#87638)

This commit is contained in:
Vincent Koc
2026-05-28 21:17:40 +01:00
committed by GitHub
parent f7507fd921
commit 686751f639
4 changed files with 182 additions and 16 deletions

View File

@@ -71,12 +71,13 @@ Live tests are split into two layers so we can isolate failures:
- Run a small completion per model (and targeted regressions where needed)
- How to enable:
- `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
- Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
- Set `OPENCLAW_LIVE_MODELS` to `modern`, `small`, or `all` (alias for `modern`) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
- How to select models:
- `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.2 + Codex, Gemini 3, DeepSeek V4, GLM 4.7, MiniMax M2.7, Grok 4.3)
- `OPENCLAW_LIVE_MODELS=small` to run the constrained small-model allowlist (Qwen 8B/9B on the local-compatible LM Studio, vLLM, and SGLang routes; OpenRouter Qwen/GLM; and Z.AI GLM)
- `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
- or `OPENCLAW_LIVE_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,..."` (comma allowlist)
- Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
- Modern/all and small sweeps default to their curated caps; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive selected-profile sweep or a positive number for a smaller cap.
- Exhaustive sweeps use `OPENCLAW_LIVE_TEST_TIMEOUT_MS` for the whole direct-model test timeout. Default: 60 minutes.
- Direct-model probes run with 20-way parallelism by default; set `OPENCLAW_LIVE_MODEL_CONCURRENCY` to override.
- How to select providers:
@@ -339,6 +340,12 @@ Narrow, explicit allowlists are fastest and least flaky:
- Single model, direct (no gateway):
- `OPENCLAW_LIVE_MODELS="openai/gpt-5.5" pnpm test:live src/agents/models.profiles.live.test.ts`
- Small-model direct profile:
- `OPENCLAW_LIVE_MODELS=small pnpm test:live src/agents/models.profiles.live.test.ts`
- Ollama Cloud API smoke:
- `OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA=1 OPENCLAW_LIVE_OLLAMA_BASE_URL=https://ollama.com OPENCLAW_LIVE_OLLAMA_MODEL=glm-5.1:cloud OPENCLAW_LIVE_OLLAMA_WEB_SEARCH=0 pnpm test:live -- extensions/ollama/ollama.live.test.ts`
- Single model, gateway smoke:
- `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts`

View File

@@ -30,7 +30,18 @@ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [
"minimax-portal/minimax-m2.7",
] as const;
/**
 * Curated allowlist for the `small` live-model profile.
 *
 * Each entry is a lowercase `provider/model-id` key; the model id itself may
 * contain further slashes (e.g. `qwen/qwen3.5-9b`), so the provider is the
 * text before the FIRST `/`.
 *
 * Order is significant: the array position is the priority rank, and the
 * capped selection walks this list front to back.
 */
const SMALL_LIVE_MODEL_PRIORITY = [
"lmstudio/qwen/qwen3.5-9b",
"vllm/qwen/qwen3-8b",
"sglang/qwen/qwen3-8b",
"openrouter/qwen/qwen3.5-9b",
"openrouter/z-ai/glm-5.1",
"openrouter/z-ai/glm-5",
"zai/glm-5.1",
] as const;
/** Default cap for modern/all sweeps: one slot per entry in the high-signal priority list. */
export const DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT = HIGH_SIGNAL_LIVE_MODEL_PRIORITY.length;
/** Default cap for small sweeps: one slot per entry in the small-model priority list. */
export const DEFAULT_SMALL_LIVE_MODEL_LIMIT = SMALL_LIVE_MODEL_PRIORITY.length;
const DEFAULT_HIGH_SIGNAL_LIVE_EXCLUDED_PROVIDERS = new Set(["codex", "codex-cli", "openai-codex"]);
const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
"fireworks",
@@ -42,6 +53,9 @@ const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
const HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]),
);
/** Lookup from a `provider/model-id` key to its rank (array position) in SMALL_LIVE_MODEL_PRIORITY. */
const SMALL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
  SMALL_LIVE_MODEL_PRIORITY.map((modelKey, rank): [string, number] => [modelKey, rank]),
);
const HIGH_SIGNAL_LIVE_MODEL_IDS_BY_PROVIDER = new Map<string, Set<string>>();
for (const key of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
const separatorIndex = key.indexOf("/");
@@ -200,12 +214,29 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
}
export function isPrioritizedHighSignalLiveModelRef(ref: ModelRef): boolean {
const key = toCanonicalHighSignalLiveModelKey(ref);
return key !== null && HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX.has(key);
return hasPrioritizedLiveModelRef(HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX, ref);
}
/**
 * Reports whether `ref` belongs to the small-model live profile.
 *
 * This is an allowlist lookup: the ref's canonical `provider/id` key must be
 * present in SMALL_LIVE_MODEL_PRIORITY_INDEX.
 *
 * @param ref - Model reference (provider + id) to test.
 * @returns `true` when the ref is one of the curated small-model entries.
 */
export function isSmallLiveModelRef(ref: ModelRef): boolean {
return hasPrioritizedLiveModelRef(SMALL_LIVE_MODEL_PRIORITY_INDEX, ref);
}
/**
 * Reports whether `ref` is a prioritized small-model entry.
 *
 * Currently an alias of `isSmallLiveModelRef`: the small profile consists only
 * of its priority list, so "small" and "prioritized small" are the same set.
 *
 * @param ref - Model reference (provider + id) to test.
 * @returns The result of `isSmallLiveModelRef(ref)`.
 */
export function isPrioritizedSmallLiveModelRef(ref: ModelRef): boolean {
return isSmallLiveModelRef(ref);
}
export function listPrioritizedHighSignalLiveModelRefs(): Array<{ provider: string; id: string }> {
return HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key) => {
return listPrioritizedLiveModelRefs(HIGH_SIGNAL_LIVE_MODEL_PRIORITY);
}
/**
 * Lists the small-model priority entries as `{ provider, id }` pairs, in
 * priority order.
 *
 * @returns One ref per SMALL_LIVE_MODEL_PRIORITY key, with the provider taken
 *   from the text before the first `/` of the key.
 */
export function listPrioritizedSmallLiveModelRefs(): Array<{ provider: string; id: string }> {
return listPrioritizedLiveModelRefs(SMALL_LIVE_MODEL_PRIORITY);
}
function listPrioritizedLiveModelRefs(
priority: readonly string[],
): Array<{ provider: string; id: string }> {
return priority.map((key) => {
const separatorIndex = key.indexOf("/");
return {
provider: key.slice(0, separatorIndex),
@@ -258,7 +289,7 @@ export function shouldExcludeProviderFromDefaultHighSignalLiveSweep(params: {
return true;
}
function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
function toCanonicalLiveModelKey(ref: ModelRef): string | null {
const provider = normalizeProviderId(ref.provider ?? "");
const rawId = normalizeLowercaseStringOrEmpty(ref.id);
if (!provider || !rawId) {
@@ -267,6 +298,11 @@ function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
return `${provider}/${rawId}`;
}
/**
 * Checks whether `ref` appears in a priority index.
 *
 * @param index - Map keyed by canonical `provider/id` strings.
 * @param ref - Model reference to look up.
 * @returns `false` when the ref has no canonical key (missing provider or id),
 *   otherwise whether `index` contains that key.
 */
function hasPrioritizedLiveModelRef(index: ReadonlyMap<string, number>, ref: ModelRef): boolean {
  const canonicalKey = toCanonicalLiveModelKey(ref);
  if (canonicalKey === null) {
    // Refs that cannot be canonicalized can never match a curated entry.
    return false;
  }
  return index.has(canonicalKey);
}
function capByProviderSpread<T>(
items: T[],
maxItems: number,
@@ -315,6 +351,31 @@ export function selectHighSignalLiveItems<T>(
maxItems: number,
refOf: (item: T) => ModelRef,
providerOf: (item: T) => string,
): T[] {
return selectPrioritizedLiveItems(
items,
maxItems,
refOf,
providerOf,
HIGH_SIGNAL_LIVE_MODEL_PRIORITY,
);
}
/**
 * Caps a candidate list for the small-model live profile.
 *
 * Thin wrapper over `selectPrioritizedLiveItems` that supplies
 * SMALL_LIVE_MODEL_PRIORITY as the preference order; it mirrors
 * `selectHighSignalLiveItems`, which does the same with the high-signal list.
 *
 * @param items - Candidate items to choose from.
 * @param maxItems - Maximum number of items to keep. The delegate returns
 *   `items` unchanged when this is `<= 0` or when `items` already fits.
 * @param refOf - Extracts the model ref used to match an item against the
 *   priority list.
 * @param providerOf - Extracts an item's provider id.
 *   NOTE(review): presumably used for the fallback provider spread once the
 *   priority list is exhausted — confirm in `selectPrioritizedLiveItems`.
 * @returns The selected items, with priority-list matches chosen first.
 */
export function selectSmallLiveItems<T>(
items: T[],
maxItems: number,
refOf: (item: T) => ModelRef,
providerOf: (item: T) => string,
): T[] {
return selectPrioritizedLiveItems(items, maxItems, refOf, providerOf, SMALL_LIVE_MODEL_PRIORITY);
}
function selectPrioritizedLiveItems<T>(
items: T[],
maxItems: number,
refOf: (item: T) => ModelRef,
providerOf: (item: T) => string,
priority: readonly string[],
): T[] {
if (maxItems <= 0 || items.length <= maxItems) {
return items;
@@ -322,12 +383,12 @@ export function selectHighSignalLiveItems<T>(
const remaining = [...items];
const selected: T[] = [];
for (const preferredKey of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
for (const preferredKey of priority) {
if (selected.length >= maxItems) {
break;
}
const preferredIndex = remaining.findIndex(
(item) => toCanonicalHighSignalLiveModelKey(refOf(item)) === preferredKey,
(item) => toCanonicalLiveModelKey(refOf(item)) === preferredKey,
);
if (preferredIndex < 0) {
continue;
@@ -362,7 +423,7 @@ export function resolveHighSignalLiveModelLimit(params: {
}
export function getHighSignalLiveModelPriorityIndex(ref: ModelRef): number | null {
const key = toCanonicalHighSignalLiveModelKey(ref);
const key = toCanonicalLiveModelKey(ref);
if (!key) {
return null;
}

View File

@@ -14,12 +14,17 @@ vi.mock("../plugins/provider-runtime.js", () => {
import { normalizeModelCompat } from "../plugins/provider-model-compat.js";
import {
DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT,
DEFAULT_SMALL_LIVE_MODEL_LIMIT,
isHighSignalLiveModelRef,
isModernModelRef,
isPrioritizedHighSignalLiveModelRef,
isPrioritizedSmallLiveModelRef,
isSmallLiveModelRef,
listPrioritizedHighSignalLiveModelRefs,
listPrioritizedSmallLiveModelRefs,
resolveHighSignalLiveModelLimit,
selectHighSignalLiveItems,
selectSmallLiveItems,
} from "./live-model-filter.js";
const baseModel = (): Model =>
@@ -678,6 +683,33 @@ describe("isPrioritizedHighSignalLiveModelRef", () => {
});
});
describe("isSmallLiveModelRef", () => {
  it("matches the small-model live matrix without requiring provider modern hooks", () => {
    // The first id is deliberately mixed-case: matching must not depend on id casing.
    const smallRefs = [
      { provider: "lmstudio", id: "Qwen/Qwen3.5-9B" },
      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
      { provider: "openrouter", id: "z-ai/glm-5.1" },
    ];
    for (const smallRef of smallRefs) {
      expect(isSmallLiveModelRef(smallRef)).toBe(true);
    }

    // A modern, non-small model must be rejected.
    const modernRef = { provider: "openai", id: "gpt-5.5" };
    expect(isSmallLiveModelRef(modernRef)).toBe(false);

    // The small profile is a plain allowlist; the provider "modern" hook must stay untouched.
    expect(providerRuntimeMocks.resolveProviderModernModelRef).not.toHaveBeenCalled();
  });
});
describe("isPrioritizedSmallLiveModelRef", () => {
  it("lists priority refs as provider/id pairs", () => {
    const topPriorityRef = { provider: "lmstudio", id: "qwen/qwen3.5-9b" };
    expect(isPrioritizedSmallLiveModelRef(topPriorityRef)).toBe(true);

    // Expected output keeps priority order and splits each key at its first "/".
    const expectedRefs = [
      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
      { provider: "vllm", id: "qwen/qwen3-8b" },
      { provider: "sglang", id: "qwen/qwen3-8b" },
      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
      { provider: "openrouter", id: "z-ai/glm-5.1" },
      { provider: "openrouter", id: "z-ai/glm-5" },
      { provider: "zai", id: "glm-5.1" },
    ];
    expect(listPrioritizedSmallLiveModelRefs()).toStrictEqual(expectedRefs);
  });
});
describe("selectHighSignalLiveItems", () => {
it("prefers curated Google replacements before fallback provider spread", () => {
const items = [
@@ -748,6 +780,31 @@ describe("selectHighSignalLiveItems", () => {
});
});
describe("selectSmallLiveItems", () => {
  it("prefers constrained local and hosted small-model routes before fallback spread", () => {
    // Deliberately shuffled, and includes a non-small model, so the result can
    // only come out in priority order if the selector reorders by priority.
    const candidates = [
      { provider: "openrouter", id: "z-ai/glm-5" },
      { provider: "openai", id: "gpt-5.5" },
      { provider: "vllm", id: "qwen/qwen3-8b" },
      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
    ];

    const picked = selectSmallLiveItems(
      candidates,
      3,
      (candidate) => candidate,
      (candidate) => candidate.provider,
    );

    // sglang is absent from the candidates, so the third slot goes to the next priority entry.
    const expectedPicks = [
      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
      { provider: "vllm", id: "qwen/qwen3-8b" },
      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
    ];
    expect(picked).toEqual(expectedPicks);
  });
});
describe("resolveHighSignalLiveModelLimit", () => {
it("defaults modern live sweeps to the curated high-signal cap", () => {
expect(
@@ -757,6 +814,15 @@ describe("resolveHighSignalLiveModelLimit", () => {
).toBe(DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT);
});
it("can default small live sweeps to the curated small-model cap", () => {
  // No explicit model list and no raw max: the caller-supplied default must win.
  const resolvedLimit = resolveHighSignalLiveModelLimit({
    useExplicitModels: false,
    defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT,
  });
  expect(resolvedLimit).toBe(DEFAULT_SMALL_LIVE_MODEL_LIMIT);
});
it("leaves explicit model lists uncapped unless a cap is provided", () => {
expect(
resolveHighSignalLiveModelLimit({

View File

@@ -3,6 +3,7 @@ import { type Api, completeSimple, type Model } from "openclaw/plugin-sdk/llm";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { getRuntimeConfig } from "../config/config.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js";
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
import {
@@ -17,10 +18,15 @@ import { collectAnthropicApiKeys } from "./live-auth-keys.js";
import { appendPrioritizedDynamicLiveModels } from "./live-model-dynamic-candidates.js";
import { isModelNotFoundErrorMessage } from "./live-model-errors.js";
import {
DEFAULT_SMALL_LIVE_MODEL_LIMIT,
isHighSignalLiveModelRef,
isPrioritizedHighSignalLiveModelRef,
isPrioritizedSmallLiveModelRef,
isSmallLiveModelRef,
listPrioritizedSmallLiveModelRefs,
resolveHighSignalLiveModelLimit,
selectHighSignalLiveItems,
selectSmallLiveItems,
shouldExcludeProviderFromDefaultHighSignalLiveSweep,
} from "./live-model-filter.js";
import {
@@ -54,6 +60,7 @@ import {
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { shouldSuppressBuiltInModel } from "./model-suppression.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import { prepareModelForSimpleCompletion } from "./simple-completion-transport.js";
const LIVE = isLiveTestEnabled();
const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
@@ -76,6 +83,7 @@ const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs(
);
const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
// Runtime config shared with completeSimpleWithTimeout, which passes it to
// prepareModelForSimpleCompletion. It is assigned by the live suite right after
// the config is loaded and stays `undefined` until then.
let activeLiveCompletionConfig: OpenClawConfig | undefined;
const describeLive = LIVE ? describe : describe.skip;
@@ -430,9 +438,13 @@ async function completeSimpleWithTimeout<TApi extends Api>(
hardTimer.unref?.();
});
try {
const completionModel = prepareModelForSimpleCompletion({
model,
cfg: activeLiveCompletionConfig,
});
return await withLiveHeartbeat(
Promise.race([
completeSimple(model, context, {
completeSimple(completionModel, context, {
...options,
signal: controller.signal,
}),
@@ -716,6 +728,7 @@ describeLive("live models (profile keys)", () => {
Promise.resolve().then(() => getRuntimeConfig()),
"[live-models] load config",
);
activeLiveCompletionConfig = cfg;
logProgress("[live-models] preparing models.json");
await withLiveStageTimeout(
ensureOpenClawModelsJson(cfg),
@@ -724,7 +737,7 @@ describeLive("live models (profile keys)", () => {
);
if (!DIRECT_ENABLED) {
logProgress(
"[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|<list>; all=modern)",
"[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|small|all|<list>; all=modern)",
);
return;
}
@@ -740,14 +753,19 @@ describeLive("live models (profile keys)", () => {
const agentDir = resolveDefaultAgentDir(cfg);
const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim();
const useModern = rawModels === "modern" || rawModels === "all";
const useExplicit = Boolean(rawModels) && !useModern;
const useSmall = rawModels === "small";
const useExplicit = Boolean(rawModels) && !useModern && !useSmall;
const filter = useExplicit ? parseModelFilter(rawModels) : null;
const useDefaultPriorityOnly = !filter && useModern && !providers;
const allowNotFoundSkip = useModern;
const useSmallPriorityOnly = !filter && useSmall && !providers;
const allowNotFoundSkip = useModern || useSmall;
const models = await (async () => {
if (useDefaultPriorityOnly) {
logProgress("[live-models] loading configured prioritized model refs");
}
if (useSmallPriorityOnly) {
logProgress("[live-models] loading configured small model refs");
}
logProgress("[live-models] loading auth storage");
const authStorage = await withLiveStageTimeout(
Promise.resolve().then(() =>
@@ -779,6 +797,7 @@ describeLive("live models (profile keys)", () => {
agentDir,
env: process.env,
modelRegistry,
...(useSmall ? { refs: listPrioritizedSmallLiveModelRefs() } : {}),
});
if (augmented.added.length > 0) {
logProgress(
@@ -791,6 +810,7 @@ describeLive("live models (profile keys)", () => {
const maxModels = resolveHighSignalLiveModelLimit({
rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS,
useExplicitModels: useExplicit,
...(useSmall ? { defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT } : {}),
});
const targetMatcher = createLiveTargetMatcher({
providerFilter: providers,
@@ -817,7 +837,17 @@ describeLive("live models (profile keys)", () => {
if (!targetMatcher.matchesModel(model.provider, model.id)) {
continue;
}
if (!filter && useModern) {
if (!filter && useSmall) {
if (
useSmallPriorityOnly &&
!isPrioritizedSmallLiveModelRef({ provider: model.provider, id: model.id })
) {
continue;
}
if (!isSmallLiveModelRef({ provider: model.provider, id: model.id })) {
continue;
}
} else if (!filter && useModern) {
if (
useDefaultPriorityOnly &&
!isPrioritizedHighSignalLiveModelRef({ provider: model.provider, id: model.id })
@@ -879,13 +909,15 @@ describeLive("live models (profile keys)", () => {
return;
}
const selectedCandidates = selectHighSignalLiveItems(
const selectCandidates = useSmall ? selectSmallLiveItems : selectHighSignalLiveItems;
const selectedCandidates = selectCandidates(
candidates,
maxModels > 0 ? maxModels : candidates.length,
(entry) => ({ provider: entry.model.provider, id: entry.model.id }),
(entry) => entry.model.provider,
);
logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
const selectionLabel = useExplicit ? "explicit" : useSmall ? "small" : "high-signal";
logProgress(`[live-models] selection=${selectionLabel}`);
if (selectedCandidates.length < candidates.length) {
logProgress(
`[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,