test(agents): add small model live profile (#87638)

2026-06-02 03:44:57 +00:00 · 2026-05-28 21:17:40 +01:00
parent f7507fd921
commit 686751f639
4 changed files with 182 additions and 16 deletions
--- a/docs/help/testing-live.md
+++ b/docs/help/testing-live.md
@@ -71,12 +71,13 @@ Live tests are split into two layers so we can isolate failures:
  - Run a small completion per model (and targeted regressions where needed)
 - How to enable:
  - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
- Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
+- Set `OPENCLAW_LIVE_MODELS` to `modern`, `small`, or `all` (an alias for `modern`) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
 - How to select models:
  - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.2 + Codex, Gemini 3, DeepSeek V4, GLM 4.7, MiniMax M2.7, Grok 4.3)
+  - `OPENCLAW_LIVE_MODELS=small` to run the constrained small-model allowlist (Qwen 8B/9B local-compatible routes, OpenRouter Qwen/GLM, and Z.AI GLM)
  - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
  - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,..."` (comma allowlist)
-  - Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
+  - Modern/all and small sweeps default to their curated caps; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive sweep of the selected profile, or a positive number for a smaller cap.
  - Exhaustive sweeps use `OPENCLAW_LIVE_TEST_TIMEOUT_MS` for the whole direct-model test timeout. Default: 60 minutes.
  - Direct-model probes run with 20-way parallelism by default; set `OPENCLAW_LIVE_MODEL_CONCURRENCY` to override.
 - How to select providers:
@@ -339,6 +340,12 @@ Narrow, explicit allowlists are fastest and least flaky:
 - Single model, direct (no gateway):
  - `OPENCLAW_LIVE_MODELS="openai/gpt-5.5" pnpm test:live src/agents/models.profiles.live.test.ts`

+- Small-model direct profile:
+  - `OPENCLAW_LIVE_MODELS=small pnpm test:live src/agents/models.profiles.live.test.ts`
+
+- Ollama Cloud API smoke:
+  - `OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA=1 OPENCLAW_LIVE_OLLAMA_BASE_URL=https://ollama.com OPENCLAW_LIVE_OLLAMA_MODEL=glm-5.1:cloud OPENCLAW_LIVE_OLLAMA_WEB_SEARCH=0 pnpm test:live -- extensions/ollama/ollama.live.test.ts`
+
 - Single model, gateway smoke:
  - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts`

--- a/src/agents/live-model-filter.ts
+++ b/src/agents/live-model-filter.ts
@@ -30,7 +30,18 @@ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [
  "minimax-portal/minimax-m2.7",
 ] as const;

+const SMALL_LIVE_MODEL_PRIORITY = [
+  "lmstudio/qwen/qwen3.5-9b",
+  "vllm/qwen/qwen3-8b",
+  "sglang/qwen/qwen3-8b",
+  "openrouter/qwen/qwen3.5-9b",
+  "openrouter/z-ai/glm-5.1",
+  "openrouter/z-ai/glm-5",
+  "zai/glm-5.1",
+] as const;
+
 export const DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT = HIGH_SIGNAL_LIVE_MODEL_PRIORITY.length;
+export const DEFAULT_SMALL_LIVE_MODEL_LIMIT = SMALL_LIVE_MODEL_PRIORITY.length;
 const DEFAULT_HIGH_SIGNAL_LIVE_EXCLUDED_PROVIDERS = new Set(["codex", "codex-cli", "openai-codex"]);
 const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
  "fireworks",
@@ -42,6 +53,9 @@ const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
 const HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
  HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]),
 );
+const SMALL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
+  SMALL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]),
+);
 const HIGH_SIGNAL_LIVE_MODEL_IDS_BY_PROVIDER = new Map<string, Set<string>>();
 for (const key of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
  const separatorIndex = key.indexOf("/");
@@ -200,12 +214,29 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
 }

 export function isPrioritizedHighSignalLiveModelRef(ref: ModelRef): boolean {
-  const key = toCanonicalHighSignalLiveModelKey(ref);
-  return key !== null && HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX.has(key);
+  return hasPrioritizedLiveModelRef(HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX, ref);
+}
+
+export function isSmallLiveModelRef(ref: ModelRef): boolean {
+  return hasPrioritizedLiveModelRef(SMALL_LIVE_MODEL_PRIORITY_INDEX, ref);
+}
+
+export function isPrioritizedSmallLiveModelRef(ref: ModelRef): boolean {
+  return isSmallLiveModelRef(ref);
 }

 export function listPrioritizedHighSignalLiveModelRefs(): Array<{ provider: string; id: string }> {
-  return HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key) => {
+  return listPrioritizedLiveModelRefs(HIGH_SIGNAL_LIVE_MODEL_PRIORITY);
+}
+
+export function listPrioritizedSmallLiveModelRefs(): Array<{ provider: string; id: string }> {
+  return listPrioritizedLiveModelRefs(SMALL_LIVE_MODEL_PRIORITY);
+}
+
+function listPrioritizedLiveModelRefs(
+  priority: readonly string[],
+): Array<{ provider: string; id: string }> {
+  return priority.map((key) => {
    const separatorIndex = key.indexOf("/");
    return {
      provider: key.slice(0, separatorIndex),
@@ -258,7 +289,7 @@ export function shouldExcludeProviderFromDefaultHighSignalLiveSweep(params: {
  return true;
 }

-function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
+function toCanonicalLiveModelKey(ref: ModelRef): string | null {
  const provider = normalizeProviderId(ref.provider ?? "");
  const rawId = normalizeLowercaseStringOrEmpty(ref.id);
  if (!provider || !rawId) {
@@ -267,6 +298,11 @@ function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
  return `${provider}/${rawId}`;
 }

+function hasPrioritizedLiveModelRef(index: ReadonlyMap<string, number>, ref: ModelRef): boolean {
+  const key = toCanonicalLiveModelKey(ref);
+  return key !== null && index.has(key);
+}
+
 function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
@@ -315,6 +351,31 @@ export function selectHighSignalLiveItems<T>(
  maxItems: number,
  refOf: (item: T) => ModelRef,
  providerOf: (item: T) => string,
+): T[] {
+  return selectPrioritizedLiveItems(
+    items,
+    maxItems,
+    refOf,
+    providerOf,
+    HIGH_SIGNAL_LIVE_MODEL_PRIORITY,
+  );
+}
+
+export function selectSmallLiveItems<T>(
+  items: T[],
+  maxItems: number,
+  refOf: (item: T) => ModelRef,
+  providerOf: (item: T) => string,
+): T[] {
+  return selectPrioritizedLiveItems(items, maxItems, refOf, providerOf, SMALL_LIVE_MODEL_PRIORITY);
+}
+
+function selectPrioritizedLiveItems<T>(
+  items: T[],
+  maxItems: number,
+  refOf: (item: T) => ModelRef,
+  providerOf: (item: T) => string,
+  priority: readonly string[],
 ): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
@@ -322,12 +383,12 @@ export function selectHighSignalLiveItems<T>(

  const remaining = [...items];
  const selected: T[] = [];
-  for (const preferredKey of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
+  for (const preferredKey of priority) {
    if (selected.length >= maxItems) {
      break;
    }
    const preferredIndex = remaining.findIndex(
-      (item) => toCanonicalHighSignalLiveModelKey(refOf(item)) === preferredKey,
+      (item) => toCanonicalLiveModelKey(refOf(item)) === preferredKey,
    );
    if (preferredIndex < 0) {
      continue;
@@ -362,7 +423,7 @@ export function resolveHighSignalLiveModelLimit(params: {
 }

 export function getHighSignalLiveModelPriorityIndex(ref: ModelRef): number | null {
-  const key = toCanonicalHighSignalLiveModelKey(ref);
+  const key = toCanonicalLiveModelKey(ref);
  if (!key) {
    return null;
  }
--- a/src/agents/model-compat.test.ts
+++ b/src/agents/model-compat.test.ts
@@ -14,12 +14,17 @@ vi.mock("../plugins/provider-runtime.js", () => {
 import { normalizeModelCompat } from "../plugins/provider-model-compat.js";
 import {
  DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT,
+  DEFAULT_SMALL_LIVE_MODEL_LIMIT,
  isHighSignalLiveModelRef,
  isModernModelRef,
  isPrioritizedHighSignalLiveModelRef,
+  isPrioritizedSmallLiveModelRef,
+  isSmallLiveModelRef,
  listPrioritizedHighSignalLiveModelRefs,
+  listPrioritizedSmallLiveModelRefs,
  resolveHighSignalLiveModelLimit,
  selectHighSignalLiveItems,
+  selectSmallLiveItems,
 } from "./live-model-filter.js";

 const baseModel = (): Model =>
@@ -678,6 +683,33 @@ describe("isPrioritizedHighSignalLiveModelRef", () => {
  });
 });

+describe("isSmallLiveModelRef", () => {
+  it("matches the small-model live matrix without requiring provider modern hooks", () => {
+    expect(isSmallLiveModelRef({ provider: "lmstudio", id: "Qwen/Qwen3.5-9B" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openai", id: "gpt-5.5" })).toBe(false);
+    expect(providerRuntimeMocks.resolveProviderModernModelRef).not.toHaveBeenCalled();
+  });
+});
+
+describe("isPrioritizedSmallLiveModelRef", () => {
+  it("lists priority refs as provider/id pairs", () => {
+    expect(isPrioritizedSmallLiveModelRef({ provider: "lmstudio", id: "qwen/qwen3.5-9b" })).toBe(
+      true,
+    );
+    expect(listPrioritizedSmallLiveModelRefs()).toStrictEqual([
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "sglang", id: "qwen/qwen3-8b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+      { provider: "openrouter", id: "z-ai/glm-5.1" },
+      { provider: "openrouter", id: "z-ai/glm-5" },
+      { provider: "zai", id: "glm-5.1" },
+    ]);
+  });
+});
+
 describe("selectHighSignalLiveItems", () => {
  it("prefers curated Google replacements before fallback provider spread", () => {
    const items = [
@@ -748,6 +780,31 @@ describe("selectHighSignalLiveItems", () => {
  });
 });

+describe("selectSmallLiveItems", () => {
+  it("prefers constrained local and hosted small-model routes before fallback spread", () => {
+    const items = [
+      { provider: "openrouter", id: "z-ai/glm-5" },
+      { provider: "openai", id: "gpt-5.5" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+    ];
+
+    expect(
+      selectSmallLiveItems(
+        items,
+        3,
+        (item) => item,
+        (item) => item.provider,
+      ),
+    ).toEqual([
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+    ]);
+  });
+});
+
 describe("resolveHighSignalLiveModelLimit", () => {
  it("defaults modern live sweeps to the curated high-signal cap", () => {
    expect(
@@ -757,6 +814,15 @@ describe("resolveHighSignalLiveModelLimit", () => {
    ).toBe(DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT);
  });

+  it("can default small live sweeps to the curated small-model cap", () => {
+    expect(
+      resolveHighSignalLiveModelLimit({
+        useExplicitModels: false,
+        defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT,
+      }),
+    ).toBe(DEFAULT_SMALL_LIVE_MODEL_LIMIT);
+  });
+
  it("leaves explicit model lists uncapped unless a cap is provided", () => {
    expect(
      resolveHighSignalLiveModelLimit({
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -3,6 +3,7 @@ import { type Api, completeSimple, type Model } from "openclaw/plugin-sdk/llm";
 import { Type } from "typebox";
 import { describe, expect, it } from "vitest";
 import { getRuntimeConfig } from "../config/config.js";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js";
 import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
 import {
@@ -17,10 +18,15 @@ import { collectAnthropicApiKeys } from "./live-auth-keys.js";
 import { appendPrioritizedDynamicLiveModels } from "./live-model-dynamic-candidates.js";
 import { isModelNotFoundErrorMessage } from "./live-model-errors.js";
 import {
+  DEFAULT_SMALL_LIVE_MODEL_LIMIT,
  isHighSignalLiveModelRef,
  isPrioritizedHighSignalLiveModelRef,
+  isPrioritizedSmallLiveModelRef,
+  isSmallLiveModelRef,
+  listPrioritizedSmallLiveModelRefs,
  resolveHighSignalLiveModelLimit,
  selectHighSignalLiveItems,
+  selectSmallLiveItems,
  shouldExcludeProviderFromDefaultHighSignalLiveSweep,
 } from "./live-model-filter.js";
 import {
@@ -54,6 +60,7 @@ import {
 import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
 import { shouldSuppressBuiltInModel } from "./model-suppression.js";
 import { ensureOpenClawModelsJson } from "./models-config.js";
+import { prepareModelForSimpleCompletion } from "./simple-completion-transport.js";

 const LIVE = isLiveTestEnabled();
 const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
@@ -76,6 +83,7 @@ const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs(
 );
 const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
 const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
+let activeLiveCompletionConfig: OpenClawConfig | undefined;

 const describeLive = LIVE ? describe : describe.skip;

@@ -430,9 +438,13 @@ async function completeSimpleWithTimeout<TApi extends Api>(
    hardTimer.unref?.();
  });
  try {
+    const completionModel = prepareModelForSimpleCompletion({
+      model,
+      cfg: activeLiveCompletionConfig,
+    });
    return await withLiveHeartbeat(
      Promise.race([
-        completeSimple(model, context, {
+        completeSimple(completionModel, context, {
          ...options,
          signal: controller.signal,
        }),
@@ -716,6 +728,7 @@ describeLive("live models (profile keys)", () => {
        Promise.resolve().then(() => getRuntimeConfig()),
        "[live-models] load config",
      );
+      activeLiveCompletionConfig = cfg;
      logProgress("[live-models] preparing models.json");
      await withLiveStageTimeout(
        ensureOpenClawModelsJson(cfg),
@@ -724,7 +737,7 @@ describeLive("live models (profile keys)", () => {
      );
      if (!DIRECT_ENABLED) {
        logProgress(
-          "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|<list>; all=modern)",
+          "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|small|all|<list>; all=modern)",
        );
        return;
      }
@@ -740,14 +753,19 @@ describeLive("live models (profile keys)", () => {
      const agentDir = resolveDefaultAgentDir(cfg);
      const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim();
      const useModern = rawModels === "modern" || rawModels === "all";
-      const useExplicit = Boolean(rawModels) && !useModern;
+      const useSmall = rawModels === "small";
+      const useExplicit = Boolean(rawModels) && !useModern && !useSmall;
      const filter = useExplicit ? parseModelFilter(rawModels) : null;
      const useDefaultPriorityOnly = !filter && useModern && !providers;
-      const allowNotFoundSkip = useModern;
+      const useSmallPriorityOnly = !filter && useSmall && !providers;
+      const allowNotFoundSkip = useModern || useSmall;
      const models = await (async () => {
        if (useDefaultPriorityOnly) {
          logProgress("[live-models] loading configured prioritized model refs");
        }
+        if (useSmallPriorityOnly) {
+          logProgress("[live-models] loading configured small model refs");
+        }
        logProgress("[live-models] loading auth storage");
        const authStorage = await withLiveStageTimeout(
          Promise.resolve().then(() =>
@@ -779,6 +797,7 @@ describeLive("live models (profile keys)", () => {
          agentDir,
          env: process.env,
          modelRegistry,
+          ...(useSmall ? { refs: listPrioritizedSmallLiveModelRefs() } : {}),
        });
        if (augmented.added.length > 0) {
          logProgress(
@@ -791,6 +810,7 @@ describeLive("live models (profile keys)", () => {
      const maxModels = resolveHighSignalLiveModelLimit({
        rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS,
        useExplicitModels: useExplicit,
+        ...(useSmall ? { defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT } : {}),
      });
      const targetMatcher = createLiveTargetMatcher({
        providerFilter: providers,
@@ -817,7 +837,17 @@ describeLive("live models (profile keys)", () => {
        if (!targetMatcher.matchesModel(model.provider, model.id)) {
          continue;
        }
-        if (!filter && useModern) {
+        if (!filter && useSmall) {
+          if (
+            useSmallPriorityOnly &&
+            !isPrioritizedSmallLiveModelRef({ provider: model.provider, id: model.id })
+          ) {
+            continue;
+          }
+          if (!isSmallLiveModelRef({ provider: model.provider, id: model.id })) {
+            continue;
+          }
+        } else if (!filter && useModern) {
          if (
            useDefaultPriorityOnly &&
            !isPrioritizedHighSignalLiveModelRef({ provider: model.provider, id: model.id })
@@ -879,13 +909,15 @@ describeLive("live models (profile keys)", () => {
        return;
      }

-      const selectedCandidates = selectHighSignalLiveItems(
+      const selectCandidates = useSmall ? selectSmallLiveItems : selectHighSignalLiveItems;
+      const selectedCandidates = selectCandidates(
        candidates,
        maxModels > 0 ? maxModels : candidates.length,
        (entry) => ({ provider: entry.model.provider, id: entry.model.id }),
        (entry) => entry.model.provider,
      );
-      logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
+      const selectionLabel = useExplicit ? "explicit" : useSmall ? "small" : "high-signal";
+      logProgress(`[live-models] selection=${selectionLabel}`);
      if (selectedCandidates.length < candidates.length) {
        logProgress(
          `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,