From 686751f639dce005bba243874d01bd0bbdb01efd Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Thu, 28 May 2026 21:17:40 +0100
Subject: [PATCH] test(agents): add small model live profile (#87638)

---
 docs/help/testing-live.md               | 11 +++-
 src/agents/live-model-filter.ts         | 75 ++++++++++++++++++++++---
 src/agents/model-compat.test.ts         | 66 ++++++++++++++++++++++
 src/agents/models.profiles.live.test.ts | 46 ++++++++++++---
 4 files changed, 182 insertions(+), 16 deletions(-)

diff --git a/docs/help/testing-live.md b/docs/help/testing-live.md
index ea71a1c611c..5043cc1e835 100644
--- a/docs/help/testing-live.md
+++ b/docs/help/testing-live.md
@@ -71,12 +71,13 @@ Live tests are split into two layers so we can isolate failures:
   - Run a small completion per model (and targeted regressions where needed)
 - How to enable:
   - `pnpm test:live` (or `OPENCLAW_LIVE_TEST=1` if invoking Vitest directly)
-- Set `OPENCLAW_LIVE_MODELS=modern` (or `all`, alias for modern) to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
+- Set `OPENCLAW_LIVE_MODELS` to `modern`, `small`, `all` (alias for modern), or an explicit comma-separated model list to actually run this suite; otherwise it skips to keep `pnpm test:live` focused on gateway smoke
 - How to select models:
   - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.2 + Codex, Gemini 3, DeepSeek V4, GLM 4.7, MiniMax M2.7, Grok 4.3)
+  - `OPENCLAW_LIVE_MODELS=small` to run the constrained small-model allowlist (Qwen 8B/9B local-compatible routes, OpenRouter Qwen/GLM, and Z.AI GLM)
   - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist
   - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,..."` (comma allowlist)
-  - Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap.
+  - Modern/all and small sweeps each default to their own curated cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive sweep of the selected profile, or a positive number for a smaller cap.
   - Exhaustive sweeps use `OPENCLAW_LIVE_TEST_TIMEOUT_MS` for the whole direct-model test timeout. Default: 60 minutes.
   - Direct-model probes run with 20-way parallelism by default; set `OPENCLAW_LIVE_MODEL_CONCURRENCY` to override.
 - How to select providers:
@@ -339,6 +340,12 @@ Narrow, explicit allowlists are fastest and least flaky:
 - Single model, direct (no gateway):
   - `OPENCLAW_LIVE_MODELS="openai/gpt-5.5" pnpm test:live src/agents/models.profiles.live.test.ts`
 
+- Small-model direct profile:
+  - `OPENCLAW_LIVE_MODELS=small pnpm test:live src/agents/models.profiles.live.test.ts`
+
+- Ollama Cloud API smoke:
+  - `OPENCLAW_LIVE_TEST=1 OPENCLAW_LIVE_OLLAMA=1 OPENCLAW_LIVE_OLLAMA_BASE_URL=https://ollama.com OPENCLAW_LIVE_OLLAMA_MODEL=glm-5.1:cloud OPENCLAW_LIVE_OLLAMA_WEB_SEARCH=0 pnpm test:live -- extensions/ollama/ollama.live.test.ts`
+
 - Single model, gateway smoke:
   - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts`
 
diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts
index 073e926fba4..27bbf694af7 100644
--- a/src/agents/live-model-filter.ts
+++ b/src/agents/live-model-filter.ts
@@ -30,7 +30,18 @@ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [
   "minimax-portal/minimax-m2.7",
 ] as const;
 
+const SMALL_LIVE_MODEL_PRIORITY = [
+  "lmstudio/qwen/qwen3.5-9b",
+  "vllm/qwen/qwen3-8b",
+  "sglang/qwen/qwen3-8b",
+  "openrouter/qwen/qwen3.5-9b",
+  "openrouter/z-ai/glm-5.1",
+  "openrouter/z-ai/glm-5",
+  "zai/glm-5.1",
+] as const;
+
 export const DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT = HIGH_SIGNAL_LIVE_MODEL_PRIORITY.length;
+export const DEFAULT_SMALL_LIVE_MODEL_LIMIT = SMALL_LIVE_MODEL_PRIORITY.length;
 const DEFAULT_HIGH_SIGNAL_LIVE_EXCLUDED_PROVIDERS = new Set(["codex", "codex-cli", "openai-codex"]);
 const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
   "fireworks",
@@ -42,6 +53,9 @@ const CURATED_ONLY_HIGH_SIGNAL_LIVE_PROVIDERS = new Set([
 const HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
   HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]),
 );
+const SMALL_LIVE_MODEL_PRIORITY_INDEX = new Map<string, number>(
+  SMALL_LIVE_MODEL_PRIORITY.map((key, index) => [key, index]),
+);
 const HIGH_SIGNAL_LIVE_MODEL_IDS_BY_PROVIDER = new Map<string, Set<string>>();
 for (const key of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
   const separatorIndex = key.indexOf("/");
@@ -200,12 +214,29 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean {
 }
 
 export function isPrioritizedHighSignalLiveModelRef(ref: ModelRef): boolean {
-  const key = toCanonicalHighSignalLiveModelKey(ref);
-  return key !== null && HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX.has(key);
+  return hasPrioritizedLiveModelRef(HIGH_SIGNAL_LIVE_MODEL_PRIORITY_INDEX, ref);
+}
+
+export function isSmallLiveModelRef(ref: ModelRef): boolean {
+  return hasPrioritizedLiveModelRef(SMALL_LIVE_MODEL_PRIORITY_INDEX, ref);
+}
+
+export function isPrioritizedSmallLiveModelRef(ref: ModelRef): boolean {
+  return isSmallLiveModelRef(ref);
 }
 
 export function listPrioritizedHighSignalLiveModelRefs(): Array<{ provider: string; id: string }> {
-  return HIGH_SIGNAL_LIVE_MODEL_PRIORITY.map((key) => {
+  return listPrioritizedLiveModelRefs(HIGH_SIGNAL_LIVE_MODEL_PRIORITY);
+}
+
+export function listPrioritizedSmallLiveModelRefs(): Array<{ provider: string; id: string }> {
+  return listPrioritizedLiveModelRefs(SMALL_LIVE_MODEL_PRIORITY);
+}
+
+function listPrioritizedLiveModelRefs(
+  priority: readonly string[],
+): Array<{ provider: string; id: string }> {
+  return priority.map((key) => {
     const separatorIndex = key.indexOf("/");
     return {
       provider: key.slice(0, separatorIndex),
@@ -258,7 +289,7 @@ export function shouldExcludeProviderFromDefaultHighSignalLiveSweep(params: {
   return true;
 }
 
-function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
+function toCanonicalLiveModelKey(ref: ModelRef): string | null {
   const provider = normalizeProviderId(ref.provider ?? "");
   const rawId = normalizeLowercaseStringOrEmpty(ref.id);
   if (!provider || !rawId) {
@@ -267,6 +298,11 @@ function toCanonicalHighSignalLiveModelKey(ref: ModelRef): string | null {
   return `${provider}/${rawId}`;
 }
 
+function hasPrioritizedLiveModelRef(index: ReadonlyMap<string, number>, ref: ModelRef): boolean {
+  const key = toCanonicalLiveModelKey(ref);
+  return key !== null && index.has(key);
+}
+
 function capByProviderSpread<T>(
   items: T[],
   maxItems: number,
@@ -315,6 +351,31 @@ export function selectHighSignalLiveItems<T>(
   maxItems: number,
   refOf: (item: T) => ModelRef,
   providerOf: (item: T) => string,
+): T[] {
+  return selectPrioritizedLiveItems(
+    items,
+    maxItems,
+    refOf,
+    providerOf,
+    HIGH_SIGNAL_LIVE_MODEL_PRIORITY,
+  );
+}
+
+export function selectSmallLiveItems<T>(
+  items: T[],
+  maxItems: number,
+  refOf: (item: T) => ModelRef,
+  providerOf: (item: T) => string,
+): T[] {
+  return selectPrioritizedLiveItems(items, maxItems, refOf, providerOf, SMALL_LIVE_MODEL_PRIORITY);
+}
+
+function selectPrioritizedLiveItems<T>(
+  items: T[],
+  maxItems: number,
+  refOf: (item: T) => ModelRef,
+  providerOf: (item: T) => string,
+  priority: readonly string[],
 ): T[] {
   if (maxItems <= 0 || items.length <= maxItems) {
     return items;
@@ -322,12 +383,12 @@ export function selectHighSignalLiveItems<T>(
 
   const remaining = [...items];
   const selected: T[] = [];
-  for (const preferredKey of HIGH_SIGNAL_LIVE_MODEL_PRIORITY) {
+  for (const preferredKey of priority) {
     if (selected.length >= maxItems) {
       break;
     }
     const preferredIndex = remaining.findIndex(
-      (item) => toCanonicalHighSignalLiveModelKey(refOf(item)) === preferredKey,
+      (item) => toCanonicalLiveModelKey(refOf(item)) === preferredKey,
     );
     if (preferredIndex < 0) {
       continue;
@@ -362,7 +423,7 @@ export function resolveHighSignalLiveModelLimit(params: {
 }
 
 export function getHighSignalLiveModelPriorityIndex(ref: ModelRef): number | null {
-  const key = toCanonicalHighSignalLiveModelKey(ref);
+  const key = toCanonicalLiveModelKey(ref);
   if (!key) {
     return null;
   }
diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts
index e1ebd3c451e..f9c513782bf 100644
--- a/src/agents/model-compat.test.ts
+++ b/src/agents/model-compat.test.ts
@@ -14,12 +14,17 @@ vi.mock("../plugins/provider-runtime.js", () => {
 import { normalizeModelCompat } from "../plugins/provider-model-compat.js";
 import {
   DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT,
+  DEFAULT_SMALL_LIVE_MODEL_LIMIT,
   isHighSignalLiveModelRef,
   isModernModelRef,
   isPrioritizedHighSignalLiveModelRef,
+  isPrioritizedSmallLiveModelRef,
+  isSmallLiveModelRef,
   listPrioritizedHighSignalLiveModelRefs,
+  listPrioritizedSmallLiveModelRefs,
   resolveHighSignalLiveModelLimit,
   selectHighSignalLiveItems,
+  selectSmallLiveItems,
 } from "./live-model-filter.js";
 
 const baseModel = (): Model =>
@@ -678,6 +683,33 @@ describe("isPrioritizedHighSignalLiveModelRef", () => {
   });
 });
 
+describe("isSmallLiveModelRef", () => {
+  it("matches the small-model live matrix without requiring provider modern hooks", () => {
+    expect(isSmallLiveModelRef({ provider: "lmstudio", id: "Qwen/Qwen3.5-9B" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openrouter", id: "qwen/qwen3.5-9b" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openrouter", id: "z-ai/glm-5.1" })).toBe(true);
+    expect(isSmallLiveModelRef({ provider: "openai", id: "gpt-5.5" })).toBe(false);
+    expect(providerRuntimeMocks.resolveProviderModernModelRef).not.toHaveBeenCalled();
+  });
+});
+
+describe("isPrioritizedSmallLiveModelRef", () => {
+  it("lists priority refs as provider/id pairs", () => {
+    expect(isPrioritizedSmallLiveModelRef({ provider: "lmstudio", id: "qwen/qwen3.5-9b" })).toBe(
+      true,
+    );
+    expect(listPrioritizedSmallLiveModelRefs()).toStrictEqual([
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "sglang", id: "qwen/qwen3-8b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+      { provider: "openrouter", id: "z-ai/glm-5.1" },
+      { provider: "openrouter", id: "z-ai/glm-5" },
+      { provider: "zai", id: "glm-5.1" },
+    ]);
+  });
+});
+
 describe("selectHighSignalLiveItems", () => {
   it("prefers curated Google replacements before fallback provider spread", () => {
     const items = [
@@ -748,6 +780,31 @@ describe("selectHighSignalLiveItems", () => {
   });
 });
 
+describe("selectSmallLiveItems", () => {
+  it("prefers constrained local and hosted small-model routes before fallback spread", () => {
+    const items = [
+      { provider: "openrouter", id: "z-ai/glm-5" },
+      { provider: "openai", id: "gpt-5.5" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+    ];
+
+    expect(
+      selectSmallLiveItems(
+        items,
+        3,
+        (item) => item,
+        (item) => item.provider,
+      ),
+    ).toEqual([
+      { provider: "lmstudio", id: "qwen/qwen3.5-9b" },
+      { provider: "vllm", id: "qwen/qwen3-8b" },
+      { provider: "openrouter", id: "qwen/qwen3.5-9b" },
+    ]);
+  });
+});
+
 describe("resolveHighSignalLiveModelLimit", () => {
   it("defaults modern live sweeps to the curated high-signal cap", () => {
     expect(
@@ -757,6 +814,15 @@ describe("resolveHighSignalLiveModelLimit", () => {
     ).toBe(DEFAULT_HIGH_SIGNAL_LIVE_MODEL_LIMIT);
   });
 
+  it("can default small live sweeps to the curated small-model cap", () => {
+    expect(
+      resolveHighSignalLiveModelLimit({
+        useExplicitModels: false,
+        defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT,
+      }),
+    ).toBe(DEFAULT_SMALL_LIVE_MODEL_LIMIT);
+  });
+
   it("leaves explicit model lists uncapped unless a cap is provided", () => {
     expect(
       resolveHighSignalLiveModelLimit({
diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts
index 026a73d3560..11702721fb4 100644
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -3,6 +3,7 @@ import { type Api, completeSimple, type Model } from "openclaw/plugin-sdk/llm";
 import { Type } from "typebox";
 import { describe, expect, it } from "vitest";
 import { getRuntimeConfig } from "../config/config.js";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js";
 import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
 import {
@@ -17,10 +18,15 @@ import { collectAnthropicApiKeys } from "./live-auth-keys.js";
 import { appendPrioritizedDynamicLiveModels } from "./live-model-dynamic-candidates.js";
 import { isModelNotFoundErrorMessage } from "./live-model-errors.js";
 import {
+  DEFAULT_SMALL_LIVE_MODEL_LIMIT,
   isHighSignalLiveModelRef,
   isPrioritizedHighSignalLiveModelRef,
+  isPrioritizedSmallLiveModelRef,
+  isSmallLiveModelRef,
+  listPrioritizedSmallLiveModelRefs,
   resolveHighSignalLiveModelLimit,
   selectHighSignalLiveItems,
+  selectSmallLiveItems,
   shouldExcludeProviderFromDefaultHighSignalLiveSweep,
 } from "./live-model-filter.js";
 import {
@@ -54,6 +60,7 @@ import {
 import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
 import { shouldSuppressBuiltInModel } from "./model-suppression.js";
 import { ensureOpenClawModelsJson } from "./models-config.js";
+import { prepareModelForSimpleCompletion } from "./simple-completion-transport.js";
 
 const LIVE = isLiveTestEnabled();
 const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
@@ -76,6 +83,7 @@ const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs(
 );
 const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
 const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
+let activeLiveCompletionConfig: OpenClawConfig | undefined;
 
 const describeLive = LIVE ? describe : describe.skip;
 
@@ -430,9 +438,13 @@ async function completeSimpleWithTimeout<TApi extends Api>(
     hardTimer.unref?.();
   });
   try {
+    const completionModel = prepareModelForSimpleCompletion({
+      model,
+      cfg: activeLiveCompletionConfig,
+    });
     return await withLiveHeartbeat(
       Promise.race([
-        completeSimple(model, context, {
+        completeSimple(completionModel, context, {
           ...options,
           signal: controller.signal,
         }),
@@ -716,6 +728,7 @@ describeLive("live models (profile keys)", () => {
         Promise.resolve().then(() => getRuntimeConfig()),
         "[live-models] load config",
       );
+      activeLiveCompletionConfig = cfg;
       logProgress("[live-models] preparing models.json");
       await withLiveStageTimeout(
         ensureOpenClawModelsJson(cfg),
@@ -724,7 +737,7 @@ describeLive("live models (profile keys)", () => {
       );
       if (!DIRECT_ENABLED) {
         logProgress(
-          "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|<list>; all=modern)",
+          "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|small|all|<list>; all=modern)",
         );
         return;
       }
@@ -740,14 +753,19 @@ describeLive("live models (profile keys)", () => {
       const agentDir = resolveDefaultAgentDir(cfg);
       const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim();
       const useModern = rawModels === "modern" || rawModels === "all";
-      const useExplicit = Boolean(rawModels) && !useModern;
+      const useSmall = rawModels === "small";
+      const useExplicit = Boolean(rawModels) && !useModern && !useSmall;
       const filter = useExplicit ? parseModelFilter(rawModels) : null;
       const useDefaultPriorityOnly = !filter && useModern && !providers;
-      const allowNotFoundSkip = useModern;
+      const useSmallPriorityOnly = !filter && useSmall && !providers;
+      const allowNotFoundSkip = useModern || useSmall;
       const models = await (async () => {
         if (useDefaultPriorityOnly) {
           logProgress("[live-models] loading configured prioritized model refs");
         }
+        if (useSmallPriorityOnly) {
+          logProgress("[live-models] loading configured small model refs");
+        }
         logProgress("[live-models] loading auth storage");
         const authStorage = await withLiveStageTimeout(
           Promise.resolve().then(() =>
@@ -779,6 +797,7 @@ describeLive("live models (profile keys)", () => {
           agentDir,
           env: process.env,
           modelRegistry,
+          ...(useSmall ? { refs: listPrioritizedSmallLiveModelRefs() } : {}),
         });
         if (augmented.added.length > 0) {
           logProgress(
@@ -791,6 +810,7 @@ describeLive("live models (profile keys)", () => {
       const maxModels = resolveHighSignalLiveModelLimit({
         rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS,
         useExplicitModels: useExplicit,
+        ...(useSmall ? { defaultLimit: DEFAULT_SMALL_LIVE_MODEL_LIMIT } : {}),
       });
       const targetMatcher = createLiveTargetMatcher({
         providerFilter: providers,
@@ -817,7 +837,17 @@ describeLive("live models (profile keys)", () => {
         if (!targetMatcher.matchesModel(model.provider, model.id)) {
           continue;
         }
-        if (!filter && useModern) {
+        if (!filter && useSmall) {
+          if (
+            useSmallPriorityOnly &&
+            !isPrioritizedSmallLiveModelRef({ provider: model.provider, id: model.id })
+          ) {
+            continue;
+          }
+          if (!isSmallLiveModelRef({ provider: model.provider, id: model.id })) {
+            continue;
+          }
+        } else if (!filter && useModern) {
           if (
             useDefaultPriorityOnly &&
             !isPrioritizedHighSignalLiveModelRef({ provider: model.provider, id: model.id })
@@ -879,13 +909,15 @@ describeLive("live models (profile keys)", () => {
         return;
       }
 
-      const selectedCandidates = selectHighSignalLiveItems(
+      const selectCandidates = useSmall ? selectSmallLiveItems : selectHighSignalLiveItems;
+      const selectedCandidates = selectCandidates(
         candidates,
         maxModels > 0 ? maxModels : candidates.length,
         (entry) => ({ provider: entry.model.provider, id: entry.model.id }),
         (entry) => entry.model.provider,
       );
-      logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
+      const selectionLabel = useExplicit ? "explicit" : useSmall ? "small" : "high-signal";
+      logProgress(`[live-models] selection=${selectionLabel}`);
       if (selectedCandidates.length < candidates.length) {
         logProgress(
           `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,