diff --git a/docs/help/testing.md b/docs/help/testing.md
index 870e1daaeb7..78f4e2fee2c 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -498,7 +498,15 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
 
 These Docker runners split into two buckets:
 
-- Live-model runners: `test:docker:live-models` and `test:docker:live-gateway` run `pnpm test:live` inside the repo Docker image, mounting your local config dir and workspace (and sourcing `~/.profile` if mounted).
+- Live-model runners: `test:docker:live-models` and `test:docker:live-gateway` run only their matching profile-key live file inside the repo Docker image (`src/agents/models.profiles.live.test.ts` and `src/gateway/gateway-models.profiles.live.test.ts`), mounting your local config dir and workspace (and sourcing `~/.profile` if mounted). The matching local entrypoints are `test:live:models-profiles` and `test:live:gateway-profiles`.
+- Docker live runners default to smaller smoke caps so a full Docker sweep stays practical.
+  `test:docker:live-models` defaults to `OPENCLAW_LIVE_MAX_MODELS=12`;
+  `test:docker:live-gateway` defaults to `OPENCLAW_LIVE_GATEWAY_SMOKE=1`,
+  `OPENCLAW_LIVE_GATEWAY_MAX_MODELS=8`,
+  `OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=45000`, and
+  `OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=90000`. Override these env vars when you
+  explicitly want the larger, exhaustive scan.
+- `test:docker:all` builds the live Docker image once via `test:docker:live-build`, then reuses it for the two live Docker lanes.
 - Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:gateway-network`, `test:docker:mcp-channels`, and `test:docker:plugins` boot one or more real containers and verify higher-level integration paths.
 
 The live-model Docker runners also bind-mount only the needed CLI auth homes (or all supported ones when the run is not narrowed), then copy them into the container home before the run so external-CLI OAuth can refresh tokens without mutating the host auth store:
diff --git a/package.json b/package.json
index f51b6b24775..d3eb1d6d967 100644
--- a/package.json
+++ b/package.json
@@ -1155,11 +1155,12 @@
     "test:contracts:plugins": "OPENCLAW_TEST_PROFILE=serial pnpm exec vitest run --config vitest.contracts.config.ts src/plugins/contracts",
     "test:coverage": "vitest run --config vitest.unit.config.ts --coverage",
     "test:coverage:changed": "vitest run --config vitest.unit.config.ts --coverage --changed origin/main",
-    "test:docker:all": "pnpm test:docker:live-models && pnpm test:docker:live-gateway && pnpm test:docker:openwebui && pnpm test:docker:onboard && pnpm test:docker:gateway-network && pnpm test:docker:mcp-channels && pnpm test:docker:qr && pnpm test:docker:doctor-switch && pnpm test:docker:plugins && pnpm test:docker:cleanup",
+    "test:docker:all": "pnpm test:docker:live-build && OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-models && OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:live-gateway && pnpm test:docker:openwebui && pnpm test:docker:onboard && pnpm test:docker:gateway-network && pnpm test:docker:mcp-channels && pnpm test:docker:qr && pnpm test:docker:doctor-switch && pnpm test:docker:plugins && pnpm test:docker:cleanup",
     "test:docker:cleanup": "bash scripts/test-cleanup-docker.sh",
     "test:docker:doctor-switch": "bash scripts/e2e/doctor-install-switch-docker.sh",
     "test:docker:gateway-network": "bash scripts/e2e/gateway-network-docker.sh",
     "test:docker:live-acp-bind": "bash scripts/test-live-acp-bind-docker.sh",
+    "test:docker:live-build": "bash scripts/test-live-build-docker.sh",
     "test:docker:live-cli-backend": "bash scripts/test-live-cli-backend-docker.sh",
     "test:docker:live-gateway": "bash scripts/test-live-gateway-models-docker.sh",
     "test:docker:live-models": "bash scripts/test-live-models-docker.sh",
@@ -1182,6 +1183,8 @@
     "test:install:e2e:openai": "OPENCLAW_E2E_MODELS=openai bash scripts/test-install-sh-e2e-docker.sh",
     "test:install:smoke": "bash scripts/test-install-sh-docker.sh",
     "test:live": "node scripts/test-live.mjs",
+    "test:live:gateway-profiles": "node scripts/test-live.mjs -- src/gateway/gateway-models.profiles.live.test.ts",
+    "test:live:models-profiles": "node scripts/test-live.mjs -- src/agents/models.profiles.live.test.ts",
     "test:max": "node scripts/test-parallel.mjs --profile max",
     "test:parallels:linux": "bash scripts/e2e/parallels-linux-smoke.sh",
     "test:parallels:macos": "bash scripts/e2e/parallels-macos-smoke.sh",
diff --git a/scripts/e2e/plugins-docker.sh b/scripts/e2e/plugins-docker.sh
index 6c974755144..814cf732794 100755
--- a/scripts/e2e/plugins-docker.sh
+++ b/scripts/e2e/plugins-docker.sh
@@ -282,11 +282,13 @@ cat > "$demo_plugin_root/openclaw.plugin.json" <<'JSON'
 JSON
 
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins.json
+node "$OPENCLAW_ENTRY" plugins inspect demo-plugin --json > /tmp/plugins-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins.json", "utf8"));
+const inspect = JSON.parse(fs.readFileSync("/tmp/plugins-inspect.json", "utf8"));
 const plugin = (data.plugins || []).find((entry) => entry.id === "demo-plugin");
 if (!plugin) throw new Error("plugin not found");
 if (plugin.status !== "loaded") {
@@ -299,10 +301,13 @@ const assertIncludes = (list, value, label) => {
   }
 };
 
-assertIncludes(plugin.toolNames, "demo_tool", "tool");
-assertIncludes(plugin.gatewayMethods, "demo.ping", "gateway method");
-assertIncludes(plugin.cliCommands, "demo", "cli command");
-assertIncludes(plugin.services, "demo-service", "service");
+const inspectToolNames = Array.isArray(inspect.tools)
+  ? inspect.tools.flatMap((entry) => (Array.isArray(entry?.names) ? entry.names : []))
+  : [];
+assertIncludes(inspectToolNames, "demo_tool", "tool");
+assertIncludes(inspect.gatewayMethods, "demo.ping", "gateway method");
+assertIncludes(inspect.cliCommands, "demo", "cli command");
+assertIncludes(inspect.services, "demo-service", "service");
 
 const diagErrors = (data.diagnostics || []).filter((diag) => diag.level === "error");
 if (diagErrors.length > 0) {
@@ -344,17 +349,19 @@ tar -czf /tmp/demo-plugin-tgz.tgz -C "$pack_dir" package
 
 node "$OPENCLAW_ENTRY" plugins install /tmp/demo-plugin-tgz.tgz
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins2.json
+node "$OPENCLAW_ENTRY" plugins inspect demo-plugin-tgz --json > /tmp/plugins2-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins2.json", "utf8"));
+const inspect = JSON.parse(fs.readFileSync("/tmp/plugins2-inspect.json", "utf8"));
 const plugin = (data.plugins || []).find((entry) => entry.id === "demo-plugin-tgz");
 if (!plugin) throw new Error("tgz plugin not found");
 if (plugin.status !== "loaded") {
   throw new Error(`unexpected status: ${plugin.status}`);
 }
-if (!Array.isArray(plugin.gatewayMethods) || !plugin.gatewayMethods.includes("demo.tgz")) {
+if (!Array.isArray(inspect.gatewayMethods) || !inspect.gatewayMethods.includes("demo.tgz")) {
   throw new Error("expected gateway method demo.tgz");
 }
 console.log("ok");
@@ -390,17 +397,19 @@ JSON
 
 node "$OPENCLAW_ENTRY" plugins install "$dir_plugin"
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins3.json
+node "$OPENCLAW_ENTRY" plugins inspect demo-plugin-dir --json > /tmp/plugins3-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins3.json", "utf8"));
+const inspect = JSON.parse(fs.readFileSync("/tmp/plugins3-inspect.json", "utf8"));
 const plugin = (data.plugins || []).find((entry) => entry.id === "demo-plugin-dir");
 if (!plugin) throw new Error("dir plugin not found");
 if (plugin.status !== "loaded") {
   throw new Error(`unexpected status: ${plugin.status}`);
 }
-if (!Array.isArray(plugin.gatewayMethods) || !plugin.gatewayMethods.includes("demo.dir")) {
+if (!Array.isArray(inspect.gatewayMethods) || !inspect.gatewayMethods.includes("demo.dir")) {
   throw new Error("expected gateway method demo.dir");
 }
 console.log("ok");
@@ -437,17 +446,19 @@ JSON
 
 node "$OPENCLAW_ENTRY" plugins install "file:$file_pack_dir/package"
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins4.json
+node "$OPENCLAW_ENTRY" plugins inspect demo-plugin-file --json > /tmp/plugins4-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins4.json", "utf8"));
+const inspect = JSON.parse(fs.readFileSync("/tmp/plugins4-inspect.json", "utf8"));
 const plugin = (data.plugins || []).find((entry) => entry.id === "demo-plugin-file");
 if (!plugin) throw new Error("file plugin not found");
 if (plugin.status !== "loaded") {
   throw new Error(`unexpected status: ${plugin.status}`);
 }
-if (!Array.isArray(plugin.gatewayMethods) || !plugin.gatewayMethods.includes("demo.file")) {
+if (!Array.isArray(inspect.gatewayMethods) || !inspect.gatewayMethods.includes("demo.file")) {
   throw new Error("expected gateway method demo.file");
 }
 console.log("ok");
@@ -704,11 +715,19 @@ NODE
 node "$OPENCLAW_ENTRY" plugins install marketplace-shortcut@claude-fixtures
 node "$OPENCLAW_ENTRY" plugins install marketplace-direct --marketplace claude-fixtures
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins-marketplace.json
+node "$OPENCLAW_ENTRY" plugins inspect marketplace-shortcut --json > /tmp/plugins-marketplace-shortcut-inspect.json
+node "$OPENCLAW_ENTRY" plugins inspect marketplace-direct --json > /tmp/plugins-marketplace-direct-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins-marketplace.json", "utf8"));
+const shortcutInspect = JSON.parse(
+  fs.readFileSync("/tmp/plugins-marketplace-shortcut-inspect.json", "utf8"),
+);
+const directInspect = JSON.parse(
+  fs.readFileSync("/tmp/plugins-marketplace-direct-inspect.json", "utf8"),
+);
 const getPlugin = (id) => {
   const plugin = (data.plugins || []).find((entry) => entry.id === id);
   if (!plugin) throw new Error(`plugin not found: ${id}`);
@@ -726,10 +745,10 @@ if (shortcut.version !== "0.0.1") {
 if (direct.version !== "0.0.1") {
   throw new Error(`unexpected direct version: ${direct.version}`);
 }
-if (!shortcut.gatewayMethods.includes("demo.marketplace.shortcut.v1")) {
+if (!Array.isArray(shortcutInspect.gatewayMethods) || !shortcutInspect.gatewayMethods.includes("demo.marketplace.shortcut.v1")) {
   throw new Error("expected marketplace shortcut gateway method");
 }
-if (!direct.gatewayMethods.includes("demo.marketplace.direct.v1")) {
+if (!Array.isArray(directInspect.gatewayMethods) || !directInspect.gatewayMethods.includes("demo.marketplace.direct.v1")) {
   throw new Error("expected marketplace direct gateway method");
 }
 console.log("ok");
@@ -766,18 +785,20 @@ write_fixture_plugin \
 node "$OPENCLAW_ENTRY" plugins update marketplace-shortcut --dry-run
 node "$OPENCLAW_ENTRY" plugins update marketplace-shortcut
 node "$OPENCLAW_ENTRY" plugins list --json > /tmp/plugins-marketplace-updated.json
+node "$OPENCLAW_ENTRY" plugins inspect marketplace-shortcut --json > /tmp/plugins-marketplace-updated-inspect.json
 
 node - <<'NODE'
 const fs = require("node:fs");
 
 const data = JSON.parse(fs.readFileSync("/tmp/plugins-marketplace-updated.json", "utf8"));
+const inspect = JSON.parse(fs.readFileSync("/tmp/plugins-marketplace-updated-inspect.json", "utf8"));
 const plugin = (data.plugins || []).find((entry) => entry.id === "marketplace-shortcut");
 if (!plugin) throw new Error("updated marketplace plugin not found");
 if (plugin.version !== "0.0.2") {
   throw new Error(`unexpected updated version: ${plugin.version}`);
 }
-if (!plugin.gatewayMethods.includes("demo.marketplace.shortcut.v2")) {
-  throw new Error(`expected updated gateway method, got ${plugin.gatewayMethods.join(", ")}`);
+if (!inspect.gatewayMethods.includes("demo.marketplace.shortcut.v2")) {
+  throw new Error(`expected updated gateway method, got ${inspect.gatewayMethods.join(", ")}`);
 }
 console.log("ok");
 NODE
diff --git a/scripts/test-live-build-docker.sh b/scripts/test-live-build-docker.sh
new file mode 100755
index 00000000000..af8f815ca4b
--- /dev/null
+++ b/scripts/test-live-build-docker.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Build the image used by the live Docker test runners (Dockerfile target "build").
+# OPENCLAW_SKIP_DOCKER_BUILD=1 skips the build so an already-built image is reused.
+# The tag is $OPENCLAW_LIVE_IMAGE, falling back to "<OPENCLAW_IMAGE or openclaw:local>-live".
+set -euo pipefail
+
+script_dir="$(dirname "${BASH_SOURCE[0]}")"
+repo_root="$(cd "$script_dir/.." && pwd)"
+base_image="${OPENCLAW_IMAGE:-openclaw:local}"
+live_image="${OPENCLAW_LIVE_IMAGE:-${base_image}-live}"
+
+if [[ "${OPENCLAW_SKIP_DOCKER_BUILD:-}" == "1" ]]; then
+  echo "==> Reuse live-test image: $live_image"
+else
+  echo "==> Build live-test image: $live_image (target=build)"
+  docker build --target build -t "$live_image" -f "$repo_root/Dockerfile" "$repo_root"
+fi
diff --git a/scripts/test-live-gateway-models-docker.sh b/scripts/test-live-gateway-models-docker.sh
index 98c32fe0940..e3dc63d037a 100755
--- a/scripts/test-live-gateway-models-docker.sh
+++ b/scripts/test-live-gateway-models-docker.sh
@@ -115,13 +115,13 @@ elif [ -d /app/dist/extensions ]; then
   export OPENCLAW_BUNDLED_PLUGINS_DIR=/app/dist/extensions
 fi
 cd "$tmp_dir"
-pnpm test:live
+pnpm test:live:gateway-profiles
 EOF
 
-echo "==> Build live-test image: $LIVE_IMAGE_NAME (target=build)"
-docker build --target build -t "$LIVE_IMAGE_NAME" -f "$ROOT_DIR/Dockerfile" "$ROOT_DIR"
+"$ROOT_DIR/scripts/test-live-build-docker.sh"
 
 echo "==> Run gateway live model tests (profile keys)"
+echo "==> Target: src/gateway/gateway-models.profiles.live.test.ts"
 echo "==> External auth dirs: ${AUTH_DIRS_CSV:-none}"
 echo "==> External auth files: ${AUTH_FILES_CSV:-none}"
 docker run --rm -t \
@@ -135,8 +135,10 @@ docker run --rm -t \
   -e OPENCLAW_LIVE_TEST=1 \
   -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-modern}" \
   -e OPENCLAW_LIVE_GATEWAY_PROVIDERS="${OPENCLAW_LIVE_GATEWAY_PROVIDERS:-}" \
-  -e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-24}" \
-  -e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-}" \
+  -e OPENCLAW_LIVE_GATEWAY_SMOKE="${OPENCLAW_LIVE_GATEWAY_SMOKE:-1}" \
+  -e OPENCLAW_LIVE_GATEWAY_MAX_MODELS="${OPENCLAW_LIVE_GATEWAY_MAX_MODELS:-8}" \
+  -e OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS:-45000}" \
+  -e OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS:-90000}" \
   -v "$ROOT_DIR":/src:ro \
   -v "$CONFIG_DIR":/home/node/.openclaw \
   -v "$WORKSPACE_DIR":/home/node/.openclaw/workspace \
diff --git a/scripts/test-live-models-docker.sh b/scripts/test-live-models-docker.sh
index e432847a341..7dba7ff677e 100755
--- a/scripts/test-live-models-docker.sh
+++ b/scripts/test-live-models-docker.sh
@@ -125,13 +125,13 @@ elif [ -d /app/dist/extensions ]; then
   export OPENCLAW_BUNDLED_PLUGINS_DIR=/app/dist/extensions
 fi
 cd "$tmp_dir"
-pnpm test:live
+pnpm test:live:models-profiles
 EOF
 
-echo "==> Build live-test image: $LIVE_IMAGE_NAME (target=build)"
-docker build --target build -t "$LIVE_IMAGE_NAME" -f "$ROOT_DIR/Dockerfile" "$ROOT_DIR"
+"$ROOT_DIR/scripts/test-live-build-docker.sh"
 
 echo "==> Run live model tests (profile keys)"
+echo "==> Target: src/agents/models.profiles.live.test.ts"
 echo "==> External auth dirs: ${AUTH_DIRS_CSV:-none}"
 echo "==> External auth files: ${AUTH_FILES_CSV:-none}"
 docker run --rm -t \
@@ -145,7 +145,7 @@ docker run --rm -t \
   -e OPENCLAW_LIVE_TEST=1 \
   -e OPENCLAW_LIVE_MODELS="${OPENCLAW_LIVE_MODELS:-modern}" \
   -e OPENCLAW_LIVE_PROVIDERS="${OPENCLAW_LIVE_PROVIDERS:-}" \
-  -e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-48}" \
+  -e OPENCLAW_LIVE_MAX_MODELS="${OPENCLAW_LIVE_MAX_MODELS:-12}" \
   -e OPENCLAW_LIVE_MODEL_TIMEOUT_MS="${OPENCLAW_LIVE_MODEL_TIMEOUT_MS:-}" \
   -e OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS="${OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS:-}" \
   -e OPENCLAW_LIVE_GATEWAY_MODELS="${OPENCLAW_LIVE_GATEWAY_MODELS:-}" \
diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts
index c1bde246dc4..8a41037bdc7 100644
--- a/src/gateway/gateway-models.profiles.live.test.ts
+++ b/src/gateway/gateway-models.profiles.live.test.ts
@@ -46,7 +46,10 @@ import { loadSessionEntry, readSessionMessages } from "./session-utils.js";
 const ZAI_FALLBACK = isTruthyEnvValue(process.env.OPENCLAW_LIVE_GATEWAY_ZAI_FALLBACK);
 const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
 const PROVIDERS = parseFilter(process.env.OPENCLAW_LIVE_GATEWAY_PROVIDERS);
-const THINKING_LEVEL = "high";
+const GATEWAY_LIVE_SMOKE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_GATEWAY_SMOKE);
+const THINKING_LEVEL = GATEWAY_LIVE_SMOKE ? "low" : "high";
+const ENABLE_EXTRA_TOOL_PROBES = !GATEWAY_LIVE_SMOKE;
+const ENABLE_EXTRA_IMAGE_PROBES = !GATEWAY_LIVE_SMOKE;
 const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
 const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/i;
 const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
@@ -57,6 +60,7 @@ const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max(
   30_000,
   toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000),
 );
+const GATEWAY_LIVE_MODEL_TIMEOUT_MS = resolveGatewayLiveModelTimeoutMs();
 const GATEWAY_LIVE_HEARTBEAT_MS = Math.max(
   1_000,
   toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000),
@@ -90,6 +94,42 @@ function parseFilter(raw?: string): Set<string> | null {
   return ids.length ? new Set(ids) : null;
 }
 
+/**
+ * Reports whether Ollama-unavailable warnings are noise for this run: true only
+ * when a provider filter is set and that filter does not include "ollama".
+ */
+function shouldSuppressGatewayLiveOllamaWarnings(): boolean {
+  if (PROVIDERS === null) {
+    return false;
+  }
+  return !PROVIDERS.has("ollama");
+}
+
+/**
+ * Awaits `run`, temporarily replacing `console.warn` with a filter that drops
+ * calls where any string argument matches `isOllamaUnavailableErrorMessage`.
+ * When suppression does not apply, `run` executes with `console.warn` untouched.
+ */
+async function withSuppressedGatewayLiveWarnings<T>(run: () => Promise<T>): Promise<T> {
+  if (!shouldSuppressGatewayLiveOllamaWarnings()) {
+    return await run();
+  }
+  const previousWarn = console.warn;
+  const isOllamaNoise = (value: unknown): boolean =>
+    typeof value === "string" && isOllamaUnavailableErrorMessage(value);
+  console.warn = (...warnArgs: unknown[]) => {
+    if (!warnArgs.some(isOllamaNoise)) {
+      previousWarn(...warnArgs);
+    }
+  };
+  try {
+    return await run();
+  } finally {
+    // Put the real console.warn back even when `run` rejects.
+    console.warn = previousWarn;
+  }
+}
+
 function toInt(value: string | undefined, fallback: number): number {
   const trimmed = value?.trim();
   if (!trimmed) {
@@ -120,28 +146,61 @@ function resolveGatewayLiveSuiteTimeoutMs(maxModels: number): number {
   );
 }
 
+/**
+ * Resolves the per-model time budget in milliseconds.
+ *
+ * Precedence: the gateway-specific value, then the shared live-model value, then
+ * 120s. The result is never allowed to drop below `stepTimeoutMs`.
+ *
+ * @param gatewayModelTimeoutRaw Raw OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS value.
+ * @param liveModelTimeoutRaw Raw OPENCLAW_LIVE_MODEL_TIMEOUT_MS value.
+ * @param stepTimeoutMs Lower bound; defaults to GATEWAY_LIVE_PROBE_TIMEOUT_MS.
+ */
+function resolveGatewayLiveModelTimeoutMs(
+  gatewayModelTimeoutRaw = process.env.OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS,
+  liveModelTimeoutRaw = process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS,
+  stepTimeoutMs = GATEWAY_LIVE_PROBE_TIMEOUT_MS,
+): number {
+  const sharedFallbackMs = toInt(liveModelTimeoutRaw, 120_000);
+  const requestedMs = toInt(gatewayModelTimeoutRaw, sharedFallbackMs);
+  return Math.max(stepTimeoutMs, requestedMs);
+}
+
 function isGatewayLiveProbeTimeout(error: string): boolean {
   return /probe timeout after \d+ms/i.test(error);
 }
 
-async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
+function isGatewayLiveModelTimeout(error: string): boolean {
+  return /model timeout after \d+ms/i.test(error);
+}
+
+async function withGatewayLiveTimeout<T>(params: {
+  operation: Promise<T>;
+  timeoutMs: number;
+  timeoutLabel: "probe" | "model";
+  context: string;
+}): Promise<T> {
   let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
   const startedAt = Date.now();
   let heartbeatCount = 0;
   const heartbeat = setInterval(() => {
     heartbeatCount += 1;
     logProgress(
-      `${context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
+      `${params.context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
     );
   }, GATEWAY_LIVE_HEARTBEAT_MS);
   heartbeat.unref?.();
   try {
     return await Promise.race([
-      operation,
+      params.operation,
       new Promise<never>((_, reject) => {
         timeoutHandle = setTimeout(() => {
-          reject(new Error(`probe timeout after ${GATEWAY_LIVE_PROBE_TIMEOUT_MS}ms (${context})`));
-        }, GATEWAY_LIVE_PROBE_TIMEOUT_MS);
+          reject(
+            new Error(
+              `${params.timeoutLabel} timeout after ${params.timeoutMs}ms (${params.context})`,
+            ),
+          );
+        }, params.timeoutMs);
       }),
     ]);
   } finally {
@@ -151,12 +199,30 @@ async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: st
     }
     if (heartbeatCount > 0) {
       logProgress(
-        `${context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
+        `${params.context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
       );
     }
   }
 }
 
+/**
+ * Races `operation` against GATEWAY_LIVE_PROBE_TIMEOUT_MS via withGatewayLiveTimeout.
+ * Rejects with "probe timeout after <ms>ms (<context>)" once the limit elapses.
+ */
+async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
+  const timeoutMs = GATEWAY_LIVE_PROBE_TIMEOUT_MS;
+  return await withGatewayLiveTimeout({ operation, timeoutMs, timeoutLabel: "probe", context });
+}
+
+/**
+ * Races `operation` (a full per-model run) against GATEWAY_LIVE_MODEL_TIMEOUT_MS, rejecting
+ * with "model timeout after <ms>ms (<context>)"; the operation itself is not cancelled.
+ */
+async function withGatewayLiveModelTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
+  const timeoutMs = GATEWAY_LIVE_MODEL_TIMEOUT_MS;
+  return await withGatewayLiveTimeout({ operation, timeoutMs, timeoutLabel: "model", context });
+}
+
 function capByProviderSpread<T>(
   items: T[],
   maxItems: number,
@@ -422,6 +488,20 @@ describe("shouldSkipExecReadNonceMissForLiveModel", () => {
   });
 });
 
+describe("resolveGatewayLiveModelTimeoutMs", () => {
+  it("prefers gateway-specific timeout when provided", () => {
+    expect(resolveGatewayLiveModelTimeoutMs("180000", "45000", 90_000)).toBe(180_000);
+  });
+
+  it("falls back to the shared live timeout", () => {
+    expect(resolveGatewayLiveModelTimeoutMs("", "45000", 30_000)).toBe(45_000);
+  });
+
+  it("never goes below the probe timeout", () => {
+    expect(resolveGatewayLiveModelTimeoutMs("45000", undefined, 90_000)).toBe(90_000);
+  });
+});
+
 function isGoogleModelNotFoundText(text: string): boolean {
   const trimmed = text.trim();
   if (!trimmed) {
@@ -1133,7 +1213,7 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
       `[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`,
     );
     logProgress(
-      `[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s`,
+      `[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s model-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_MODEL_TIMEOUT_MS / 1_000))}s`,
     );
     const anthropicKeys = collectAnthropicApiKeys();
     if (anthropicKeys.length > 0) {
@@ -1157,347 +1237,355 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
           process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt];
         }
         try {
-          // Ensure session exists + override model for this run.
-          // Reset between models: avoids cross-provider transcript incompatibilities
-          // (notably OpenAI Responses requiring reasoning replay for function_call items).
-          await withGatewayLiveProbeTimeout(
-            client.request("sessions.reset", {
-              key: sessionKey,
-            }),
-            `${progressLabel}: sessions-reset`,
-          );
-          await withGatewayLiveProbeTimeout(
-            client.request("sessions.patch", {
-              key: sessionKey,
-              model: modelKey,
-            }),
-            `${progressLabel}: sessions-patch`,
-          );
-
-          logProgress(`${progressLabel}: prompt`);
-          let text = await requestGatewayAgentText({
-            client,
-            sessionKey,
-            idempotencyKey: `idem-${randomUUID()}`,
-            modelKey,
-            message:
-              "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
-            thinkingLevel: params.thinkingLevel,
-            context: `${progressLabel}: prompt`,
-          });
-          if (!text) {
-            logProgress(`${progressLabel}: empty response, retrying`);
-            text = await requestGatewayAgentText({
-              client,
-              sessionKey,
-              idempotencyKey: `idem-${randomUUID()}-retry`,
-              modelKey,
-              message:
-                "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
-              thinkingLevel: params.thinkingLevel,
-              context: `${progressLabel}: prompt-retry`,
-            });
-          }
-          if (
-            !text &&
-            shouldSkipEmptyResponseForLiveModel({
-              provider: model.provider,
-              allowNotFoundSkip: params.allowNotFoundSkip,
-            })
-          ) {
-            logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
-            break;
-          }
-          if (
-            isEmptyStreamText(text) &&
-            shouldSkipEmptyResponseForLiveModel({
-              provider: model.provider,
-              allowNotFoundSkip: params.allowNotFoundSkip,
-            })
-          ) {
-            logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
-            break;
-          }
-          if (isGoogleishProvider(model.provider) && isGoogleModelNotFoundText(text)) {
-            // Catalog drift: model IDs can disappear or become unavailable on the API.
-            // Treat as skip when scanning "all models" for Google.
-            logProgress(`${progressLabel}: skip (google model not found)`);
-            break;
-          }
-          if (params.allowNotFoundSkip && isModelNotFoundErrorMessage(text)) {
-            logProgress(`${progressLabel}: skip (model not found)`);
-            break;
-          }
-          assertNoReasoningTags({
-            text,
-            model: modelKey,
-            phase: "prompt",
-            label: params.label,
-          });
-          if (!isMeaningful(text)) {
-            if (isGoogleishProvider(model.provider) && /gemini/i.test(model.id)) {
-              logProgress(`${progressLabel}: skip (google not meaningful)`);
-              break;
-            }
-            throw new Error(`not meaningful: ${text}`);
-          }
-          if (!/\bmicro\s*-?\s*tasks?\b/i.test(text) || !/\bmacro\s*-?\s*tasks?\b/i.test(text)) {
-            throw new Error(`missing required keywords: ${text}`);
-          }
-
-          // Real tool invocation: force the agent to Read a local file and echo a nonce.
-          logProgress(`${progressLabel}: tool-read`);
-          const runIdTool = randomUUID();
-          const maxToolReadAttempts = 3;
-          let toolText = "";
-          for (
-            let toolReadAttempt = 0;
-            toolReadAttempt < maxToolReadAttempts;
-            toolReadAttempt += 1
-          ) {
-            const strictReply = toolReadAttempt > 0;
-            toolText = await requestGatewayAgentText({
-              client,
-              sessionKey,
-              idempotencyKey: `idem-${runIdTool}-tool-${toolReadAttempt + 1}`,
-              modelKey,
-              message: strictReply
-                ? "OpenClaw live tool probe (local, safe): " +
-                  `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
-                  `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`
-                : "OpenClaw live tool probe (local, safe): " +
-                  `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
-                  "Then reply with the two nonce values you read (include both).",
-              thinkingLevel: params.thinkingLevel,
-              context: `${progressLabel}: tool-read`,
-            });
-            if (
-              isEmptyStreamText(toolText) &&
-              shouldSkipEmptyResponseForLiveModel({
-                provider: model.provider,
-                allowNotFoundSkip: params.allowNotFoundSkip,
-              })
-            ) {
-              logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
-              break;
-            }
-            assertNoReasoningTags({
-              text: toolText,
-              model: modelKey,
-              phase: "tool-read",
-              label: params.label,
-            });
-            if (hasExpectedToolNonce(toolText, nonceA, nonceB)) {
-              break;
-            }
-            if (
-              shouldRetryToolReadProbe({
-                text: toolText,
-                nonceA,
-                nonceB,
-                provider: model.provider,
-                attempt: toolReadAttempt,
-                maxAttempts: maxToolReadAttempts,
-              })
-            ) {
-              logProgress(
-                `${progressLabel}: tool-read retry (${toolReadAttempt + 2}/${maxToolReadAttempts}) malformed tool output`,
+          const modelResult = await withGatewayLiveModelTimeout<"done" | "skip">(
+            (async () => {
+              // Ensure session exists + override model for this run.
+              // Reset between models: avoids cross-provider transcript incompatibilities
+              // (notably OpenAI Responses requiring reasoning replay for function_call items).
+              await withGatewayLiveProbeTimeout(
+                client.request("sessions.reset", {
+                  key: sessionKey,
+                }),
+                `${progressLabel}: sessions-reset`,
+              );
+              await withGatewayLiveProbeTimeout(
+                client.request("sessions.patch", {
+                  key: sessionKey,
+                  model: modelKey,
+                }),
+                `${progressLabel}: sessions-patch`,
               );
-              continue;
-            }
-            throw new Error(`tool probe missing nonce: ${toolText}`);
-          }
-          if (!hasExpectedToolNonce(toolText, nonceA, nonceB)) {
-            throw new Error(`tool probe missing nonce: ${toolText}`);
-          }
 
-          if (params.extraToolProbes) {
-            logProgress(`${progressLabel}: tool-exec`);
-            const nonceC = randomUUID();
-            const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
-            const maxExecReadAttempts = 3;
-            let execReadText = "";
-            for (
-              let execReadAttempt = 0;
-              execReadAttempt < maxExecReadAttempts;
-              execReadAttempt += 1
-            ) {
-              const strictReply = execReadAttempt > 0;
-              execReadText = await requestGatewayAgentText({
+              logProgress(`${progressLabel}: prompt`);
+              let text = await requestGatewayAgentText({
                 client,
                 sessionKey,
-                idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
+                idempotencyKey: `idem-${randomUUID()}`,
                 modelKey,
-                message: strictReply
-                  ? "OpenClaw live tool probe (local, safe): " +
-                    "use the tool named `exec` (or `Exec`) to run this command: " +
-                    `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
-                    `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
-                    `Then reply with exactly: ${nonceC}. No extra text.`
-                  : "OpenClaw live tool probe (local, safe): " +
-                    "use the tool named `exec` (or `Exec`) to run this command: " +
-                    `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
-                    `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
-                    "Finally reply including the nonce text you read back.",
+                message:
+                  "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
                 thinkingLevel: params.thinkingLevel,
-                context: `${progressLabel}: tool-exec`,
+                context: `${progressLabel}: prompt`,
               });
+              if (!text) {
+                logProgress(`${progressLabel}: empty response, retrying`);
+                text = await requestGatewayAgentText({
+                  client,
+                  sessionKey,
+                  idempotencyKey: `idem-${randomUUID()}-retry`,
+                  modelKey,
+                  message:
+                    "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
+                  thinkingLevel: params.thinkingLevel,
+                  context: `${progressLabel}: prompt-retry`,
+                });
+              }
               if (
-                isEmptyStreamText(execReadText) &&
+                !text &&
                 shouldSkipEmptyResponseForLiveModel({
                   provider: model.provider,
                   allowNotFoundSkip: params.allowNotFoundSkip,
                 })
               ) {
                 logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
-                break;
-              }
-              assertNoReasoningTags({
-                text: execReadText,
-                model: modelKey,
-                phase: "tool-exec",
-                label: params.label,
-              });
-              if (hasExpectedSingleNonce(execReadText, nonceC)) {
-                break;
+                return "skip";
               }
               if (
-                shouldRetryExecReadProbe({
-                  text: execReadText,
-                  nonce: nonceC,
+                isEmptyStreamText(text) &&
+                shouldSkipEmptyResponseForLiveModel({
                   provider: model.provider,
-                  attempt: execReadAttempt,
-                  maxAttempts: maxExecReadAttempts,
+                  allowNotFoundSkip: params.allowNotFoundSkip,
                 })
               ) {
-                logProgress(
-                  `${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) malformed tool output`,
-                );
-                continue;
+                logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
+                return "skip";
+              }
+              if (isGoogleishProvider(model.provider) && isGoogleModelNotFoundText(text)) {
+                // Catalog drift: model IDs can disappear or become unavailable on the API.
+                // Treat as skip when scanning "all models" for Google.
+                logProgress(`${progressLabel}: skip (google model not found)`);
+                return "skip";
+              }
+              if (params.allowNotFoundSkip && isModelNotFoundErrorMessage(text)) {
+                logProgress(`${progressLabel}: skip (model not found)`);
+                return "skip";
               }
-              throw new Error(`exec+read probe missing nonce: ${execReadText}`);
-            }
-            if (!hasExpectedSingleNonce(execReadText, nonceC)) {
-              throw new Error(`exec+read probe missing nonce: ${execReadText}`);
-            }
-
-            await fs.rm(toolWritePath, { force: true });
-          }
-
-          if (params.extraImageProbes && model.input?.includes("image")) {
-            logProgress(`${progressLabel}: image`);
-            // Shorter code => less OCR flake across providers, still tests image attachments end-to-end.
-            const imageCode = randomImageProbeCode();
-            const imageBase64 = renderCatNoncePngBase64(imageCode);
-            const runIdImage = randomUUID();
-
-            const imageText = await requestGatewayAgentText({
-              client,
-              sessionKey,
-              idempotencyKey: `idem-${runIdImage}-image`,
-              modelKey,
-              message:
-                "Look at the attached image. Reply with exactly two tokens separated by a single space: " +
-                "(1) the animal shown or written in the image, lowercase; " +
-                "(2) the code printed in the image, uppercase. No extra text.",
-              attachments: [
-                {
-                  mimeType: "image/png",
-                  fileName: `probe-${runIdImage}.png`,
-                  content: imageBase64,
-                },
-              ],
-              thinkingLevel: params.thinkingLevel,
-              context: `${progressLabel}: image`,
-            });
-            // Best-effort: do not fail the whole live suite on flaky image handling.
-            // (We still keep prompt + tool probes as hard checks.)
-            if (
-              isEmptyStreamText(imageText) &&
-              shouldSkipEmptyResponseForLiveModel({
-                provider: model.provider,
-                allowNotFoundSkip: params.allowNotFoundSkip,
-              })
-            ) {
-              logProgress(`${progressLabel}: image skip (${model.provider} empty response)`);
-            } else {
               assertNoReasoningTags({
-                text: imageText,
+                text,
                 model: modelKey,
-                phase: "image",
+                phase: "prompt",
                 label: params.label,
               });
-              if (!/\bcat\b/i.test(imageText)) {
-                logProgress(`${progressLabel}: image skip (missing 'cat')`);
-              } else {
-                const candidates = imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
-                const bestDistance = candidates.reduce((best, cand) => {
-                  if (Math.abs(cand.length - imageCode.length) > 2) {
-                    return best;
+              if (!isMeaningful(text)) {
+                if (isGoogleishProvider(model.provider) && /gemini/i.test(model.id)) {
+                  logProgress(`${progressLabel}: skip (google not meaningful)`);
+                  return "skip";
+                }
+                throw new Error(`not meaningful: ${text}`);
+              }
+              if (
+                !/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
+                !/\bmacro\s*-?\s*tasks?\b/i.test(text)
+              ) {
+                throw new Error(`missing required keywords: ${text}`);
+              }
+
+              // Real tool invocation: force the agent to Read a local file and echo a nonce.
+              logProgress(`${progressLabel}: tool-read`);
+              const runIdTool = randomUUID();
+              const maxToolReadAttempts = 3;
+              let toolText = "";
+              for (
+                let toolReadAttempt = 0;
+                toolReadAttempt < maxToolReadAttempts;
+                toolReadAttempt += 1
+              ) {
+                const strictReply = toolReadAttempt > 0;
+                toolText = await requestGatewayAgentText({
+                  client,
+                  sessionKey,
+                  idempotencyKey: `idem-${runIdTool}-tool-${toolReadAttempt + 1}`,
+                  modelKey,
+                  message: strictReply
+                    ? "OpenClaw live tool probe (local, safe): " +
+                      `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
+                      `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`
+                    : "OpenClaw live tool probe (local, safe): " +
+                      `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
+                      "Then reply with the two nonce values you read (include both).",
+                  thinkingLevel: params.thinkingLevel,
+                  context: `${progressLabel}: tool-read`,
+                });
+                if (
+                  isEmptyStreamText(toolText) &&
+                  shouldSkipEmptyResponseForLiveModel({
+                    provider: model.provider,
+                    allowNotFoundSkip: params.allowNotFoundSkip,
+                  })
+                ) {
+                  logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
+                  return "skip";
+                }
+                assertNoReasoningTags({
+                  text: toolText,
+                  model: modelKey,
+                  phase: "tool-read",
+                  label: params.label,
+                });
+                if (hasExpectedToolNonce(toolText, nonceA, nonceB)) {
+                  break;
+                }
+                if (
+                  shouldRetryToolReadProbe({
+                    text: toolText,
+                    nonceA,
+                    nonceB,
+                    provider: model.provider,
+                    attempt: toolReadAttempt,
+                    maxAttempts: maxToolReadAttempts,
+                  })
+                ) {
+                  logProgress(
+                    `${progressLabel}: tool-read retry (${toolReadAttempt + 2}/${maxToolReadAttempts}) malformed tool output`,
+                  );
+                  continue;
+                }
+                throw new Error(`tool probe missing nonce: ${toolText}`);
+              }
+              if (!hasExpectedToolNonce(toolText, nonceA, nonceB)) {
+                throw new Error(`tool probe missing nonce: ${toolText}`);
+              }
+
+              if (params.extraToolProbes) {
+                logProgress(`${progressLabel}: tool-exec`);
+                const nonceC = randomUUID();
+                const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
+                const maxExecReadAttempts = 3;
+                let execReadText = "";
+                for (
+                  let execReadAttempt = 0;
+                  execReadAttempt < maxExecReadAttempts;
+                  execReadAttempt += 1
+                ) {
+                  const strictReply = execReadAttempt > 0;
+                  execReadText = await requestGatewayAgentText({
+                    client,
+                    sessionKey,
+                    idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
+                    modelKey,
+                    message: strictReply
+                      ? "OpenClaw live tool probe (local, safe): " +
+                        "use the tool named `exec` (or `Exec`) to run this command: " +
+                        `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
+                        `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
+                        `Then reply with exactly: ${nonceC}. No extra text.`
+                      : "OpenClaw live tool probe (local, safe): " +
+                        "use the tool named `exec` (or `Exec`) to run this command: " +
+                        `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
+                        `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
+                        "Finally reply including the nonce text you read back.",
+                    thinkingLevel: params.thinkingLevel,
+                    context: `${progressLabel}: tool-exec`,
+                  });
+                  if (
+                    isEmptyStreamText(execReadText) &&
+                    shouldSkipEmptyResponseForLiveModel({
+                      provider: model.provider,
+                      allowNotFoundSkip: params.allowNotFoundSkip,
+                    })
+                  ) {
+                    logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
+                    return "skip";
+                  }
+                  assertNoReasoningTags({
+                    text: execReadText,
+                    model: modelKey,
+                    phase: "tool-exec",
+                    label: params.label,
+                  });
+                  if (hasExpectedSingleNonce(execReadText, nonceC)) {
+                    break;
+                  }
+                  if (
+                    shouldRetryExecReadProbe({
+                      text: execReadText,
+                      nonce: nonceC,
+                      provider: model.provider,
+                      attempt: execReadAttempt,
+                      maxAttempts: maxExecReadAttempts,
+                    })
+                  ) {
+                    logProgress(
+                      `${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) malformed tool output`,
+                    );
+                    continue;
+                  }
+                  throw new Error(`exec+read probe missing nonce: ${execReadText}`);
+                }
+                if (!hasExpectedSingleNonce(execReadText, nonceC)) {
+                  throw new Error(`exec+read probe missing nonce: ${execReadText}`);
+                }
+
+                await fs.rm(toolWritePath, { force: true });
+              }
+
+              if (params.extraImageProbes && model.input?.includes("image")) {
+                logProgress(`${progressLabel}: image`);
+                // Shorter code => less OCR flake across providers, still tests image attachments end-to-end.
+                const imageCode = randomImageProbeCode();
+                const imageBase64 = renderCatNoncePngBase64(imageCode);
+                const runIdImage = randomUUID();
+
+                const imageText = await requestGatewayAgentText({
+                  client,
+                  sessionKey,
+                  idempotencyKey: `idem-${runIdImage}-image`,
+                  modelKey,
+                  message:
+                    "Look at the attached image. Reply with exactly two tokens separated by a single space: " +
+                    "(1) the animal shown or written in the image, lowercase; " +
+                    "(2) the code printed in the image, uppercase. No extra text.",
+                  attachments: [
+                    {
+                      mimeType: "image/png",
+                      fileName: `probe-${runIdImage}.png`,
+                      content: imageBase64,
+                    },
+                  ],
+                  thinkingLevel: params.thinkingLevel,
+                  context: `${progressLabel}: image`,
+                });
+                if (
+                  isEmptyStreamText(imageText) &&
+                  shouldSkipEmptyResponseForLiveModel({
+                    provider: model.provider,
+                    allowNotFoundSkip: params.allowNotFoundSkip,
+                  })
+                ) {
+                  logProgress(`${progressLabel}: image skip (${model.provider} empty response)`);
+                } else {
+                  assertNoReasoningTags({
+                    text: imageText,
+                    model: modelKey,
+                    phase: "image",
+                    label: params.label,
+                  });
+                  if (!/\bcat\b/i.test(imageText)) {
+                    logProgress(`${progressLabel}: image skip (missing 'cat')`);
+                  } else {
+                    const candidates = imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
+                    const bestDistance = candidates.reduce((best, cand) => {
+                      if (Math.abs(cand.length - imageCode.length) > 2) {
+                        return best;
+                      }
+                      return Math.min(best, editDistance(cand, imageCode));
+                    }, Number.POSITIVE_INFINITY);
+                    if (!(bestDistance <= 3)) {
+                      logProgress(`${progressLabel}: image skip (code mismatch)`);
+                    }
                   }
-                  return Math.min(best, editDistance(cand, imageCode));
-                }, Number.POSITIVE_INFINITY);
-                // OCR / image-read flake: allow a small edit distance, but still require the "cat" token above.
-                if (!(bestDistance <= 3)) {
-                  logProgress(`${progressLabel}: image skip (code mismatch)`);
                 }
               }
-            }
+
+              if (
+                (model.provider === "openai" && model.api === "openai-responses") ||
+                (model.provider === "openai-codex" && model.api === "openai-codex-responses")
+              ) {
+                logProgress(`${progressLabel}: tool-only regression`);
+                const runId2 = randomUUID();
+                const firstText = await requestGatewayAgentText({
+                  client,
+                  sessionKey,
+                  idempotencyKey: `idem-${runId2}-1`,
+                  modelKey,
+                  message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
+                  thinkingLevel: params.thinkingLevel,
+                  context: `${progressLabel}: tool-only-regression-first`,
+                });
+                assertNoReasoningTags({
+                  text: firstText,
+                  model: modelKey,
+                  phase: "tool-only",
+                  label: params.label,
+                });
+
+                const reply = await requestGatewayAgentText({
+                  client,
+                  sessionKey,
+                  idempotencyKey: `idem-${runId2}-2`,
+                  modelKey,
+                  message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
+                  thinkingLevel: params.thinkingLevel,
+                  context: `${progressLabel}: tool-only-regression-second`,
+                });
+                assertNoReasoningTags({
+                  text: reply,
+                  model: modelKey,
+                  phase: "tool-only-followup",
+                  label: params.label,
+                });
+                if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
+                  throw new Error(`unexpected reply: ${reply}`);
+                }
+              }
+
+              if (model.provider === "anthropic") {
+                await runAnthropicRefusalProbe({
+                  client,
+                  sessionKey,
+                  modelKey,
+                  label: progressLabel,
+                  thinkingLevel: params.thinkingLevel,
+                });
+              }
+              return "done";
+            })(),
+            `${progressLabel}: model`,
+          );
+          if (modelResult === "skip") {
+            skippedCount += 1;
+            break;
           }
-
-          // Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
-          if (
-            (model.provider === "openai" && model.api === "openai-responses") ||
-            (model.provider === "openai-codex" && model.api === "openai-codex-responses")
-          ) {
-            logProgress(`${progressLabel}: tool-only regression`);
-            const runId2 = randomUUID();
-            const firstText = await requestGatewayAgentText({
-              client,
-              sessionKey,
-              idempotencyKey: `idem-${runId2}-1`,
-              modelKey,
-              message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
-              thinkingLevel: params.thinkingLevel,
-              context: `${progressLabel}: tool-only-regression-first`,
-            });
-            assertNoReasoningTags({
-              text: firstText,
-              model: modelKey,
-              phase: "tool-only",
-              label: params.label,
-            });
-
-            const reply = await requestGatewayAgentText({
-              client,
-              sessionKey,
-              idempotencyKey: `idem-${runId2}-2`,
-              modelKey,
-              message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
-              thinkingLevel: params.thinkingLevel,
-              context: `${progressLabel}: tool-only-regression-second`,
-            });
-            assertNoReasoningTags({
-              text: reply,
-              model: modelKey,
-              phase: "tool-only-followup",
-              label: params.label,
-            });
-            if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
-              throw new Error(`unexpected reply: ${reply}`);
-            }
-          }
-
-          if (model.provider === "anthropic") {
-            await runAnthropicRefusalProbe({
-              client,
-              sessionKey,
-              modelKey,
-              label: progressLabel,
-              thinkingLevel: params.thinkingLevel,
-            });
-          }
-
           logProgress(`${progressLabel}: done`);
           break;
         } catch (err) {
@@ -1591,6 +1679,11 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
             logProgress(`${progressLabel}: skip (probe timeout)`);
             break;
           }
+          if (isGatewayLiveModelTimeout(message)) {
+            skippedCount += 1;
+            logProgress(`${progressLabel}: skip (model timeout)`);
+            break;
+          }
           // OpenAI Codex refresh tokens can become single-use; skip instead of failing all live tests.
           if (model.provider === "openai-codex" && isRefreshTokenReused(message)) {
             skippedCount += 1;
@@ -1698,110 +1791,113 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
 describeLive("gateway live (dev agent, profile keys)", () => {
   it(
     "runs meaningful prompts across models with available keys",
-    async () => {
-      clearRuntimeConfigSnapshot();
-      const cfg = loadConfig();
-      await ensureOpenClawModelsJson(cfg);
+    async () =>
+      await withSuppressedGatewayLiveWarnings(async () => {
+        clearRuntimeConfigSnapshot();
+        const cfg = loadConfig();
+        await ensureOpenClawModelsJson(cfg);
 
-      const agentDir = resolveOpenClawAgentDir();
-      const authStorage = discoverAuthStorage(agentDir);
-      const modelRegistry = discoverModels(authStorage, agentDir);
-      const all = modelRegistry.getAll();
+        const agentDir = resolveOpenClawAgentDir();
+        const authStorage = discoverAuthStorage(agentDir);
+        const modelRegistry = discoverModels(authStorage, agentDir);
+        const all = modelRegistry.getAll();
 
-      const rawModels = process.env.OPENCLAW_LIVE_GATEWAY_MODELS?.trim();
-      const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
-      const useExplicit = Boolean(rawModels) && !useModern;
-      const filter = useExplicit ? parseFilter(rawModels) : null;
-      const maxModels = GATEWAY_LIVE_MAX_MODELS;
-      const wanted = filter
-        ? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
-        : all.filter((m) => isHighSignalLiveModelRef({ provider: m.provider, id: m.id }));
+        const rawModels = process.env.OPENCLAW_LIVE_GATEWAY_MODELS?.trim();
+        const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
+        const useExplicit = Boolean(rawModels) && !useModern;
+        const filter = useExplicit ? parseFilter(rawModels) : null;
+        const maxModels = GATEWAY_LIVE_MAX_MODELS;
+        const wanted = filter
+          ? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
+          : all.filter((m) => isHighSignalLiveModelRef({ provider: m.provider, id: m.id }));
 
-      const candidates: Array<Model<Api>> = [];
-      const skipped: Array<{ model: string; error: string }> = [];
-      for (const model of wanted) {
-        if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
-          continue;
-        }
-        if (PROVIDERS && !PROVIDERS.has(model.provider)) {
-          continue;
-        }
-        const modelRef = `${model.provider}/${model.id}`;
-        try {
-          const apiKeyInfo = await getApiKeyForModel({ model, cfg });
-          if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
-            skipped.push({
-              model: modelRef,
-              error: `non-profile credential source: ${apiKeyInfo.source}`,
-            });
+        const candidates: Array<Model<Api>> = [];
+        const skipped: Array<{ model: string; error: string }> = [];
+        for (const model of wanted) {
+          if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
             continue;
           }
-          candidates.push(model);
-        } catch (error) {
-          skipped.push({ model: modelRef, error: String(error) });
+          if (PROVIDERS && !PROVIDERS.has(model.provider)) {
+            continue;
+          }
+          const modelRef = `${model.provider}/${model.id}`;
+          try {
+            const apiKeyInfo = await getApiKeyForModel({ model, cfg });
+            if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
+              skipped.push({
+                model: modelRef,
+                error: `non-profile credential source: ${apiKeyInfo.source}`,
+              });
+              continue;
+            }
+            candidates.push(model);
+          } catch (error) {
+            skipped.push({ model: modelRef, error: String(error) });
+          }
         }
-      }
 
-      if (candidates.length === 0) {
-        if (skipped.length > 0) {
+        if (candidates.length === 0) {
+          if (skipped.length > 0) {
+            logProgress(
+              `[all-models] auth lookup skipped candidates:\n${formatFailurePreview(skipped, 8)}`,
+            );
+          }
+          logProgress("[all-models] no API keys found; skipping");
+          return;
+        }
+        const selectedCandidates = capByProviderSpread(
+          candidates,
+          maxModels > 0 ? maxModels : candidates.length,
+          (model) => model.provider,
+        );
+        logProgress(`[all-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
+        if (selectedCandidates.length < candidates.length) {
           logProgress(
-            `[all-models] auth lookup skipped candidates:\n${formatFailurePreview(skipped, 8)}`,
+            `[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
           );
         }
-        logProgress("[all-models] no API keys found; skipping");
-        return;
-      }
-      const selectedCandidates = capByProviderSpread(
-        candidates,
-        maxModels > 0 ? maxModels : candidates.length,
-        (model) => model.provider,
-      );
-      logProgress(`[all-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
-      if (selectedCandidates.length < candidates.length) {
-        logProgress(
-          `[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
-        );
-      }
-      const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
-      if (imageCandidates.length === 0) {
-        logProgress("[all-models] no image-capable models selected; image probe will be skipped");
-      }
-      await runGatewayModelSuite({
-        label: "all-models",
-        cfg,
-        candidates: selectedCandidates,
-        allowNotFoundSkip: useModern,
-        extraToolProbes: true,
-        extraImageProbes: true,
-        thinkingLevel: THINKING_LEVEL,
-      });
-
-      const minimaxCandidates = selectedCandidates.filter((model) => model.provider === "minimax");
-      if (minimaxCandidates.length === 0) {
-        logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
-        return;
-      }
-
-      const minimaxAnthropic = buildMinimaxProviderOverride({
-        cfg,
-        api: "anthropic-messages",
-        baseUrl: "https://api.minimax.io/anthropic",
-      });
-      if (minimaxAnthropic) {
+        const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
+        if (imageCandidates.length === 0) {
+          logProgress("[all-models] no image-capable models selected; image probe will be skipped");
+        }
         await runGatewayModelSuite({
-          label: "minimax-anthropic",
+          label: "all-models",
           cfg,
-          candidates: minimaxCandidates,
+          candidates: selectedCandidates,
           allowNotFoundSkip: useModern,
-          extraToolProbes: true,
-          extraImageProbes: true,
+          extraToolProbes: ENABLE_EXTRA_TOOL_PROBES,
+          extraImageProbes: ENABLE_EXTRA_IMAGE_PROBES,
           thinkingLevel: THINKING_LEVEL,
-          providerOverrides: { minimax: minimaxAnthropic },
         });
-      } else {
-        logProgress("[minimax-anthropic] missing minimax provider config; skipping");
-      }
-    },
+
+        const minimaxCandidates = selectedCandidates.filter(
+          (model) => model.provider === "minimax",
+        );
+        if (minimaxCandidates.length === 0) {
+          logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
+          return;
+        }
+
+        const minimaxAnthropic = buildMinimaxProviderOverride({
+          cfg,
+          api: "anthropic-messages",
+          baseUrl: "https://api.minimax.io/anthropic",
+        });
+        if (minimaxAnthropic) {
+          await runGatewayModelSuite({
+            label: "minimax-anthropic",
+            cfg,
+            candidates: minimaxCandidates,
+            allowNotFoundSkip: useModern,
+            extraToolProbes: ENABLE_EXTRA_TOOL_PROBES,
+            extraImageProbes: ENABLE_EXTRA_IMAGE_PROBES,
+            thinkingLevel: THINKING_LEVEL,
+            providerOverrides: { minimax: minimaxAnthropic },
+          });
+        } else {
+          logProgress("[minimax-anthropic] missing minimax provider config; skipping");
+        }
+      }),
     GATEWAY_LIVE_SUITE_TIMEOUT_MS,
   );