Files
openclaw/src/gateway/gateway-models.profiles.live.test.ts
2026-04-02 15:59:23 +01:00

2073 lines
73 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import { createServer } from "node:net";
import os from "node:os";
import path from "node:path";
import type { Api, Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { resolveOpenClawAgentDir } from "../agents/agent-paths.js";
import { resolveAgentWorkspaceDir } from "../agents/agent-scope.js";
import {
type AuthProfileStore,
ensureAuthProfileStore,
saveAuthProfileStore,
} from "../agents/auth-profiles.js";
import {
collectAnthropicApiKeys,
isAnthropicBillingError,
isAnthropicRateLimitError,
} from "../agents/live-auth-keys.js";
import { isModelNotFoundErrorMessage } from "../agents/live-model-errors.js";
import { isHighSignalLiveModelRef } from "../agents/live-model-filter.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "../agents/live-test-helpers.js";
import { getApiKeyForModel } from "../agents/model-auth.js";
import { shouldSuppressBuiltInModel } from "../agents/model-suppression.js";
import { ensureOpenClawModelsJson } from "../agents/models-config.js";
import { isRateLimitErrorMessage } from "../agents/pi-embedded-helpers/errors.js";
import { discoverAuthStorage, discoverModels } from "../agents/pi-model-discovery.js";
import { clearRuntimeConfigSnapshot, loadConfig } from "../config/config.js";
import type { ModelsConfig, OpenClawConfig, ModelProviderConfig } from "../config/types.js";
import { isTruthyEnvValue } from "../infra/env.js";
import { normalizeGoogleModelId } from "../plugin-sdk/google.js";
import { DEFAULT_AGENT_ID } from "../routing/session-key.js";
import { stripAssistantInternalScaffolding } from "../shared/text/assistant-visible-text.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
import { GatewayClient } from "./client.js";
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
import {
hasExpectedSingleNonce,
hasExpectedToolNonce,
shouldRetryExecReadProbe,
shouldRetryToolReadProbe,
} from "./live-tool-probe-utils.js";
import { startGatewayServer } from "./server.js";
import { loadSessionEntry, readSessionMessages } from "./session-utils.js";
// --- Live gateway suite knobs, all resolved once at module load ---

// Presumably enables a ZAI provider fallback path — confirm against the suite body.
const ZAI_FALLBACK = isTruthyEnvValue(process.env.OPENCLAW_LIVE_GATEWAY_ZAI_FALLBACK);
// When enabled, only profile-backed credentials are exercised.
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
// Optional comma-separated provider allowlist; null means "all providers".
const PROVIDERS = parseFilter(process.env.OPENCLAW_LIVE_GATEWAY_PROVIDERS);
// Smoke mode trades coverage for speed: low thinking level, no extra probes.
const GATEWAY_LIVE_SMOKE = isTruthyEnvValue(process.env.OPENCLAW_LIVE_GATEWAY_SMOKE);
const THINKING_LEVEL = GATEWAY_LIVE_SMOKE ? "low" : "high";
const ENABLE_EXTRA_TOOL_PROBES = !GATEWAY_LIVE_SMOKE;
const ENABLE_EXTRA_IMAGE_PROBES = !GATEWAY_LIVE_SMOKE;
// Scaffolding tags that must never leak into user-visible assistant text.
const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/i;
// Token embedded in prompts by runAnthropicRefusalProbe to trigger a refusal.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
// Suite/model/probe time budgets (ms).
const GATEWAY_LIVE_DEFAULT_TIMEOUT_MS = 20 * 60 * 1000;
const GATEWAY_LIVE_UNBOUNDED_TIMEOUT_MS = 60 * 60 * 1000;
const GATEWAY_LIVE_MAX_TIMEOUT_MS = 2 * 60 * 60 * 1000;
// Per-probe budget: env override, but never under 30s.
const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max(
30_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000),
);
const GATEWAY_LIVE_MODEL_TIMEOUT_MS = resolveGatewayLiveModelTimeoutMs();
// How often long-running probes emit a "still running" heartbeat log line.
const GATEWAY_LIVE_HEARTBEAT_MS = Math.max(
1_000,
toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000),
);
// Models whose persisted transcripts are known to include scaffolding wrappers.
const GATEWAY_LIVE_STRIP_SCAFFOLDING_MODEL_KEYS = new Set([
"google/gemini-3-flash-preview",
"google/gemini-3-pro-preview",
"google/gemini-3.1-flash-lite-preview",
"google/gemini-3.1-pro-preview",
"google/gemini-3.1-pro-preview-customtools",
"openai/gpt-5.2-pro",
]);
// Models where a missing nonce in the exec+read probe is a known limitation.
const GATEWAY_LIVE_EXEC_READ_NONCE_MISS_SKIP_MODEL_KEYS = new Set([
"google/gemini-3.1-flash-lite-preview",
]);
const GATEWAY_LIVE_MAX_MODELS = resolveGatewayLiveMaxModels();
const GATEWAY_LIVE_SUITE_TIMEOUT_MS = resolveGatewayLiveSuiteTimeoutMs(GATEWAY_LIVE_MAX_MODELS);
// Quiet by default; set OPENCLAW_LIVE_TEST_QUIET=0 for verbose live logs.
const QUIET_LIVE_LOGS = process.env.OPENCLAW_LIVE_TEST_QUIET !== "0";
// The whole live suite is skipped unless OPENCLAW_LIVE_GATEWAY is enabled.
const describeLive = isLiveTestEnabled(["OPENCLAW_LIVE_GATEWAY"]) ? describe : describe.skip;
/**
 * Normalize a comma-separated provider filter string.
 * Returns null (meaning "no filter") for blank input or the literal "all".
 */
function parseFilter(raw?: string): Set<string> | null {
  const value = raw?.trim();
  if (!value || value === "all") {
    return null;
  }
  const entries: string[] = [];
  for (const part of value.split(",")) {
    const candidate = part.trim();
    if (candidate) {
      entries.push(candidate);
    }
  }
  return entries.length > 0 ? new Set(entries) : null;
}
/** Hide ollama noise only when an explicit provider filter excludes ollama. */
function shouldSuppressGatewayLiveOllamaWarnings(): boolean {
  if (PROVIDERS === null) {
    return false;
  }
  return !PROVIDERS.has("ollama");
}
/**
 * Run `run` with console.warn filtered so expected "ollama unreachable" noise
 * does not pollute live-test output; all other warnings pass through.
 * The original console.warn is always restored.
 */
async function withSuppressedGatewayLiveWarnings<T>(run: () => Promise<T>): Promise<T> {
  if (!shouldSuppressGatewayLiveOllamaWarnings()) {
    return await run();
  }
  const passthroughWarn = console.warn;
  console.warn = (...args: unknown[]) => {
    const isOllamaNoise = args.some(
      (arg) => typeof arg === "string" && isOllamaUnavailableErrorMessage(arg),
    );
    if (!isOllamaNoise) {
      passthroughWarn(...args);
    }
  };
  try {
    return await run();
  } finally {
    console.warn = passthroughWarn;
  }
}
/** Parse a base-10 integer from an env-style string; fall back on blank/NaN. */
function toInt(value: string | undefined, fallback: number): number {
  const text = value?.trim();
  if (!text) {
    return fallback;
  }
  const parsed = Number.parseInt(text, 10);
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return fallback;
}
/**
 * Resolve the model cap: the gateway-specific env var wins when set (>= 0);
 * otherwise reuse the shared live-model cap, clamped to be non-negative.
 */
function resolveGatewayLiveMaxModels(): number {
  const gatewaySpecific = toInt(process.env.OPENCLAW_LIVE_GATEWAY_MAX_MODELS, -1);
  if (gatewaySpecific >= 0) {
    return gatewaySpecific;
  }
  const sharedCap = toInt(process.env.OPENCLAW_LIVE_MAX_MODELS, 0);
  return sharedCap > 0 ? sharedCap : 0;
}
/**
 * Suite timeout: unbounded budget when there is no model cap; otherwise
 * ~90s per model plus a 5-minute fixed cost, clamped between the default
 * and hard-max suite timeouts.
 */
function resolveGatewayLiveSuiteTimeoutMs(maxModels: number): number {
  if (maxModels <= 0) {
    return GATEWAY_LIVE_UNBOUNDED_TIMEOUT_MS;
  }
  const estimated = 5 * 60 * 1000 + maxModels * 90 * 1000;
  const capped = Math.min(GATEWAY_LIVE_MAX_TIMEOUT_MS, estimated);
  return Math.max(GATEWAY_LIVE_DEFAULT_TIMEOUT_MS, capped);
}
/**
 * Per-model timeout: gateway-specific env wins, then the shared live timeout,
 * then 120s; never allowed to drop below a single probe's budget.
 */
function resolveGatewayLiveModelTimeoutMs(
  gatewayModelTimeoutRaw = process.env.OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS,
  liveModelTimeoutRaw = process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS,
  stepTimeoutMs = GATEWAY_LIVE_PROBE_TIMEOUT_MS,
): number {
  const sharedTimeout = toInt(liveModelTimeoutRaw, 120_000);
  const requested = toInt(gatewayModelTimeoutRaw, sharedTimeout);
  return requested > stepTimeoutMs ? requested : stepTimeoutMs;
}
/** Matches timeout messages produced with the "probe" label. */
function isGatewayLiveProbeTimeout(error: string): boolean {
  const probeTimeoutPattern = /probe timeout after \d+ms/i;
  return probeTimeoutPattern.test(error);
}
/** Matches timeout messages produced with the "model" label. */
function isGatewayLiveModelTimeout(error: string): boolean {
  const modelTimeoutPattern = /model timeout after \d+ms/i;
  return modelTimeoutPattern.test(error);
}
/**
 * Race `params.operation` against a labelled timeout, emitting periodic
 * heartbeat log lines so CI output shows long-running probes are still alive.
 * The timeout error message embeds the label/context and is matched by
 * isGatewayLiveProbeTimeout / isGatewayLiveModelTimeout.
 */
async function withGatewayLiveTimeout<T>(params: {
  operation: Promise<T>;
  timeoutMs: number;
  timeoutLabel: "probe" | "model";
  context: string;
}): Promise<T> {
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  const startedAt = Date.now();
  let heartbeatCount = 0;
  const heartbeat = setInterval(() => {
    heartbeatCount += 1;
    logProgress(
      `${params.context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
    );
  }, GATEWAY_LIVE_HEARTBEAT_MS);
  // Don't let the heartbeat keep the process alive; optional call because not
  // every timer implementation exposes unref.
  heartbeat.unref?.();
  try {
    return await Promise.race([
      params.operation,
      new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(() => {
          reject(
            new Error(
              `${params.timeoutLabel} timeout after ${params.timeoutMs}ms (${params.context})`,
            ),
          );
        }, params.timeoutMs);
      }),
    ]);
  } finally {
    // Always stop the heartbeat and cancel the pending timeout rejection.
    clearInterval(heartbeat);
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
    }
    // Only log completion when at least one heartbeat fired (i.e. it was slow).
    if (heartbeatCount > 0) {
      logProgress(
        `${params.context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`,
      );
    }
  }
}
/** Apply the per-probe budget (single agent request / transcript wait). */
async function withGatewayLiveProbeTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
  const outcome = await withGatewayLiveTimeout({
    operation,
    timeoutMs: GATEWAY_LIVE_PROBE_TIMEOUT_MS,
    timeoutLabel: "probe",
    context,
  });
  return outcome;
}
/** Apply the whole-model budget (all probes for one model). */
async function withGatewayLiveModelTimeout<T>(operation: Promise<T>, context: string): Promise<T> {
  const outcome = await withGatewayLiveTimeout({
    operation,
    timeoutMs: GATEWAY_LIVE_MODEL_TIMEOUT_MS,
    timeoutLabel: "model",
    context,
  });
  return outcome;
}
/**
 * Cap `items` to `maxItems` while spreading the selection round-robin across
 * providers. Preserves each provider's internal order and the providers'
 * first-seen order; returns the input unchanged when it already fits the cap.
 */
function capByProviderSpread<T>(
  items: T[],
  maxItems: number,
  providerOf: (item: T) => string,
): T[] {
  if (maxItems <= 0 || items.length <= maxItems) {
    return items;
  }
  // Map iteration preserves insertion order, so first-seen provider order holds.
  const buckets = new Map<string, T[]>();
  for (const item of items) {
    const provider = providerOf(item);
    const existing = buckets.get(provider);
    if (existing) {
      existing.push(item);
    } else {
      buckets.set(provider, [item]);
    }
  }
  const queues = Array.from(buckets.values());
  const picked: T[] = [];
  let round = 0;
  while (picked.length < maxItems) {
    let tookAny = false;
    for (const queue of queues) {
      if (round >= queue.length) {
        continue;
      }
      picked.push(queue[round]);
      tookAny = true;
      if (picked.length >= maxItems) {
        break;
      }
    }
    if (!tookAny) {
      break;
    }
    round += 1;
  }
  return picked;
}
/** Write a progress line straight to stderr so it survives stdout capture. */
function logProgress(text: string): void {
  process.stderr.write(`[live] ${text}\n`);
}
/**
 * Snapshot the VITEST/NODE_ENV markers, then masquerade as production so the
 * gateway behaves like a real deployment for the live run. Returns the
 * snapshot for restoreProductionEnvForLiveRun.
 */
function enterProductionEnvForLiveRun() {
  const snapshot = {
    vitest: process.env.VITEST,
    nodeEnv: process.env.NODE_ENV,
  };
  delete process.env.VITEST;
  process.env.NODE_ENV = "production";
  return snapshot;
}
/**
 * Undo enterProductionEnvForLiveRun: restore each marker to its snapshotted
 * value, deleting vars that were unset before the live run started.
 */
function restoreProductionEnvForLiveRun(previous: {
  vitest: string | undefined;
  nodeEnv: string | undefined;
}) {
  const apply = (key: "VITEST" | "NODE_ENV", value: string | undefined) => {
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  };
  apply("VITEST", previous.vitest);
  apply("NODE_ENV", previous.nodeEnv);
}
/**
 * Render a numbered preview of the first `maxItems` failures (at least one).
 * Whitespace is collapsed, each error clipped to 320 chars, and a trailing
 * "... and N more" line notes omissions.
 */
function formatFailurePreview(
  failures: Array<{ model: string; error: string }>,
  maxItems: number,
): string {
  const shown = Math.max(1, maxItems);
  const lines: string[] = [];
  for (let index = 0; index < failures.length && index < shown; index += 1) {
    const failure = failures[index];
    const flattened = failure.error.replace(/\s+/g, " ").trim();
    const clipped = flattened.length > 320 ? `${flattened.slice(0, 317)}...` : flattened;
    lines.push(`${index + 1}. ${failure.model}: ${clipped}`);
  }
  const omitted = failures.length - shown;
  if (omitted > 0) {
    lines.push(`... and ${omitted} more`);
  }
  return lines.join("\n");
}
/**
 * Throw when any think/final scaffolding tag leaked into user-visible text;
 * the error embeds label, model, phase, and up to 200 chars of the offender.
 */
function assertNoReasoningTags(params: {
  text: string;
  model: string;
  phase: string;
  label: string;
}): void {
  const { text } = params;
  if (!text) {
    return;
  }
  const leaked = THINKING_TAG_RE.test(text) || FINAL_TAG_RE.test(text);
  if (!leaked) {
    return;
  }
  const snippet = text.length > 200 ? text.slice(0, 200) : text;
  throw new Error(
    `[${params.label}] reasoning tag leak (${params.model} / ${params.phase}): ${snippet}`,
  );
}
/**
 * Heuristic for a "real" answer: not empty, not a bare "ok", at least 60
 * characters and at least 12 whitespace-separated words.
 */
function isMeaningful(text: string): boolean {
  if (!text) {
    return false;
  }
  const trimmed = text.trim();
  if (trimmed.toLowerCase() === "ok" || trimmed.length < 60) {
    return false;
  }
  const wordCount = trimmed.split(/\s+/g).filter(Boolean).length;
  return wordCount >= 12;
}
/**
 * Whether a model's persisted transcript is known to carry scaffolding
 * wrappers that must be stripped before assertions: exact allowlist hits,
 * the entire MiniMax provider family, and Google keys after id normalization.
 */
function shouldStripAssistantScaffoldingForLiveModel(modelKey?: string): boolean {
  if (!modelKey) {
    return false;
  }
  if (GATEWAY_LIVE_STRIP_SCAFFOLDING_MODEL_KEYS.has(modelKey)) {
    return true;
  }
  const slash = modelKey.indexOf("/");
  const provider = slash < 0 ? modelKey : modelKey.slice(0, slash);
  if (provider === "minimax" || provider === "minimax-portal") {
    // MiniMax transcript persistence can mirror our <final> wrapper style even
    // though user-visible surfaces already strip it. Keep the live reader
    // aligned with the runtime-facing sanitizers for the whole provider family.
    return true;
  }
  if (provider !== "google" || slash < 0) {
    return false;
  }
  // Google ids may carry aliases/suffixes; compare the normalized key too.
  const normalizedKey = `${provider}/${normalizeGoogleModelId(modelKey.slice(slash + 1))}`;
  return GATEWAY_LIVE_STRIP_SCAFFOLDING_MODEL_KEYS.has(normalizedKey);
}
/** Sanitize transcript text only for models on the scaffolding allowlist. */
function maybeStripAssistantScaffoldingForLiveModel(text: string, modelKey?: string): string {
  return shouldStripAssistantScaffoldingForLiveModel(modelKey)
    ? stripAssistantInternalScaffolding(text).trim()
    : text;
}
/**
 * Whether a missing nonce in the exec+read probe is a known limitation for
 * this model (exact allowlist hit, or a Google key after id normalization).
 */
function shouldSkipExecReadNonceMissForLiveModel(modelKey?: string): boolean {
  if (!modelKey) {
    return false;
  }
  if (GATEWAY_LIVE_EXEC_READ_NONCE_MISS_SKIP_MODEL_KEYS.has(modelKey)) {
    return true;
  }
  const slash = modelKey.indexOf("/");
  if (slash < 0) {
    return false;
  }
  const provider = modelKey.slice(0, slash);
  if (provider !== "google") {
    return false;
  }
  const normalizedKey = `${provider}/${normalizeGoogleModelId(modelKey.slice(slash + 1))}`;
  return GATEWAY_LIVE_EXEC_READ_NONCE_MISS_SKIP_MODEL_KEYS.has(normalizedKey);
}
/**
 * Whether an empty model response should be tolerated: always for googleish
 * and a few relay providers; for a second tier only when the caller also
 * tolerates model-not-found responses.
 */
function shouldSkipEmptyResponseForLiveModel(params: {
  provider: string;
  allowNotFoundSkip: boolean;
}): boolean {
  const { provider, allowNotFoundSkip } = params;
  if (isGoogleishProvider(provider)) {
    return true;
  }
  switch (provider) {
    case "openrouter":
    case "opencode":
    case "opencode-go":
      return true;
    default:
      break;
  }
  if (!allowNotFoundSkip) {
    return false;
  }
  const notFoundSkippable = ["google-antigravity", "minimax", "openai-codex", "zai"];
  return notFoundSkippable.includes(provider);
}
// Inline unit tests (run even when the live suite is skipped): pin the
// transcript-sanitizing allowlist behavior without any network access.
describe("maybeStripAssistantScaffoldingForLiveModel", () => {
  it("strips scaffolding for Gemini preview models with known transcript wrappers", () => {
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<think>hidden</think>Visible",
        "google/gemini-3.1-flash-preview",
      ),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<think>hidden</think>Visible",
        "google/gemini-3.1-flash-lite-preview",
      ),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<think>hidden</think>Visible",
        "google/gemini-3.1-pro-preview",
      ),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<think>hidden</think>Visible",
        "google/gemini-3.1-pro-preview-customtools",
      ),
    ).toBe("Visible");
    // A model outside the allowlist must pass through untouched.
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<think>hidden</think>Visible",
        "google/gemini-2.5-flash",
      ),
    ).toBe("<think>hidden</think>Visible");
  });
  it("strips scaffolding for known OpenAI transcript wrappers", () => {
    expect(
      maybeStripAssistantScaffoldingForLiveModel("<final>Visible</final>", "openai/gpt-5.2-pro"),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel("<final>Visible</final>", "openai/gpt-5.2"),
    ).toBe("<final>Visible</final>");
  });
  it("strips scaffolding for MiniMax transcript wrappers", () => {
    // The whole minimax / minimax-portal provider family is sanitized.
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<final>Visible</final>",
        "minimax/MiniMax-M2.5-highspeed",
      ),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel(
        "<final>Visible</final>",
        "minimax-portal/MiniMax-M2.7-highspeed",
      ),
    ).toBe("Visible");
    expect(
      maybeStripAssistantScaffoldingForLiveModel("<final>Visible</final>", "minimax/MiniMax-M2.7"),
    ).toBe("Visible");
  });
});
describe("shouldSkipExecReadNonceMissForLiveModel", () => {
  it("matches the known Gemini lite exec/read isolation case", () => {
    expect(shouldSkipExecReadNonceMissForLiveModel("google/gemini-3.1-flash-lite-preview")).toBe(
      true,
    );
    // NOTE(review): the un-suffixed id presumably maps to the -preview key via
    // normalizeGoogleModelId — confirm if that normalization ever changes.
    expect(shouldSkipExecReadNonceMissForLiveModel("google/gemini-3.1-flash-lite")).toBe(true);
    expect(shouldSkipExecReadNonceMissForLiveModel("google/gemini-3.1-flash-preview")).toBe(false);
  });
});
// Pins the precedence chain: gateway env > shared env > 120s default,
// floored at the per-probe budget.
describe("resolveGatewayLiveModelTimeoutMs", () => {
  it("prefers gateway-specific timeout when provided", () => {
    expect(resolveGatewayLiveModelTimeoutMs("180000", "45000", 90_000)).toBe(180_000);
  });
  it("falls back to the shared live timeout", () => {
    expect(resolveGatewayLiveModelTimeoutMs("", "45000", 30_000)).toBe(45_000);
  });
  it("never goes below the probe timeout", () => {
    expect(resolveGatewayLiveModelTimeoutMs("45000", undefined, 90_000)).toBe(90_000);
  });
});
/**
 * Recognize Google "model not found" payloads in their known shapes:
 * prose ("models/X is not found for API version"), a JSON NOT_FOUND status,
 * or a JSON 404 code — all gated on the text containing "not found".
 */
function isGoogleModelNotFoundText(text: string): boolean {
  const body = text.trim();
  if (!body || !/not found/i.test(body)) {
    return false;
  }
  return (
    /models\/.+ is not found for api version/i.test(body) ||
    /"status"\s*:\s*"NOT_FOUND"/.test(body) ||
    /"code"\s*:\s*404/.test(body)
  );
}
/** "google" itself plus any "google-*" variant (e.g. google-antigravity). */
function isGoogleishProvider(provider: string): boolean {
  if (provider === "google") {
    return true;
  }
  return provider.startsWith("google-");
}
/** OAuth layer reported a refresh-token replay. */
function isRefreshTokenReused(error: string): boolean {
  return error.toLowerCase().includes("refresh_token_reused");
}
/** ChatGPT-side usage limit message ("hit your ChatGPT usage limit ... try again in"). */
function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  return ["hit your chatgpt usage limit", "try again in"].every((needle) =>
    lowered.includes(needle),
  );
}
/** Upstream/provider availability failures in their known phrasings. */
function isProviderUnavailableErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  const needles = [
    "no allowed providers are available",
    "provider unavailable",
    "upstream provider unavailable",
    "upstream error from google",
  ];
  return needles.some((needle) => lowered.includes(needle));
}
/** Local ollama daemon unreachable (explicit message or ECONNREFUSED on :11434). */
function isOllamaUnavailableErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  if (lowered.includes("ollama could not be reached")) {
    return true;
  }
  if (!lowered.includes("econnrefused")) {
    return false;
  }
  return lowered.includes("127.0.0.1:11434") || lowered.includes("localhost:11434");
}
/** Provider rejected a request for lacking instructions. */
function isInstructionsRequiredError(error: string): boolean {
  return error.toLowerCase().includes("instructions are required");
}
/** OpenAI responses-API ordering complaint about a required reasoning item. */
function isOpenAIReasoningSequenceError(error: string): boolean {
  const lowered = error.toLowerCase();
  return lowered.includes("required following item") && lowered.includes("reasoning");
}
/** Model refused to disclose the probe nonce (policy-style refusal wording). */
function isToolNonceRefusal(error: string): boolean {
  const lowered = error.toLowerCase();
  if (!lowered.includes("nonce")) {
    return false;
  }
  const refusalMarkers = [
    "token",
    "secret",
    "local file",
    "disclose",
    "can't help",
    "cant help",
    "can't comply",
    "cant comply",
  ];
  return refusalMarkers.some((marker) => lowered.includes(marker));
}
/** Any tool-probe nonce miss (tool probe or exec+read probe). */
function isToolNonceProbeMiss(error: string): boolean {
  const lowered = error.toLowerCase();
  return (
    lowered.includes("tool probe missing nonce") ||
    lowered.includes("exec+read probe missing nonce")
  );
}
/** Specifically the exec+read probe nonce miss. */
function isExecReadNonceProbeMiss(error: string): boolean {
  return error.toLowerCase().includes("exec+read probe missing nonce");
}
/** Prompt-probe quality miss ("not meaningful:" / "missing required keywords:"). */
function isPromptProbeMiss(error: string): boolean {
  const lowered = error.toLowerCase();
  return lowered.includes("not meaningful:") || lowered.includes("missing required keywords:");
}
/**
 * Providers whose models routinely refuse or drop the tool-nonce probe;
 * a miss there is treated as a known limitation rather than a failure.
 */
function shouldSkipToolNonceProbeMiss(provider: string): boolean {
  const skippable = new Set(["anthropic", "minimax", "opencode", "opencode-go", "xai", "zai"]);
  return skippable.has(provider);
}
// Table-driven pin of the tolerated-nonce-miss provider allowlist.
describe("shouldSkipToolNonceProbeMiss", () => {
  it.each([
    { provider: "anthropic", expected: true },
    { provider: "minimax", expected: true },
    { provider: "opencode", expected: true },
    { provider: "opencode-go", expected: true },
    { provider: "xai", expected: true },
    { provider: "zai", expected: true },
    { provider: "openai", expected: false },
  ])("returns $expected for $provider", ({ provider, expected }) => {
    expect(shouldSkipToolNonceProbeMiss(provider)).toBe(expected);
  });
});
// Pins both tiers: providers that always tolerate empty responses, and the
// tier that only does so when allowNotFoundSkip is set.
describe("shouldSkipEmptyResponseForLiveModel", () => {
  it.each([
    { provider: "google", allowNotFoundSkip: false, expected: true },
    { provider: "google-antigravity", allowNotFoundSkip: false, expected: true },
    { provider: "openrouter", allowNotFoundSkip: false, expected: true },
    { provider: "opencode", allowNotFoundSkip: false, expected: true },
    { provider: "opencode-go", allowNotFoundSkip: false, expected: true },
    { provider: "minimax", allowNotFoundSkip: false, expected: false },
    { provider: "minimax", allowNotFoundSkip: true, expected: true },
    { provider: "zai", allowNotFoundSkip: true, expected: true },
    { provider: "openai-codex", allowNotFoundSkip: true, expected: true },
    { provider: "xai", allowNotFoundSkip: true, expected: false },
  ])(
    "returns $expected for $provider (allowNotFoundSkip=$allowNotFoundSkip)",
    ({ provider, allowNotFoundSkip, expected }) => {
      expect(shouldSkipEmptyResponseForLiveModel({ provider, allowNotFoundSkip })).toBe(expected);
    },
  );
});
// Prompt-probe misses are distinct from tool-nonce misses; pin the markers.
describe("isPromptProbeMiss", () => {
  it.each([
    { error: "not meaningful: let me think", expected: true },
    { error: "missing required keywords: event loop summary", expected: true },
    { error: "tool probe missing nonce: nonce-a", expected: false },
  ])("returns $expected for $error", ({ error, expected }) => {
    expect(isPromptProbeMiss(error)).toBe(expected);
  });
});
/** Gateway reported that the selected auth profile has no stored credentials. */
function isMissingProfileError(error: string): boolean {
  return error.toLowerCase().includes("no credentials found for profile");
}
/** Provider stream closed before emitting a single chunk. */
function isEmptyStreamText(text: string): boolean {
  const marker = "request ended without sending any chunks";
  return text.includes(marker);
}
/** Build a per-call unique refusal-trigger token so stale matches can't pass. */
function buildAnthropicRefusalToken(): string {
  const entropy = randomUUID().split("-").join("");
  return `${ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL}_${entropy}`;
}
/**
 * Send a message containing Anthropic's refusal-trigger token, assert the
 * model still answers "ok" with no reasoning-tag leak, then confirm the
 * session recovers on a follow-up turn.
 */
async function runAnthropicRefusalProbe(params: {
  client: GatewayClient;
  sessionKey: string;
  modelKey: string;
  label: string;
  thinkingLevel: string;
}): Promise<void> {
  // One round-trip: request text, then check for scaffolding-tag leaks.
  const ask = async (phase: string, message: string, idemSuffix: string) => {
    const text = await requestGatewayAgentText({
      client: params.client,
      sessionKey: params.sessionKey,
      idempotencyKey: `idem-${randomUUID()}-${idemSuffix}`,
      message,
      thinkingLevel: params.thinkingLevel,
      context: `${params.label}: ${phase}`,
      modelKey: params.modelKey,
    });
    assertNoReasoningTags({
      text,
      model: params.modelKey,
      phase,
      label: params.label,
    });
    return text;
  };
  logProgress(`${params.label}: refusal-probe`);
  const magic = buildAnthropicRefusalToken();
  const probeText = await ask(
    "refusal-probe",
    `Reply with the single word ok. Test token: ${magic}`,
    "refusal",
  );
  if (!/\bok\b/i.test(probeText)) {
    throw new Error(`refusal probe missing ok: ${probeText}`);
  }
  const followupText = await ask(
    "refusal-followup",
    "Now reply with exactly: still ok.",
    "refusal-followup",
  );
  if (!/\bstill\b/i.test(followupText) || !/\bok\b/i.test(followupText)) {
    throw new Error(`refusal followup missing expected text: ${followupText}`);
  }
}
/**
 * Generate a random probe code for the image OCR check.
 * The alphabet avoids common OCR confusions in our 5x7 bitmap font
 * (notably 0↔8, B↔8, 6↔9, 3↔B, D↔0) and must stay within the glyph set
 * in `src/gateway/live-image-probe.ts`.
 */
function randomImageProbeCode(len = 6): string {
  const alphabet = "24567ACEF";
  const entropy = randomBytes(len);
  const chars: string[] = [];
  for (let i = 0; i < len; i += 1) {
    chars.push(alphabet[entropy[i] % alphabet.length]);
  }
  return chars.join("");
}
/**
 * Levenshtein edit distance between two strings (insert/delete/substitute,
 * unit costs), computed with a full DP matrix; rows index `a`, columns `b`.
 */
function editDistance(a: string, b: string): number {
  if (a === b) {
    return 0;
  }
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }
  const rows = a.length + 1;
  const cols = b.length + 1;
  // First row/column encode distances from the empty prefix.
  const dp: number[][] = Array.from({ length: rows }, (_row, i) =>
    Array.from({ length: cols }, (_col, j) => (i === 0 ? j : j === 0 ? i : 0)),
  );
  for (let i = 1; i < rows; i += 1) {
    for (let j = 1; j < cols; j += 1) {
      const substitutionCost = a.charCodeAt(i - 1) === b.charCodeAt(j - 1) ? 0 : 1;
      dp[i][j] = Math.min(
        dp[i - 1][j] + 1, // deletion
        dp[i][j - 1] + 1, // insertion
        dp[i - 1][j - 1] + substitutionCost, // substitution
      );
    }
  }
  return dp[a.length][b.length];
}
/**
 * Ask the OS for a free loopback port by binding to port 0, then release it.
 * Rejects if the listener errors or reports a non-address binding.
 */
async function getFreePort(): Promise<number> {
  return await new Promise((resolve, reject) => {
    const probe = createServer();
    probe.on("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const address = probe.address();
      if (address === null || typeof address === "string") {
        probe.close();
        reject(new Error("failed to acquire free port"));
        return;
      }
      const acquired = address.port;
      probe.close((closeErr) => (closeErr ? reject(closeErr) : resolve(acquired)));
    });
  });
}
/**
 * Check whether a loopback port can currently be bound. Nonsense port numbers
 * are rejected up front; any listen error is reported as "not free".
 */
async function isPortFree(port: number): Promise<boolean> {
  const isValidPort = Number.isFinite(port) && port > 0 && port <= 65535;
  if (!isValidPort) {
    return false;
  }
  return await new Promise((resolve) => {
    const probe = createServer();
    probe.once("error", () => resolve(false));
    probe.listen(port, "127.0.0.1", () => {
      probe.close(() => resolve(true));
    });
  });
}
/**
 * Acquire a base port whose common derived offsets are also free.
 * The gateway uses derived ports (browser/canvas), so a usable base needs
 * +1, +2, and +4 available too; retries up to 25 times before giving up.
 */
async function getFreeGatewayPort(): Promise<number> {
  const derivedOffsets = [0, 1, 2, 4];
  for (let attempt = 0; attempt < 25; attempt += 1) {
    const base = await getFreePort();
    const checks = await Promise.all(derivedOffsets.map((offset) => isPortFree(base + offset)));
    if (checks.every(Boolean)) {
      return base;
    }
  }
  throw new Error("failed to acquire a free gateway port block");
}
/**
 * Open a gateway websocket client and resolve once the hello handshake
 * succeeds; reject on connect error, early close, or a 10s timeout.
 */
async function connectClient(params: { url: string; token: string }) {
  return await new Promise<GatewayClient>((resolve, reject) => {
    let settled = false;
    // Declared (as let) BEFORE the client is constructed: `stop` reads `timer`,
    // and a callback firing synchronously during construction would otherwise
    // hit a temporal-dead-zone ReferenceError on the later `const timer`.
    let timer: ReturnType<typeof setTimeout> | undefined;
    const stop = (err?: Error, client?: GatewayClient) => {
      if (settled) {
        return;
      }
      settled = true;
      if (timer) {
        clearTimeout(timer);
      }
      if (err) {
        reject(err);
      } else {
        resolve(client as GatewayClient);
      }
    };
    const client = new GatewayClient({
      url: params.url,
      token: params.token,
      clientName: GATEWAY_CLIENT_NAMES.TEST,
      clientDisplayName: "vitest-live",
      clientVersion: "dev",
      mode: GATEWAY_CLIENT_MODES.TEST,
      onHelloOk: () => stop(undefined, client),
      onConnectError: (err) => stop(err),
      onClose: (code, reason) =>
        stop(new Error(`gateway closed during connect (${code}): ${reason}`)),
    });
    timer = setTimeout(() => stop(new Error("gateway connect timeout")), 10_000);
    timer.unref();
    client.start();
  });
}
/**
 * Pull human-readable text from a transcript message, which may carry it as a
 * string `text` field, a string `content` field, or an array of { text }
 * content parts (joined with newlines). Returns "" when nothing usable exists.
 */
function extractTranscriptMessageText(message: unknown): string {
  if (typeof message !== "object" || message === null) {
    return "";
  }
  const record = message as { text?: unknown; content?: unknown };
  const direct = typeof record.text === "string" ? record.text.trim() : "";
  if (direct) {
    return direct;
  }
  if (typeof record.content === "string") {
    return record.content.trim();
  }
  if (!Array.isArray(record.content)) {
    return "";
  }
  const parts: string[] = [];
  for (const entry of record.content) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const text = (entry as { text?: unknown }).text;
    if (typeof text === "string" && text.trim()) {
      parts.push(text.trim());
    }
  }
  return parts.join("\n").trim();
}
/**
 * Read every assistant-authored message text from the persisted session,
 * sanitized for models known to leak scaffolding wrappers into transcripts.
 * Returns [] when the session has no recorded sessionId yet.
 */
function readSessionAssistantTexts(sessionKey: string, modelKey?: string): string[] {
  const { storePath, entry } = loadSessionEntry(sessionKey);
  if (!entry?.sessionId) {
    return [];
  }
  const messages = readSessionMessages(entry.sessionId, storePath, entry.sessionFile);
  const texts: string[] = [];
  for (const message of messages) {
    if (!message || typeof message !== "object") {
      continue;
    }
    if ((message as { role?: unknown }).role !== "assistant") {
      continue;
    }
    texts.push(
      maybeStripAssistantScaffoldingForLiveModel(extractTranscriptMessageText(message), modelKey),
    );
  }
  return texts;
}
/**
 * Poll the persisted session transcript (backoff 50ms doubling to a 250ms cap)
 * until an assistant message beyond `baselineAssistantCount` appears with
 * non-empty text. Emits heartbeat log lines while waiting; throws a
 * probe-timeout error (matched by isGatewayLiveProbeTimeout) after the budget.
 */
async function waitForSessionAssistantText(params: {
  sessionKey: string;
  baselineAssistantCount: number;
  context: string;
  modelKey?: string;
}) {
  const startedAt = Date.now();
  let lastHeartbeatAt = startedAt;
  let delayMs = 50;
  while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) {
    const assistantTexts = readSessionAssistantTexts(params.sessionKey, params.modelKey);
    if (assistantTexts.length > params.baselineAssistantCount) {
      // Prefer the latest non-empty message among the newly appended ones.
      const freshText = assistantTexts
        .slice(params.baselineAssistantCount)
        .map((text) => text.trim())
        .findLast((text) => text.length > 0);
      if (freshText) {
        return freshText;
      }
    }
    if (Date.now() - lastHeartbeatAt >= GATEWAY_LIVE_HEARTBEAT_MS) {
      lastHeartbeatAt = Date.now();
      logProgress(
        `${params.context}: waiting for transcript (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`,
      );
    }
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    delayMs = Math.min(delayMs * 2, 250);
  }
  // Message shape matters: isGatewayLiveProbeTimeout matches this exact text.
  throw new Error(`probe timeout after ${GATEWAY_LIVE_PROBE_TIMEOUT_MS}ms (${params.context})`);
}
/**
 * Submit an agent request over the gateway and wait for the resulting
 * assistant text to land in the persisted transcript. `deliver: false`
 * keeps the reply out of delivery channels; we read it from the session
 * store instead, relative to the pre-request assistant-message count.
 */
async function requestGatewayAgentText(params: {
  client: GatewayClient;
  sessionKey: string;
  message: string;
  thinkingLevel: string;
  context: string;
  idempotencyKey: string;
  modelKey?: string;
  attachments?: Array<{
    mimeType: string;
    fileName: string;
    content: string;
  }>;
}) {
  const baselineAssistantCount = readSessionAssistantTexts(
    params.sessionKey,
    params.modelKey,
  ).length;
  const agentPayload = {
    sessionKey: params.sessionKey,
    idempotencyKey: params.idempotencyKey,
    message: params.message,
    thinking: params.thinkingLevel,
    deliver: false,
    attachments: params.attachments,
  };
  const accepted = await withGatewayLiveProbeTimeout(
    params.client.request<{ runId?: unknown; status?: unknown }>("agent", agentPayload),
    `${params.context}: agent-accept`,
  );
  if (accepted?.status !== "accepted") {
    throw new Error(`agent status=${String(accepted?.status)}`);
  }
  return await waitForSessionAssistantText({
    sessionKey: params.sessionKey,
    baselineAssistantCount,
    context: `${params.context}: transcript-final`,
    modelKey: params.modelKey,
  });
}
/** Inputs for one gateway live-suite run over a set of candidate models. */
type GatewayModelSuiteParams = {
  label: string; // human-readable suite label used in logs and error messages
  cfg: OpenClawConfig; // base config the live gateway config is derived from
  candidates: Array<Model<Api>>; // models to probe in this run
  allowNotFoundSkip: boolean; // tolerate provider "model not found" responses
  extraToolProbes: boolean; // run the extended tool-probe set (off in smoke mode)
  extraImageProbes: boolean; // run the extended image-probe set (off in smoke mode)
  thinkingLevel: string; // thinking level forwarded with agent requests
  providerOverrides?: Record<string, ModelProviderConfig>; // per-provider config patches
};
/**
 * Derive a config for the live run from the host config: sandboxing is forced
 * off everywhere (tool probes write files in the host workspace), every
 * candidate is registered as an agent default model, lmstudio is pinned to the
 * openai-completions API, and caller overrides win last.
 */
function buildLiveGatewayConfig(params: {
  cfg: OpenClawConfig;
  candidates: Array<Model<Api>>;
  providerOverrides?: Record<string, ModelProviderConfig>;
}): OpenClawConfig {
  const overrides = params.providerOverrides ?? {};
  const baseProviders = params.cfg.models?.providers ?? {};
  const lmstudioProvider = params.cfg.models?.providers?.lmstudio;
  const lmstudioPatch = lmstudioProvider
    ? { lmstudio: { ...lmstudioProvider, api: "openai-completions" as const } }
    : {};
  const mergedProviders = { ...baseProviders, ...lmstudioPatch, ...overrides };
  const providers = Object.keys(mergedProviders).length > 0 ? mergedProviders : baseProviders;
  const candidateModels = Object.fromEntries(
    params.candidates.map((candidate) => [`${candidate.provider}/${candidate.id}`, {}]),
  );
  const baseModels = params.cfg.models;
  return {
    ...params.cfg,
    agents: {
      ...params.cfg.agents,
      list: (params.cfg.agents?.list ?? []).map((entry) => ({
        ...entry,
        sandbox: { mode: "off" as const },
      })),
      defaults: {
        ...params.cfg.agents?.defaults,
        // Live tests should avoid Docker sandboxing so tool probes can
        // operate on the temporary probe files we create in the host workspace.
        sandbox: { mode: "off" as const },
        models: candidateModels,
      },
    },
    models:
      Object.keys(providers).length > 0
        ? ({ ...baseModels, providers } as ModelsConfig)
        : baseModels,
  };
}
/**
 * Drop auth profile/order entries that reference credentials missing from the
 * on-disk profile store, so the gateway never selects a profile it cannot
 * load. Returns undefined when nothing survives and there are no cooldowns.
 */
function sanitizeAuthConfig(params: {
  cfg: OpenClawConfig;
  agentDir: string;
}): OpenClawConfig["auth"] | undefined {
  const auth = params.cfg.auth;
  if (!auth) {
    return auth;
  }
  const store = ensureAuthProfileStore(params.agentDir, {
    allowKeychainPrompt: false,
  });
  let profiles: NonNullable<OpenClawConfig["auth"]>["profiles"] | undefined;
  if (auth.profiles) {
    const kept: NonNullable<typeof profiles> = {};
    let keptCount = 0;
    for (const [profileId, profile] of Object.entries(auth.profiles)) {
      if (!store.profiles[profileId]) {
        continue;
      }
      kept[profileId] = profile;
      keptCount += 1;
    }
    profiles = keptCount > 0 ? kept : undefined;
  }
  let order: Record<string, string[]> | undefined;
  if (auth.order) {
    const keptOrder: Record<string, string[]> = {};
    let orderCount = 0;
    for (const [provider, ids] of Object.entries(auth.order)) {
      const known = ids.filter((id) => Boolean(store.profiles[id]));
      if (known.length === 0) {
        continue;
      }
      keptOrder[provider] = known;
      orderCount += 1;
    }
    order = orderCount > 0 ? keptOrder : undefined;
  }
  if (!profiles && !order && !auth.cooldowns) {
    return undefined;
  }
  return {
    ...auth,
    profiles,
    order,
  };
}
/**
 * Re-point the configured minimax provider at an alternate API surface and
 * base URL; returns null when no minimax provider with models is configured.
 */
function buildMinimaxProviderOverride(params: {
  cfg: OpenClawConfig;
  api: "openai-completions" | "anthropic-messages";
  baseUrl: string;
}): ModelProviderConfig | null {
  const configured = params.cfg.models?.providers?.minimax;
  if (!configured || !Array.isArray(configured.models) || configured.models.length === 0) {
    return null;
  }
  return {
    ...configured,
    api: params.api,
    baseUrl: params.baseUrl,
  };
}
/**
 * Spins up a throwaway gateway (loopback bind, token auth) inside an
 * isolated temp state/agent directory, then runs live prompt, tool, and
 * image probes against every candidate model in `params.candidates`.
 *
 * Hard failures are collected and thrown at the end; a long list of
 * known-flaky provider conditions (rate limits, billing errors, catalog
 * drift, empty streams, probe timeouts) are logged and counted as skips
 * instead, so one transient provider outage does not fail the whole run.
 */
async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
  clearRuntimeConfigSnapshot();
  const runtimeEnv = enterProductionEnvForLiveRun();
  // Snapshot every env var we mutate below so the finally block can restore it.
  const previous = {
    configPath: process.env.OPENCLAW_CONFIG_PATH,
    token: process.env.OPENCLAW_GATEWAY_TOKEN,
    skipChannels: process.env.OPENCLAW_SKIP_CHANNELS,
    skipGmail: process.env.OPENCLAW_SKIP_GMAIL_WATCHER,
    skipCron: process.env.OPENCLAW_SKIP_CRON,
    skipCanvas: process.env.OPENCLAW_SKIP_CANVAS_HOST,
    disableBonjour: process.env.OPENCLAW_DISABLE_BONJOUR,
    logLevel: process.env.OPENCLAW_LOG_LEVEL,
    agentDir: process.env.OPENCLAW_AGENT_DIR,
    piAgentDir: process.env.PI_CODING_AGENT_DIR,
    stateDir: process.env.OPENCLAW_STATE_DIR,
  };
  let tempAgentDir: string | undefined;
  let tempStateDir: string | undefined;
  // Disable side subsystems (channels, gmail watcher, cron, canvas host)
  // so the gateway runs hermetically for the probe.
  process.env.OPENCLAW_SKIP_CHANNELS = "1";
  process.env.OPENCLAW_SKIP_GMAIL_WATCHER = "1";
  process.env.OPENCLAW_SKIP_CRON = "1";
  process.env.OPENCLAW_SKIP_CANVAS_HOST = "1";
  if (QUIET_LIVE_LOGS) {
    process.env.OPENCLAW_DISABLE_BONJOUR = "1";
    process.env.OPENCLAW_LOG_LEVEL = "silent";
  }
  const token = `test-${randomUUID()}`;
  process.env.OPENCLAW_GATEWAY_TOKEN = token;
  const agentId = "dev";
  // Copy the host's auth profile store into the temp agent dirs so the
  // gateway resolves the same credentials and profile-selection state
  // (order/lastGood/usageStats) as the host environment.
  const hostAgentDir = resolveOpenClawAgentDir();
  const hostStore = ensureAuthProfileStore(hostAgentDir, {
    allowKeychainPrompt: false,
  });
  const sanitizedStore: AuthProfileStore = {
    version: hostStore.version,
    profiles: { ...hostStore.profiles },
    // Keep selection state so the gateway picks the same known-good profiles
    // as the host (important when some profiles are rate-limited/disabled).
    order: hostStore.order ? { ...hostStore.order } : undefined,
    lastGood: hostStore.lastGood ? { ...hostStore.lastGood } : undefined,
    usageStats: hostStore.usageStats ? { ...hostStore.usageStats } : undefined,
  };
  tempStateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-live-state-"));
  process.env.OPENCLAW_STATE_DIR = tempStateDir;
  tempAgentDir = path.join(tempStateDir, "agents", DEFAULT_AGENT_ID, "agent");
  saveAuthProfileStore(sanitizedStore, tempAgentDir);
  const tempSessionAgentDir = path.join(tempStateDir, "agents", agentId, "agent");
  if (tempSessionAgentDir !== tempAgentDir) {
    saveAuthProfileStore(sanitizedStore, tempSessionAgentDir);
  }
  process.env.OPENCLAW_AGENT_DIR = tempAgentDir;
  process.env.PI_CODING_AGENT_DIR = tempAgentDir;
  // Seed a probe file in the agent workspace; tool probes ask the model to
  // `read` it and echo the nonces back so we know the tool actually ran.
  const workspaceDir = resolveAgentWorkspaceDir(params.cfg, agentId);
  await fs.mkdir(workspaceDir, { recursive: true });
  const nonceA = randomUUID();
  const nonceB = randomUUID();
  const toolProbePath = path.join(workspaceDir, `.openclaw-live-tool-probe.${nonceA}.txt`);
  await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);
  const agentDir = resolveOpenClawAgentDir();
  const sanitizedCfg: OpenClawConfig = {
    ...params.cfg,
    auth: sanitizeAuthConfig({ cfg: params.cfg, agentDir }),
  };
  const nextCfg = buildLiveGatewayConfig({
    cfg: sanitizedCfg,
    candidates: params.candidates,
    providerOverrides: params.providerOverrides,
  });
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-live-"));
  const tempConfigPath = path.join(tempDir, "openclaw.json");
  await fs.writeFile(tempConfigPath, `${JSON.stringify(nextCfg, null, 2)}\n`);
  process.env.OPENCLAW_CONFIG_PATH = tempConfigPath;
  const liveProviders = nextCfg.models?.providers;
  if (liveProviders && Object.keys(liveProviders).length > 0) {
    const modelsPath = path.join(tempAgentDir, "models.json");
    await fs.mkdir(tempAgentDir, { recursive: true });
    await fs.writeFile(modelsPath, `${JSON.stringify({ providers: liveProviders }, null, 2)}\n`);
  }
  let server: Awaited<ReturnType<typeof startGatewayServer>> | undefined;
  let client: GatewayClient | undefined;
  try {
    const port = await withGatewayLiveProbeTimeout(
      getFreeGatewayPort(),
      `${params.label}: gateway-port`,
    );
    server = await withGatewayLiveProbeTimeout(
      startGatewayServer(port, {
        bind: "loopback",
        auth: { mode: "token", token },
        controlUiEnabled: false,
      }),
      `${params.label}: gateway-start`,
    );
    client = await withGatewayLiveProbeTimeout(
      connectClient({
        url: `ws://127.0.0.1:${port}`,
        token,
      }),
      `${params.label}: gateway-connect`,
    );
  } catch (error) {
    const message = String(error);
    if (isGatewayLiveProbeTimeout(message)) {
      // NOTE(review): returning here (and at the !server||!client guard
      // below) happens before the try/finally that restores env vars and
      // deletes the temp dirs — a startup timeout leaks both into later
      // tests. Confirm whether that is acceptable for this skip path.
      logProgress(`[${params.label}] skip (gateway startup timeout)`);
      return;
    }
    throw error;
  }
  if (!server || !client) {
    logProgress(`[${params.label}] skip (gateway startup incomplete)`);
    return;
  }
  try {
    logProgress(
      `[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`,
    );
    logProgress(
      `[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s model-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_MODEL_TIMEOUT_MS / 1_000))}s`,
    );
    const anthropicKeys = collectAnthropicApiKeys();
    if (anthropicKeys.length > 0) {
      process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
      logProgress(`[${params.label}] anthropic keys loaded: ${anthropicKeys.length}`);
    }
    const sessionKey = `agent:${agentId}:${params.label}`;
    const failures: Array<{ model: string; error: string }> = [];
    let skippedCount = 0;
    const total = params.candidates.length;
    // Probe each candidate model sequentially. Anthropic models may retry
    // the whole probe once per available API key when a key hits a
    // rate-limit/billing/empty-stream condition.
    for (const [index, model] of params.candidates.entries()) {
      const modelKey = `${model.provider}/${model.id}`;
      const progressLabel = `[${params.label}] ${index + 1}/${total} ${modelKey}`;
      const attemptMax =
        model.provider === "anthropic" && anthropicKeys.length > 0 ? anthropicKeys.length : 1;
      for (let attempt = 0; attempt < attemptMax; attempt += 1) {
        if (model.provider === "anthropic" && anthropicKeys.length > 0) {
          process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt];
        }
        try {
          const modelResult = await withGatewayLiveModelTimeout<"done" | "skip">(
            (async () => {
              // Ensure session exists + override model for this run.
              // Reset between models: avoids cross-provider transcript incompatibilities
              // (notably OpenAI Responses requiring reasoning replay for function_call items).
              await withGatewayLiveProbeTimeout(
                client.request("sessions.reset", {
                  key: sessionKey,
                }),
                `${progressLabel}: sessions-reset`,
              );
              await withGatewayLiveProbeTimeout(
                client.request("sessions.patch", {
                  key: sessionKey,
                  model: modelKey,
                }),
                `${progressLabel}: sessions-patch`,
              );
              // Phase 1: plain prompt probe — must mention both keywords.
              logProgress(`${progressLabel}: prompt`);
              let text = await requestGatewayAgentText({
                client,
                sessionKey,
                idempotencyKey: `idem-${randomUUID()}`,
                modelKey,
                message:
                  "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
                thinkingLevel: params.thinkingLevel,
                context: `${progressLabel}: prompt`,
              });
              if (!text) {
                logProgress(`${progressLabel}: empty response, retrying`);
                text = await requestGatewayAgentText({
                  client,
                  sessionKey,
                  idempotencyKey: `idem-${randomUUID()}-retry`,
                  modelKey,
                  message:
                    "Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
                  thinkingLevel: params.thinkingLevel,
                  context: `${progressLabel}: prompt-retry`,
                });
              }
              if (
                !text &&
                shouldSkipEmptyResponseForLiveModel({
                  provider: model.provider,
                  allowNotFoundSkip: params.allowNotFoundSkip,
                })
              ) {
                logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
                return "skip";
              }
              if (
                isEmptyStreamText(text) &&
                shouldSkipEmptyResponseForLiveModel({
                  provider: model.provider,
                  allowNotFoundSkip: params.allowNotFoundSkip,
                })
              ) {
                logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
                return "skip";
              }
              if (isGoogleishProvider(model.provider) && isGoogleModelNotFoundText(text)) {
                // Catalog drift: model IDs can disappear or become unavailable on the API.
                // Treat as skip when scanning "all models" for Google.
                logProgress(`${progressLabel}: skip (google model not found)`);
                return "skip";
              }
              if (params.allowNotFoundSkip && isModelNotFoundErrorMessage(text)) {
                logProgress(`${progressLabel}: skip (model not found)`);
                return "skip";
              }
              assertNoReasoningTags({
                text,
                model: modelKey,
                phase: "prompt",
                label: params.label,
              });
              if (!isMeaningful(text)) {
                if (isGoogleishProvider(model.provider) && /gemini/i.test(model.id)) {
                  logProgress(`${progressLabel}: skip (google not meaningful)`);
                  return "skip";
                }
                throw new Error(`not meaningful: ${text}`);
              }
              if (
                !/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
                !/\bmacro\s*-?\s*tasks?\b/i.test(text)
              ) {
                throw new Error(`missing required keywords: ${text}`);
              }
              // Phase 2: real tool invocation — force the agent to Read a
              // local file and echo a nonce. Retries tighten the prompt
              // ("exactly: ...") after the first malformed attempt.
              logProgress(`${progressLabel}: tool-read`);
              const runIdTool = randomUUID();
              const maxToolReadAttempts = 3;
              let toolText = "";
              for (
                let toolReadAttempt = 0;
                toolReadAttempt < maxToolReadAttempts;
                toolReadAttempt += 1
              ) {
                const strictReply = toolReadAttempt > 0;
                toolText = await requestGatewayAgentText({
                  client,
                  sessionKey,
                  idempotencyKey: `idem-${runIdTool}-tool-${toolReadAttempt + 1}`,
                  modelKey,
                  message: strictReply
                    ? "OpenClaw live tool probe (local, safe): " +
                      `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
                      `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`
                    : "OpenClaw live tool probe (local, safe): " +
                      `use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
                      "Then reply with the two nonce values you read (include both).",
                  thinkingLevel: params.thinkingLevel,
                  context: `${progressLabel}: tool-read`,
                });
                if (
                  isEmptyStreamText(toolText) &&
                  shouldSkipEmptyResponseForLiveModel({
                    provider: model.provider,
                    allowNotFoundSkip: params.allowNotFoundSkip,
                  })
                ) {
                  logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
                  return "skip";
                }
                assertNoReasoningTags({
                  text: toolText,
                  model: modelKey,
                  phase: "tool-read",
                  label: params.label,
                });
                if (hasExpectedToolNonce(toolText, nonceA, nonceB)) {
                  break;
                }
                if (
                  shouldRetryToolReadProbe({
                    text: toolText,
                    nonceA,
                    nonceB,
                    provider: model.provider,
                    attempt: toolReadAttempt,
                    maxAttempts: maxToolReadAttempts,
                  })
                ) {
                  logProgress(
                    `${progressLabel}: tool-read retry (${toolReadAttempt + 2}/${maxToolReadAttempts}) malformed tool output`,
                  );
                  continue;
                }
                throw new Error(`tool probe missing nonce: ${toolText}`);
              }
              if (!hasExpectedToolNonce(toolText, nonceA, nonceB)) {
                throw new Error(`tool probe missing nonce: ${toolText}`);
              }
              // Phase 3 (optional): exec+read round trip — write a nonce via
              // `exec`, read it back via `read`, echo it.
              if (params.extraToolProbes) {
                logProgress(`${progressLabel}: tool-exec`);
                const nonceC = randomUUID();
                const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
                const maxExecReadAttempts = 3;
                let execReadText = "";
                for (
                  let execReadAttempt = 0;
                  execReadAttempt < maxExecReadAttempts;
                  execReadAttempt += 1
                ) {
                  const strictReply = execReadAttempt > 0;
                  execReadText = await requestGatewayAgentText({
                    client,
                    sessionKey,
                    idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
                    modelKey,
                    message: strictReply
                      ? "OpenClaw live tool probe (local, safe): " +
                        "use the tool named `exec` (or `Exec`) to run this command: " +
                        `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
                        `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
                        `Then reply with exactly: ${nonceC}. No extra text.`
                      : "OpenClaw live tool probe (local, safe): " +
                        "use the tool named `exec` (or `Exec`) to run this command: " +
                        `mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
                        `Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
                        "Finally reply including the nonce text you read back.",
                    thinkingLevel: params.thinkingLevel,
                    context: `${progressLabel}: tool-exec`,
                  });
                  if (
                    isEmptyStreamText(execReadText) &&
                    shouldSkipEmptyResponseForLiveModel({
                      provider: model.provider,
                      allowNotFoundSkip: params.allowNotFoundSkip,
                    })
                  ) {
                    logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
                    return "skip";
                  }
                  assertNoReasoningTags({
                    text: execReadText,
                    model: modelKey,
                    phase: "tool-exec",
                    label: params.label,
                  });
                  if (hasExpectedSingleNonce(execReadText, nonceC)) {
                    break;
                  }
                  if (
                    shouldRetryExecReadProbe({
                      text: execReadText,
                      nonce: nonceC,
                      provider: model.provider,
                      attempt: execReadAttempt,
                      maxAttempts: maxExecReadAttempts,
                    })
                  ) {
                    logProgress(
                      `${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) malformed tool output`,
                    );
                    continue;
                  }
                  throw new Error(`exec+read probe missing nonce: ${execReadText}`);
                }
                if (!hasExpectedSingleNonce(execReadText, nonceC)) {
                  throw new Error(`exec+read probe missing nonce: ${execReadText}`);
                }
                await fs.rm(toolWritePath, { force: true });
              }
              // Phase 4 (optional): image attachment probe for image-capable
              // models. Mismatches are logged as soft skips, not failures.
              if (params.extraImageProbes && model.input?.includes("image")) {
                logProgress(`${progressLabel}: image`);
                // Shorter code => less OCR flake across providers, still tests image attachments end-to-end.
                const imageCode = randomImageProbeCode();
                const imageBase64 = renderCatNoncePngBase64(imageCode);
                const runIdImage = randomUUID();
                const imageText = await requestGatewayAgentText({
                  client,
                  sessionKey,
                  idempotencyKey: `idem-${runIdImage}-image`,
                  modelKey,
                  message:
                    "Look at the attached image. Reply with exactly two tokens separated by a single space: " +
                    "(1) the animal shown or written in the image, lowercase; " +
                    "(2) the code printed in the image, uppercase. No extra text.",
                  attachments: [
                    {
                      mimeType: "image/png",
                      fileName: `probe-${runIdImage}.png`,
                      content: imageBase64,
                    },
                  ],
                  thinkingLevel: params.thinkingLevel,
                  context: `${progressLabel}: image`,
                });
                if (
                  isEmptyStreamText(imageText) &&
                  shouldSkipEmptyResponseForLiveModel({
                    provider: model.provider,
                    allowNotFoundSkip: params.allowNotFoundSkip,
                  })
                ) {
                  logProgress(`${progressLabel}: image skip (${model.provider} empty response)`);
                } else {
                  assertNoReasoningTags({
                    text: imageText,
                    model: modelKey,
                    phase: "image",
                    label: params.label,
                  });
                  if (!/\bcat\b/i.test(imageText)) {
                    logProgress(`${progressLabel}: image skip (missing 'cat')`);
                  } else {
                    // Tolerate small OCR errors: accept any candidate token
                    // within edit distance 3 of the rendered code.
                    const candidates = imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
                    const bestDistance = candidates.reduce((best, cand) => {
                      if (Math.abs(cand.length - imageCode.length) > 2) {
                        return best;
                      }
                      return Math.min(best, editDistance(cand, imageCode));
                    }, Number.POSITIVE_INFINITY);
                    if (!(bestDistance <= 3)) {
                      logProgress(`${progressLabel}: image skip (code mismatch)`);
                    }
                  }
                }
              }
              // Phase 5 (OpenAI Responses only): tool-only-turn regression —
              // a turn with no text followed by a follow-up question.
              if (
                (model.provider === "openai" && model.api === "openai-responses") ||
                (model.provider === "openai-codex" && model.api === "openai-codex-responses")
              ) {
                logProgress(`${progressLabel}: tool-only regression`);
                const runId2 = randomUUID();
                const firstText = await requestGatewayAgentText({
                  client,
                  sessionKey,
                  idempotencyKey: `idem-${runId2}-1`,
                  modelKey,
                  message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
                  thinkingLevel: params.thinkingLevel,
                  context: `${progressLabel}: tool-only-regression-first`,
                });
                assertNoReasoningTags({
                  text: firstText,
                  model: modelKey,
                  phase: "tool-only",
                  label: params.label,
                });
                const reply = await requestGatewayAgentText({
                  client,
                  sessionKey,
                  idempotencyKey: `idem-${runId2}-2`,
                  modelKey,
                  message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
                  thinkingLevel: params.thinkingLevel,
                  context: `${progressLabel}: tool-only-regression-second`,
                });
                assertNoReasoningTags({
                  text: reply,
                  model: modelKey,
                  phase: "tool-only-followup",
                  label: params.label,
                });
                if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
                  throw new Error(`unexpected reply: ${reply}`);
                }
              }
              if (model.provider === "anthropic") {
                await runAnthropicRefusalProbe({
                  client,
                  sessionKey,
                  modelKey,
                  label: progressLabel,
                  thinkingLevel: params.thinkingLevel,
                });
              }
              return "done";
            })(),
            `${progressLabel}: model`,
          );
          if (modelResult === "skip") {
            skippedCount += 1;
            break;
          }
          logProgress(`${progressLabel}: done`);
          break;
        } catch (err) {
          // Classify the failure: known transient provider conditions are
          // counted as skips (or retried on the next anthropic key);
          // anything unrecognized becomes a hard failure.
          const message = String(err);
          if (
            model.provider === "anthropic" &&
            isAnthropicRateLimitError(message) &&
            attempt + 1 < attemptMax
          ) {
            logProgress(`${progressLabel}: rate limit, retrying with next key`);
            continue;
          }
          if (model.provider === "anthropic" && isAnthropicRateLimitError(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (anthropic rate limit)`);
            break;
          }
          if (model.provider === "anthropic" && isAnthropicBillingError(message)) {
            if (attempt + 1 < attemptMax) {
              logProgress(`${progressLabel}: billing issue, retrying with next key`);
              continue;
            }
            logProgress(`${progressLabel}: skip (anthropic billing)`);
            break;
          }
          if (
            model.provider === "anthropic" &&
            isEmptyStreamText(message) &&
            attempt + 1 < attemptMax
          ) {
            logProgress(`${progressLabel}: empty response, retrying with next key`);
            continue;
          }
          if (model.provider === "anthropic" && isEmptyStreamText(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (anthropic empty response)`);
            break;
          }
          if (
            isEmptyStreamText(message) &&
            shouldSkipEmptyResponseForLiveModel({
              provider: model.provider,
              allowNotFoundSkip: params.allowNotFoundSkip,
            })
          ) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
            break;
          }
          if (isGoogleishProvider(model.provider) && isRateLimitErrorMessage(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (google rate limit)`);
            break;
          }
          if (
            (model.provider === "minimax" ||
              model.provider === "opencode" ||
              model.provider === "opencode-go" ||
              model.provider === "zai") &&
            isRateLimitErrorMessage(message)
          ) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (rate limit)`);
            break;
          }
          if (isProviderUnavailableErrorMessage(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (provider unavailable)`);
            break;
          }
          if (model.provider === "openrouter" && isPromptProbeMiss(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (openrouter prompt probe miss)`);
            break;
          }
          if (params.allowNotFoundSkip && isModelNotFoundErrorMessage(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (model not found)`);
            break;
          }
          if (
            model.provider === "anthropic" &&
            isGatewayLiveProbeTimeout(message) &&
            attempt + 1 < attemptMax
          ) {
            logProgress(`${progressLabel}: probe timeout, retrying with next key`);
            continue;
          }
          if (isGatewayLiveProbeTimeout(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (probe timeout)`);
            break;
          }
          if (isGatewayLiveModelTimeout(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (model timeout)`);
            break;
          }
          // OpenAI Codex refresh tokens can become single-use; skip instead of failing all live tests.
          if (model.provider === "openai-codex" && isRefreshTokenReused(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (codex refresh token reused)`);
            break;
          }
          if (model.provider === "openai-codex" && isChatGPTUsageLimitErrorMessage(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (chatgpt usage limit)`);
            break;
          }
          if (model.provider === "openai-codex" && isInstructionsRequiredError(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (instructions required)`);
            break;
          }
          if (
            (model.provider === "openai" || model.provider === "openai-codex") &&
            isOpenAIReasoningSequenceError(message)
          ) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (openai reasoning sequence error)`);
            break;
          }
          if (
            (model.provider === "openai" || model.provider === "openai-codex") &&
            isToolNonceRefusal(message)
          ) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (tool probe refusal)`);
            break;
          }
          if (
            isExecReadNonceProbeMiss(message) &&
            shouldSkipExecReadNonceMissForLiveModel(modelKey)
          ) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (exec/read workspace isolation)`);
            break;
          }
          if (shouldSkipToolNonceProbeMiss(model.provider) && isToolNonceProbeMiss(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (${model.provider} tool probe nonce miss)`);
            break;
          }
          if (isMissingProfileError(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (missing auth profile)`);
            break;
          }
          if (model.provider === "ollama" && isOllamaUnavailableErrorMessage(message)) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (ollama unavailable)`);
            break;
          }
          if (params.label.startsWith("minimax-")) {
            skippedCount += 1;
            logProgress(`${progressLabel}: skip (minimax endpoint error)`);
            break;
          }
          logProgress(`${progressLabel}: failed`);
          failures.push({ model: modelKey, error: message });
          break;
        }
      }
    }
    if (failures.length > 0) {
      const preview = formatFailurePreview(failures, 20);
      throw new Error(
        `gateway live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`,
      );
    }
    if (skippedCount === total) {
      logProgress(`[${params.label}] skipped all models (missing profiles)`);
    }
  } finally {
    // Restore runtime env, tear down the gateway, and delete all temp state.
    clearRuntimeConfigSnapshot();
    restoreProductionEnvForLiveRun(runtimeEnv);
    client.stop();
    await server.close({ reason: "live test complete" });
    await fs.rm(toolProbePath, { force: true });
    await fs.rm(tempDir, { recursive: true, force: true });
    if (tempAgentDir) {
      await fs.rm(tempAgentDir, { recursive: true, force: true });
    }
    if (tempStateDir) {
      await fs.rm(tempStateDir, { recursive: true, force: true });
    }
    process.env.OPENCLAW_CONFIG_PATH = previous.configPath;
    process.env.OPENCLAW_GATEWAY_TOKEN = previous.token;
    process.env.OPENCLAW_SKIP_CHANNELS = previous.skipChannels;
    process.env.OPENCLAW_SKIP_GMAIL_WATCHER = previous.skipGmail;
    process.env.OPENCLAW_SKIP_CRON = previous.skipCron;
    process.env.OPENCLAW_SKIP_CANVAS_HOST = previous.skipCanvas;
    process.env.OPENCLAW_DISABLE_BONJOUR = previous.disableBonjour;
    process.env.OPENCLAW_LOG_LEVEL = previous.logLevel;
    process.env.OPENCLAW_AGENT_DIR = previous.agentDir;
    process.env.PI_CODING_AGENT_DIR = previous.piAgentDir;
    process.env.OPENCLAW_STATE_DIR = previous.stateDir;
  }
}
// Live gateway suite: discovers models with usable credentials, then runs
// the shared probe suite (runGatewayModelSuite) against them, plus a
// minimax dual-endpoint variation and a z.ai cross-provider fallback test.
describeLive("gateway live (dev agent, profile keys)", () => {
  // Selects candidate models (explicit via OPENCLAW_LIVE_GATEWAY_MODELS, or
  // the high-signal default set), filters to those with resolvable API keys,
  // caps selection with provider spread, and runs the full probe suite.
  it(
    "runs meaningful prompts across models with available keys",
    async () =>
      await withSuppressedGatewayLiveWarnings(async () => {
        clearRuntimeConfigSnapshot();
        const cfg = loadConfig();
        await ensureOpenClawModelsJson(cfg);
        const agentDir = resolveOpenClawAgentDir();
        const authStorage = discoverAuthStorage(agentDir);
        const modelRegistry = discoverModels(authStorage, agentDir);
        const all = modelRegistry.getAll();
        const rawModels = process.env.OPENCLAW_LIVE_GATEWAY_MODELS?.trim();
        const useModern = !rawModels || rawModels === "modern" || rawModels === "all";
        const useExplicit = Boolean(rawModels) && !useModern;
        const filter = useExplicit ? parseFilter(rawModels) : null;
        const maxModels = GATEWAY_LIVE_MAX_MODELS;
        const wanted = filter
          ? all.filter((m) => filter.has(`${m.provider}/${m.id}`))
          : all.filter((m) => isHighSignalLiveModelRef({ provider: m.provider, id: m.id }));
        const candidates: Array<Model<Api>> = [];
        const skipped: Array<{ model: string; error: string }> = [];
        // Keep only models whose credentials resolve; record the reason for
        // everything dropped so an all-skip run is explainable.
        for (const model of wanted) {
          if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
            continue;
          }
          if (PROVIDERS && !PROVIDERS.has(model.provider)) {
            continue;
          }
          const modelRef = `${model.provider}/${model.id}`;
          try {
            const apiKeyInfo = await getApiKeyForModel({ model, cfg });
            if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
              skipped.push({
                model: modelRef,
                error: `non-profile credential source: ${apiKeyInfo.source}`,
              });
              continue;
            }
            candidates.push(model);
          } catch (error) {
            skipped.push({ model: modelRef, error: String(error) });
          }
        }
        if (candidates.length === 0) {
          if (skipped.length > 0) {
            logProgress(
              `[all-models] auth lookup skipped candidates:\n${formatFailurePreview(skipped, 8)}`,
            );
          }
          logProgress("[all-models] no API keys found; skipping");
          return;
        }
        const selectedCandidates = capByProviderSpread(
          candidates,
          maxModels > 0 ? maxModels : candidates.length,
          (model) => model.provider,
        );
        logProgress(`[all-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
        if (selectedCandidates.length < candidates.length) {
          logProgress(
            `[all-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_GATEWAY_MAX_MODELS=${maxModels}`,
          );
        }
        const imageCandidates = selectedCandidates.filter((m) => m.input?.includes("image"));
        if (imageCandidates.length === 0) {
          logProgress("[all-models] no image-capable models selected; image probe will be skipped");
        }
        await runGatewayModelSuite({
          label: "all-models",
          cfg,
          candidates: selectedCandidates,
          allowNotFoundSkip: useModern,
          extraToolProbes: ENABLE_EXTRA_TOOL_PROBES,
          extraImageProbes: ENABLE_EXTRA_IMAGE_PROBES,
          thinkingLevel: THINKING_LEVEL,
        });
        // Re-run any minimax candidates against the anthropic-messages
        // endpoint flavor to cover both API surfaces.
        const minimaxCandidates = selectedCandidates.filter(
          (model) => model.provider === "minimax",
        );
        if (minimaxCandidates.length === 0) {
          logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
          return;
        }
        const minimaxAnthropic = buildMinimaxProviderOverride({
          cfg,
          api: "anthropic-messages",
          baseUrl: "https://api.minimax.io/anthropic",
        });
        if (minimaxAnthropic) {
          await runGatewayModelSuite({
            label: "minimax-anthropic",
            cfg,
            candidates: minimaxCandidates,
            allowNotFoundSkip: useModern,
            extraToolProbes: ENABLE_EXTRA_TOOL_PROBES,
            extraImageProbes: ENABLE_EXTRA_IMAGE_PROBES,
            thinkingLevel: THINKING_LEVEL,
            providerOverrides: { minimax: minimaxAnthropic },
          });
        } else {
          logProgress("[minimax-anthropic] missing minimax provider config; skipping");
        }
      }),
    GATEWAY_LIVE_SUITE_TIMEOUT_MS,
  );
  // Regression: an anthropic tool-call transcript must remain usable after
  // switching the same session to zai/glm-4.7 mid-conversation.
  it("z.ai fallback handles anthropic tool history", async () => {
    if (!ZAI_FALLBACK) {
      return;
    }
    clearRuntimeConfigSnapshot();
    const runtimeEnv = enterProductionEnvForLiveRun();
    const previous = {
      configPath: process.env.OPENCLAW_CONFIG_PATH,
      token: process.env.OPENCLAW_GATEWAY_TOKEN,
      skipChannels: process.env.OPENCLAW_SKIP_CHANNELS,
      skipGmail: process.env.OPENCLAW_SKIP_GMAIL_WATCHER,
      skipCron: process.env.OPENCLAW_SKIP_CRON,
      skipCanvas: process.env.OPENCLAW_SKIP_CANVAS_HOST,
    };
    process.env.OPENCLAW_SKIP_CHANNELS = "1";
    process.env.OPENCLAW_SKIP_GMAIL_WATCHER = "1";
    process.env.OPENCLAW_SKIP_CRON = "1";
    process.env.OPENCLAW_SKIP_CANVAS_HOST = "1";
    const token = `test-${randomUUID()}`;
    process.env.OPENCLAW_GATEWAY_TOKEN = token;
    const cfg = loadConfig();
    await ensureOpenClawModelsJson(cfg);
    const agentDir = resolveOpenClawAgentDir();
    const authStorage = discoverAuthStorage(agentDir);
    const modelRegistry = discoverModels(authStorage, agentDir);
    const anthropic = modelRegistry.find("anthropic", "claude-opus-4-6") as Model<Api> | null;
    const zai = modelRegistry.find("zai", "glm-4.7") as Model<Api> | null;
    if (!anthropic || !zai) {
      return;
    }
    // Require credentials for both models; otherwise treat as a silent skip.
    // NOTE(review): these early returns (and the gateway-startup ones below)
    // happen before the try/finally that restores env vars, so the mutated
    // OPENCLAW_* env leaks into later tests on the skip path — confirm
    // whether that is intentional.
    try {
      await getApiKeyForModel({ model: anthropic, cfg });
      await getApiKeyForModel({ model: zai, cfg });
    } catch {
      return;
    }
    const agentId = "dev";
    const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
    await fs.mkdir(workspaceDir, { recursive: true });
    const nonceA = randomUUID();
    const nonceB = randomUUID();
    const toolProbePath = path.join(workspaceDir, `.openclaw-live-zai-fallback.${nonceA}.txt`);
    await fs.writeFile(toolProbePath, `nonceA=${nonceA}\nnonceB=${nonceB}\n`);
    let server: Awaited<ReturnType<typeof startGatewayServer>> | undefined;
    let client: GatewayClient | undefined;
    try {
      const port = await withGatewayLiveProbeTimeout(
        getFreeGatewayPort(),
        "zai-fallback: gateway-port",
      );
      server = await withGatewayLiveProbeTimeout(
        startGatewayServer(port, {
          bind: "loopback",
          auth: { mode: "token", token },
          controlUiEnabled: false,
        }),
        "zai-fallback: gateway-start",
      );
      client = await withGatewayLiveProbeTimeout(
        connectClient({
          url: `ws://127.0.0.1:${port}`,
          token,
        }),
        "zai-fallback: gateway-connect",
      );
    } catch (error) {
      const message = String(error);
      if (isGatewayLiveProbeTimeout(message)) {
        logProgress("[zai-fallback] skip (gateway startup timeout)");
        return;
      }
      throw error;
    }
    if (!server || !client) {
      logProgress("[zai-fallback] skip (gateway startup incomplete)");
      return;
    }
    try {
      const sessionKey = `agent:${agentId}:live-zai-fallback`;
      // Step 1: drive a real tool call on the anthropic model so the
      // transcript contains anthropic-format tool history.
      await withGatewayLiveProbeTimeout(
        client.request("sessions.patch", {
          key: sessionKey,
          model: "anthropic/claude-opus-4-6",
        }),
        "zai-fallback: sessions-patch-anthropic",
      );
      await withGatewayLiveProbeTimeout(
        client.request("sessions.reset", {
          key: sessionKey,
        }),
        "zai-fallback: sessions-reset",
      );
      const toolText = await requestGatewayAgentText({
        client,
        sessionKey,
        idempotencyKey: `idem-${randomUUID()}-tool`,
        modelKey: "anthropic/claude-opus-4-6",
        message:
          `Call the tool named \`read\` (or \`Read\` if \`read\` is unavailable) with JSON arguments {"path":"${toolProbePath}"}. ` +
          `Then reply with exactly: ${nonceA} ${nonceB}. No extra text.`,
        thinkingLevel: THINKING_LEVEL,
        context: "zai-fallback: tool-probe",
      });
      assertNoReasoningTags({
        text: toolText,
        model: "anthropic/claude-opus-4-6",
        phase: "zai-fallback-tool",
        label: "zai-fallback",
      });
      if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
        throw new Error(`anthropic tool probe missing nonce: ${toolText}`);
      }
      // Step 2: switch the same session to zai and verify the follow-up
      // can still use the anthropic-originated tool history.
      await withGatewayLiveProbeTimeout(
        client.request("sessions.patch", {
          key: sessionKey,
          model: "zai/glm-4.7",
        }),
        "zai-fallback: sessions-patch-zai",
      );
      const followupText = await requestGatewayAgentText({
        client,
        sessionKey,
        idempotencyKey: `idem-${randomUUID()}-followup`,
        modelKey: "zai/glm-4.7",
        message:
          `What are the values of nonceA and nonceB in "${toolProbePath}"? ` +
          `Reply with exactly: ${nonceA} ${nonceB}.`,
        thinkingLevel: THINKING_LEVEL,
        context: "zai-fallback: followup",
      });
      assertNoReasoningTags({
        text: followupText,
        model: "zai/glm-4.7",
        phase: "zai-fallback-followup",
        label: "zai-fallback",
      });
      if (!followupText.includes(nonceA) || !followupText.includes(nonceB)) {
        throw new Error(`zai followup missing nonce: ${followupText}`);
      }
    } finally {
      // Tear down the gateway and restore all mutated env vars.
      clearRuntimeConfigSnapshot();
      restoreProductionEnvForLiveRun(runtimeEnv);
      client.stop();
      await server.close({ reason: "live test complete" });
      await fs.rm(toolProbePath, { force: true });
      process.env.OPENCLAW_CONFIG_PATH = previous.configPath;
      process.env.OPENCLAW_GATEWAY_TOKEN = previous.token;
      process.env.OPENCLAW_SKIP_CHANNELS = previous.skipChannels;
      process.env.OPENCLAW_SKIP_GMAIL_WATCHER = previous.skipGmail;
      process.env.OPENCLAW_SKIP_CRON = previous.skipCron;
      process.env.OPENCLAW_SKIP_CANVAS_HOST = previous.skipCanvas;
    }
  }, 180_000);
});