// Mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-08 18:40:43 +00:00).
// 1241 lines, 42 KiB, TypeScript.
import { type Api, completeSimple, type Model } from "@mariozechner/pi-ai";
|
|
import { Type } from "typebox";
|
|
import { describe, expect, it } from "vitest";
|
|
import { getRuntimeConfig } from "../config/config.js";
|
|
import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js";
|
|
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
|
import { resolveOpenClawAgentDir } from "./agent-paths.js";
|
|
import {
|
|
collectAnthropicApiKeys,
|
|
isAnthropicBillingError,
|
|
isAnthropicRateLimitError,
|
|
} from "./live-auth-keys.js";
|
|
import { isModelNotFoundErrorMessage } from "./live-model-errors.js";
|
|
import {
|
|
isHighSignalLiveModelRef,
|
|
resolveHighSignalLiveModelLimit,
|
|
selectHighSignalLiveItems,
|
|
shouldExcludeProviderFromDefaultHighSignalLiveSweep,
|
|
} from "./live-model-filter.js";
|
|
import {
|
|
buildLiveModelFileProbeContext,
|
|
buildLiveModelFileProbeRetryContext,
|
|
buildLiveModelImageProbeContext,
|
|
extractAssistantText,
|
|
fileProbeTextMatches,
|
|
imageProbeTextMatches,
|
|
isLiveModelProbeEnabled,
|
|
LIVE_MODEL_FILE_PROBE_ENV,
|
|
LIVE_MODEL_FILE_PROBE_TOKEN,
|
|
LIVE_MODEL_IMAGE_PROBE_ENV,
|
|
modelSupportsImageInput,
|
|
shouldSkipLiveModelExtraProbes,
|
|
shouldSkipLiveModelFileProbe,
|
|
shouldSkipLiveModelImageProbe,
|
|
} from "./live-model-turn-probes.js";
|
|
import { createLiveTargetMatcher } from "./live-target-matcher.js";
|
|
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
|
|
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
|
|
import { shouldSuppressBuiltInModel } from "./model-suppression.js";
|
|
import { ensureOpenClawModelsJson } from "./models-config.js";
|
|
import {
|
|
isCloudflareOrHtmlErrorPage,
|
|
isRateLimitErrorMessage,
|
|
} from "./pi-embedded-helpers/errors.js";
|
|
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";
|
|
|
|
// Switches and tunables for the live suite, resolved once at module load.

// Gates the live `describe` block below (see `describeLive`).
const LIVE = isLiveTestEnabled();
// True only when OPENCLAW_LIVE_MODELS is set to a non-blank value.
const DIRECT_ENABLED = (process.env.OPENCLAW_LIVE_MODELS ?? "").trim().length > 0;
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";

// The three timer settings below are floored at one second.
const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000));
const LIVE_SETUP_TIMEOUT_MS = Math.max(
  1_000,
  toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000),
);
// Default is one hour (60 * 60 * 1000 ms).
const LIVE_TEST_TIMEOUT_MS = Math.max(
  1_000,
  toInt(process.env.OPENCLAW_LIVE_TEST_TIMEOUT_MS, 3_600_000),
);

const DEFAULT_LIVE_MODEL_CONCURRENCY = 20;
const LIVE_MODEL_CONCURRENCY = resolveLiveModelConcurrency(
  process.env.OPENCLAW_LIVE_MODEL_CONCURRENCY,
);
const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs(
  process.env.OPENCLAW_LIVE_MODELS_JSON_TIMEOUT_MS,
);

// Opt-in flags for the extra file-read and image probes.
const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);

// Registers the live suite normally when LIVE is set, otherwise as skipped.
const describeLive = LIVE ? describe : describe.skip;
|
|
|
|
/**
 * Parses a comma-separated filter value via `parseLiveCsvFilter`, with
 * lowercasing turned off.
 *
 * @param raw Raw filter text (may be undefined).
 * @returns Whatever `parseLiveCsvFilter` yields: a set of entries, or `null`.
 */
function parseCsvFilter(raw?: string): Set<string> | null {
  const parseOptions = { lowercase: false };
  return parseLiveCsvFilter(raw, parseOptions);
}
|
|
|
|
/**
 * Parses the provider filter value as a case-preserving CSV filter.
 *
 * @param raw Raw provider filter text (may be undefined).
 * @returns The parsed set of entries, or `null` as reported by `parseLiveCsvFilter`.
 */
function parseProviderFilter(raw?: string): Set<string> | null {
  return parseLiveCsvFilter(raw, { lowercase: false });
}
|
|
|
|
/**
 * Parses the model filter value as a case-preserving CSV filter.
 *
 * @param raw Raw model filter text (may be undefined).
 * @returns The parsed set of entries, or `null` as reported by `parseLiveCsvFilter`.
 */
function parseModelFilter(raw?: string): Set<string> | null {
  return parseLiveCsvFilter(raw, { lowercase: false });
}
|
|
|
|
/**
 * Writes one progress line to stderr, prefixed with `[live] `.
 *
 * @param message Text to emit; a trailing newline is appended.
 */
function logProgress(message: string): void {
  const line = "[live] " + message + "\n";
  process.stderr.write(line);
}
|
|
|
|
/**
 * Formats a millisecond duration as whole seconds with an `s` suffix.
 * The duration is rounded to the nearest second and never shown below `1s`.
 *
 * @param ms Duration in milliseconds.
 * @returns Text such as `"30s"`.
 */
function formatElapsedSeconds(ms: number): string {
  const roundedSeconds = Math.round(ms / 1_000);
  const shownSeconds = roundedSeconds < 1 ? 1 : roundedSeconds;
  return shownSeconds + "s";
}
|
|
|
|
/**
 * Awaits `operation` while logging a "still running" line every
 * `LIVE_HEARTBEAT_MS` milliseconds.
 *
 * A "completed after" line is logged only when at least one heartbeat fired,
 * and it is logged whether the operation resolved or rejected.
 *
 * @param operation Promise to wait on.
 * @param context Label prefixed to every progress line.
 * @returns The value `operation` resolves to; rejections propagate unchanged.
 */
async function withLiveHeartbeat<T>(operation: Promise<T>, context: string): Promise<T> {
  const startedAt = Date.now();
  const elapsed = (): string => formatElapsedSeconds(Date.now() - startedAt);
  let ticks = 0;

  const interval = setInterval(() => {
    ticks += 1;
    logProgress(`${context}: still running (${elapsed()})`);
  }, LIVE_HEARTBEAT_MS);
  // Unref the interval when the runtime supports it.
  interval.unref?.();

  try {
    return await operation;
  } finally {
    clearInterval(interval);
    if (ticks > 0) {
      logProgress(`${context}: completed after ${elapsed()}`);
    }
  }
}
|
|
|
|
/**
 * Races `operation` against a hard deadline, with heartbeat logging.
 *
 * The underlying operation is not cancelled when the deadline wins; only the
 * returned promise rejects.
 *
 * @param operation Promise for the stage being run.
 * @param context Label used in heartbeat lines and in the timeout error.
 * @param timeoutMs Deadline in milliseconds (defaults to `LIVE_SETUP_TIMEOUT_MS`).
 * @returns The value `operation` resolves to.
 * @throws Error `"<context> timed out after <timeoutMs>ms"` when the deadline fires first.
 */
async function withLiveStageTimeout<T>(
  operation: Promise<T>,
  context: string,
  timeoutMs = LIVE_SETUP_TIMEOUT_MS,
): Promise<T> {
  let deadlineTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    const deadline = new Promise<never>((_resolve, reject) => {
      deadlineTimer = setTimeout(
        () => reject(new Error(`${context} timed out after ${timeoutMs}ms`)),
        timeoutMs,
      );
      deadlineTimer.unref?.();
    });
    return await withLiveHeartbeat(Promise.race([operation, deadline]), context);
  } finally {
    // Always clear the deadline so a finished stage leaves no pending timer.
    if (deadlineTimer) {
      clearTimeout(deadlineTimer);
    }
  }
}
|
|
|
|
/**
 * Renders a numbered, newline-separated preview of model failures.
 *
 * Each error has its whitespace collapsed and is clipped to 320 characters
 * (317 plus `...`). At least one item is always eligible to be shown; any
 * failures beyond the limit are summarised as `... and N more`.
 *
 * @param failures Model/error pairs to render.
 * @param maxItems Maximum number of entries to list (values below 1 act as 1).
 * @returns The preview text, or an empty string when there are no failures.
 */
function formatFailurePreview(
  failures: Array<{ model: string; error: string }>,
  maxItems: number,
): string {
  const shownCount = Math.max(1, maxItems);
  const output: string[] = [];

  failures.slice(0, shownCount).forEach(({ model, error }, position) => {
    const singleLine = error.replace(/\s+/g, " ").trim();
    const body = singleLine.length > 320 ? `${singleLine.slice(0, 317)}...` : singleLine;
    output.push(`${position + 1}. ${model}: ${body}`);
  });

  const hiddenCount = failures.length - shownCount;
  if (hiddenCount > 0) {
    output.push(`... and ${hiddenCount} more`);
  }
  return output.join("\n");
}
|
|
|
|
/**
 * Detects Google-style "model not found" failures from an arbitrary error value.
 *
 * The stringified error must contain "not found" (case-insensitive) and at
 * least one corroborating marker: a standalone `404`, the
 * "models/... is not found for api version" wording, or a JSON
 * `"status": "NOT_FOUND"` / `"code": 404` field.
 *
 * @param err Any thrown value; it is converted with `String(err)`.
 * @returns `true` when the message looks like a missing-model error.
 */
function isGoogleModelNotFoundError(err: unknown): boolean {
  const msg = String(err);
  if (!/not found/i.test(msg)) {
    return false;
  }
  return (
    /\b404\b/.test(msg) ||
    /models\/.+ is not found for api version/i.test(msg) ||
    // `\s*` (single backslash) tolerates optional whitespace around the colon.
    // The previous `\\s*` form demanded a literal backslash and never matched JSON.
    /"status"\s*:\s*"NOT_FOUND"/.test(msg) ||
    /"code"\s*:\s*404/.test(msg)
  );
}
|
|
|
|
// Unit checks for the shared model-not-found message classifier.
describe("isModelNotFoundErrorMessage", () => {
  // Asserts that the classifier accepts `message`.
  const expectNotFound = (message: string): void => {
    expect(isModelNotFoundErrorMessage(message)).toBe(true);
  };

  it("matches whitespace-separated not found errors", () => {
    expectNotFound("404 model not found");
    expectNotFound("model: minimax-text-01 not found");
  });

  it("still matches underscore and hyphen variants", () => {
    expectNotFound("404 model not_found");
    expectNotFound("404 model not-found");
  });

  it("matches deprecated free model transition messages", () => {
    expectNotFound(
      "404 The free model has been deprecated. Transition to qwen/qwen3.6-plus for continued paid access.",
    );
  });

  it("matches OpenRouter no-endpoints wording", () => {
    expectNotFound("404 No endpoints found for deepseek/deepseek-r1:free.");
  });
});
|
|
|
|
// Unit checks for the provider-outage message classifier defined in this file.
describe("isProviderUnavailableErrorMessage", () => {
  // Asserts that the classifier accepts `message`.
  const expectUnavailable = (message: string): void => {
    expect(isProviderUnavailableErrorMessage(message)).toBe(true);
  };

  it("matches raw HTML provider error pages from transient upstreams", () => {
    expectUnavailable(
      "Error: <html><head><title>Service Unavailable</title></head><body>try again</body></html>",
    );
  });

  it("matches status-prefixed Cloudflare HTML pages", () => {
    expectUnavailable(
      "521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
    );
  });

  it("matches transient upstream 502 errors", () => {
    expectUnavailable("502 internal server error");
    expectUnavailable("provider returned error: 502 Internal Server Error");
  });
});
|
|
|
|
/**
 * Reports whether a message is the ChatGPT usage-limit notice.
 * Both the "hit your chatgpt usage limit" and "try again in" phrases must
 * appear (compared in lowercase).
 *
 * @param raw Error message text.
 */
function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  const requiredPhrases = ["hit your chatgpt usage limit", "try again in"];
  return requiredPhrases.every((phrase) => lowered.includes(phrase));
}
|
|
|
|
/**
 * Reports whether a message mentions `refresh_token_reused` (case-insensitive).
 *
 * @param raw Error message text.
 */
function isRefreshTokenReused(raw: string): boolean {
  const marker = /refresh_token_reused/i;
  return marker.exec(raw) !== null;
}
|
|
|
|
/**
 * Reports whether a message contains "failed to extract accountid from token"
 * (case-insensitive).
 *
 * @param raw Error message text.
 */
function isAccountIdExtractionError(raw: string): boolean {
  const marker = /failed to extract accountid from token/i;
  return raw.search(marker) !== -1;
}
|
|
|
|
/**
 * Reports whether a message contains "instructions are required"
 * (case-insensitive).
 *
 * @param raw Error message text.
 */
function isInstructionsRequiredError(raw: string): boolean {
  const marker = /instructions are required/i;
  return marker.exec(raw) !== null;
}
|
|
|
|
/**
 * Detects an HTML page returned in place of a model response.
 *
 * After trimming and dropping a leading `Error:` prefix, the text must start
 * with `<!doctype html` or `<html`, and must also contain either a viewport
 * `<meta>` tag or a `<body>` tag.
 *
 * @param raw Error message text.
 */
function isOpenAiCodexHtmlInterruption(raw: string): boolean {
  const body = raw.trim().replace(/^Error:\s*/i, "");
  const startsAsHtml = /^(?:<!doctype\s+html\b|<html\b)/i.test(body);
  if (!startsAsHtml) {
    return false;
  }
  const hasViewportMeta = /<meta\s+name=["']viewport["']/i.test(body);
  return hasViewportMeta || /<body\b/i.test(body);
}
|
|
|
|
/**
 * Reports whether a message contains "model call timed out after <digits>ms"
 * (case-insensitive), the wording produced by this file's per-call timeout.
 *
 * @param raw Error message text.
 */
function isModelTimeoutError(raw: string): boolean {
  const marker = /model call timed out after \d+ms/i;
  return raw.search(marker) !== -1;
}
|
|
|
|
/**
 * Reports whether a message indicates that the upstream provider is unavailable.
 *
 * A message qualifies when it is a raw HTML error page, is accepted by
 * `isCloudflareOrHtmlErrorPage`, contains one of the known outage phrases, or
 * mentions both "502" and "internal server error". Phrase checks are done in
 * lowercase.
 *
 * @param raw Error message text.
 */
function isProviderUnavailableErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  if (isRawHtmlProviderErrorPage(raw) || isCloudflareOrHtmlErrorPage(raw)) {
    return true;
  }

  const outagePhrases = [
    "no allowed providers are available",
    "provider unavailable",
    "upstream provider unavailable",
    "upstream error from google",
    "temporarily rate-limited upstream",
    "unable to access non-serverless model",
    "create and start a new dedicated endpoint",
    "no available capacity was found for the model",
  ];
  if (outagePhrases.some((phrase) => lowered.includes(phrase))) {
    return true;
  }

  return lowered.includes("502") && lowered.includes("internal server error");
}
|
|
|
|
/**
 * Reports whether a message is a complete HTML document.
 *
 * After trimming and dropping a leading `error:` prefix, the text must start
 * with `<!doctype html` or `<html` and must contain a closing `</html>` tag.
 *
 * @param raw Error message text.
 */
function isRawHtmlProviderErrorPage(raw: string): boolean {
  const withoutPrefix = raw.trim().replace(/^error:\s*/i, "");
  const page = withoutPrefix.trim();
  const startsAsHtml = /^(?:<!doctype\s+html\b|<html\b)/i.test(page);
  return startsAsHtml && /<\/html>/i.test(page);
}
|
|
|
|
/**
 * Reports whether a message indicates that a local Ollama server is unreachable.
 *
 * Matches the phrase "ollama could not be reached", or an ECONNREFUSED error
 * that names `127.0.0.1:11434` or `localhost:11434` (compared in lowercase).
 *
 * @param raw Error message text.
 */
function isOllamaUnavailableErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  if (lowered.includes("ollama could not be reached")) {
    return true;
  }
  const connectionRefused = lowered.includes("econnrefused");
  const namesLocalOllama =
    lowered.includes("127.0.0.1:11434") || lowered.includes("localhost:11434");
  return connectionRefused && namesLocalOllama;
}
|
|
|
|
/**
 * Reports whether a message says the model "requires that either input content
 * or output modality contain audio" (case-insensitive).
 *
 * @param raw Error message text.
 */
function isAudioOnlyModelErrorMessage(raw: string): boolean {
  const marker = /requires that either input content or output modality contain audio/i;
  return marker.exec(raw) !== null;
}
|
|
|
|
/**
 * Reports whether a message rejects the requested reasoning effort.
 *
 * Matches either "does not support parameter reasoningeffort" or the
 * "unsupported value: 'low' ... reasoning.effort ... supported values are:
 * 'medium'" wording, both case-insensitive.
 *
 * @param raw Error message text.
 */
function isUnsupportedReasoningEffortErrorMessage(raw: string): boolean {
  const signatures = [
    /does not support parameter reasoningeffort/i,
    /unsupported value:\s*'low'.*reasoning\.effort.*supported values are:\s*'medium'/i,
  ];
  return signatures.some((signature) => signature.test(raw));
}
|
|
|
|
/**
 * Reports whether a message says the `enable_thinking` parameter is not
 * supported. The parameter name may be bare or wrapped in backticks or double
 * quotes; matching is case-insensitive.
 *
 * @param raw Error message text.
 */
function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean {
  const marker = /does not support parameter [`"]?enable_thinking[`"]?/i;
  return raw.search(marker) !== -1;
}
|
|
|
|
/**
 * Reports whether a message says the current token plan does not cover the
 * model. The words "does" and "this" are optional; matching is case-insensitive.
 *
 * @param raw Error message text.
 */
function isUnsupportedPlanErrorMessage(raw: string): boolean {
  const marker = /current token plan (?:does )?not support (?:this )?model/i;
  return marker.exec(raw) !== null;
}
|
|
|
|
/**
 * Reports whether a message is an OpenRouter "provider returned error" that
 * carries only a generic 400 payload.
 *
 * All three fragments must be present in the lowercased text: the phrase
 * "provider returned error", `"code":400`, and `"msg":"bad request"`.
 *
 * @param raw Error message text.
 */
function isOpenRouterOpaqueBadRequestErrorMessage(raw: string): boolean {
  const lowered = raw.toLowerCase();
  const requiredFragments = ["provider returned error", '"code":400', '"msg":"bad request"'];
  return requiredFragments.every((fragment) => lowered.includes(fragment));
}
|
|
|
|
// Unit checks for the plan-gating message classifier.
describe("isUnsupportedPlanErrorMessage", () => {
  it("matches provider plan-gated models", () => {
    const cases: Array<[string, boolean]> = [
      ["current token plan does not support this model", true],
      ["your current token plan not support model", true],
      ["model not found", false],
    ];
    for (const [message, expected] of cases) {
      expect(isUnsupportedPlanErrorMessage(message)).toBe(expected);
    }
  });
});
|
|
|
|
// Unit checks for the opaque OpenRouter 400 classifier.
describe("isOpenRouterOpaqueBadRequestErrorMessage", () => {
  it("matches opaque OpenRouter upstream bad requests", () => {
    const opaquePayload =
      'Error: 400 Provider returned error {"code":400,"msg":"bad request","request_id":"abc"}';
    const plainBadRequest = "Error: 400 bad request";

    expect(isOpenRouterOpaqueBadRequestErrorMessage(opaquePayload)).toBe(true);
    expect(isOpenRouterOpaqueBadRequestErrorMessage(plainBadRequest)).toBe(false);
  });
});
|
|
|
|
/**
 * Parses a base-10 integer from optional text.
 *
 * @param value Text to parse; surrounding whitespace is ignored.
 * @param fallback Returned when `value` is undefined, blank, or does not parse
 *   to a finite number.
 * @returns The parsed integer, or `fallback`.
 */
function toInt(value: string | undefined, fallback: number): number {
  const text = value?.trim() ?? "";
  if (text === "") {
    return fallback;
  }
  const parsed = Number.parseInt(text, 10);
  if (!Number.isFinite(parsed)) {
    return fallback;
  }
  return parsed;
}
|
|
|
|
/**
 * Resolves how many model probes may run concurrently.
 *
 * @param raw Optional override text; falls back to
 *   `DEFAULT_LIVE_MODEL_CONCURRENCY` when missing or unparsable.
 * @returns The resolved concurrency, never below 1.
 */
function resolveLiveModelConcurrency(raw?: string): number {
  const requested = toInt(raw, DEFAULT_LIVE_MODEL_CONCURRENCY);
  return requested < 1 ? 1 : requested;
}
|
|
|
|
// Unit checks for concurrency resolution.
describe("resolveLiveModelConcurrency", () => {
  it("defaults direct-model probes to 20-way concurrency", () => {
    const resolved = resolveLiveModelConcurrency();
    expect(resolved).toBe(20);
  });

  it("accepts explicit concurrency overrides", () => {
    const cases: Array<[string, number]> = [
      ["7", 7],
      ["0", 1],
    ];
    for (const [raw, expected] of cases) {
      expect(resolveLiveModelConcurrency(raw)).toBe(expected);
    }
  });
});
|
|
|
|
/**
 * Resolves the timeout for the models.json preparation stage.
 *
 * @param modelsJsonTimeoutRaw Optional override text; defaults to 120000 ms
 *   when missing or unparsable.
 * @param setupTimeoutMs Lower bound for the result (defaults to
 *   `LIVE_SETUP_TIMEOUT_MS`).
 * @returns The larger of the requested timeout and `setupTimeoutMs`.
 */
function resolveLiveModelsJsonTimeoutMs(
  modelsJsonTimeoutRaw?: string,
  setupTimeoutMs = LIVE_SETUP_TIMEOUT_MS,
): number {
  const requested = toInt(modelsJsonTimeoutRaw, 120_000);
  return requested > setupTimeoutMs ? requested : setupTimeoutMs;
}
|
|
|
|
// Unit checks for models.json timeout resolution.
describe("resolveLiveModelsJsonTimeoutMs", () => {
  const sharedSetupTimeoutMs = 45_000;

  it("defaults models.json preparation to a longer setup timeout", () => {
    const resolved = resolveLiveModelsJsonTimeoutMs(undefined, sharedSetupTimeoutMs);
    expect(resolved).toBe(120_000);
  });

  it("never goes below the shared live setup timeout", () => {
    const resolved = resolveLiveModelsJsonTimeoutMs("30000", sharedSetupTimeoutMs);
    expect(resolved).toBe(45_000);
  });
});
|
|
|
|
/**
 * Picks the reasoning effort to request when probing a model.
 *
 * Rules, in order:
 * - models without the `reasoning` flag: `undefined`;
 * - ids containing "deep-research": `"medium"`;
 * - OpenRouter `qwq*` and xAI `grok-4*` ids: `undefined` (no effort is sent);
 * - OpenAI / OpenAI Codex: `"high"` for ids containing "pro", else `"medium"`;
 * - everything else: `"low"`.
 *
 * Id checks use the lowercased model id.
 *
 * @param model Model being probed.
 * @returns The effort level, or `undefined` to leave it unset.
 */
function resolveTestReasoning(
  model: Model<Api>,
): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
  if (!model.reasoning) {
    return undefined;
  }

  const { provider } = model;
  const id = model.id.toLowerCase();

  if (id.includes("deep-research")) {
    return "medium";
  }

  const leavesEffortUnset =
    (provider === "openrouter" && id.startsWith("qwq")) ||
    (provider === "xai" && id.startsWith("grok-4"));
  if (leavesEffortUnset) {
    return undefined;
  }

  if (provider === "openai" || provider === "openai-codex") {
    return id.includes("pro") ? "high" : "medium";
  }

  return "low";
}
|
|
|
|
/**
 * Returns the system prompt to send with live probes for a model.
 * Only the `openai-codex` provider gets one; every other provider gets
 * `undefined`.
 *
 * @param model Model being probed.
 */
function resolveLiveSystemPrompt(model: Model<Api>): string | undefined {
  return model.provider === "openai-codex"
    ? "You are a concise assistant. Follow the user's instruction exactly."
    : undefined;
}
|
|
|
|
// Unit checks for the per-provider system prompt.
describe("resolveLiveSystemPrompt", () => {
  it("adds instructions for openai-codex probes", () => {
    expect(
      resolveLiveSystemPrompt({
        provider: "openai-codex",
      } as Model<Api>),
    ).toContain("Follow the user's instruction exactly.");
  });

  it("keeps other providers unchanged", () => {
    expect(
      resolveLiveSystemPrompt({
        provider: "openai",
      } as Model<Api>),
    ).toBeUndefined();
  });
});

// This case exercises the HTML-interruption classifier, so it lives in its own
// suite instead of being reported under resolveLiveSystemPrompt.
describe("isOpenAiCodexHtmlInterruption", () => {
  it("matches OpenAI Codex HTML interruption pages", () => {
    expect(
      isOpenAiCodexHtmlInterruption(
        'Error: <html><head><meta name="viewport" content="width=device-width" /></head><body>Try again</body></html>',
      ),
    ).toBe(true);
    expect(isOpenAiCodexHtmlInterruption("Error: connection reset")).toBe(false);
  });
});
|
|
|
|
/**
 * Runs `completeSimple` with a hard time limit and heartbeat logging.
 *
 * Two timers are armed with the same delay: the first aborts the request
 * through an `AbortController` signal, the second rejects the returned promise
 * so the caller is released even if the request does not settle after the
 * abort. Both timers are cleared once the race settles.
 *
 * @param model Model to call.
 * @param context Conversation context forwarded to `completeSimple`.
 * @param options Call options; `signal` is overridden with the abort signal.
 * @param timeoutMs Time limit in milliseconds (values below 1 act as 1).
 * @param progressContext Label used for heartbeat lines.
 * @returns The result of `completeSimple`.
 * @throws Error `"model call timed out after <ms>ms"` when the limit is hit first.
 */
async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
  progressContext: string,
) {
  const budgetMs = Math.max(1, timeoutMs);

  const aborter = new AbortController();
  const abortTimer = setTimeout(() => {
    aborter.abort();
  }, budgetMs);
  abortTimer.unref?.();

  let rejectTimer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    rejectTimer = setTimeout(() => {
      reject(new Error(`model call timed out after ${budgetMs}ms`));
    }, budgetMs);
    rejectTimer.unref?.();
  });

  try {
    const raced = Promise.race([
      completeSimple(model, context, { ...options, signal: aborter.signal }),
      deadline,
    ]);
    return await withLiveHeartbeat(raced, progressContext);
  } finally {
    clearTimeout(abortTimer);
    if (rejectTimer) {
      clearTimeout(rejectTimer);
    }
  }
}
|
|
|
|
/**
 * Sends the "Reply with the word ok." prompt, retrying once with a larger
 * token budget when the first reply contains no text.
 *
 * @param params.model Model to probe.
 * @param params.apiKey Credential forwarded to the call.
 * @param params.timeoutMs Per-call time limit in milliseconds.
 * @param params.progressLabel Prefix for progress lines.
 * @returns The raw response plus its text blocks, trimmed and joined by spaces.
 */
async function completeOkWithRetry(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const attempt = async (maxTokens: number) => {
    const res = await completeSimpleWithTimeout(
      params.model,
      {
        systemPrompt: resolveLiveSystemPrompt(params.model),
        messages: [
          {
            role: "user",
            content: "Reply with the word ok.",
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: params.apiKey,
        reasoning: resolveTestReasoning(params.model),
        maxTokens,
      },
      params.timeoutMs,
      `${params.progressLabel}: prompt call (maxTokens=${maxTokens})`,
    );

    const pieces: string[] = [];
    for (const block of res.content) {
      if (block.type === "text") {
        pieces.push(block.text.trim());
      }
    }
    return { res, text: pieces.join(" ") };
  };

  const initial = await attempt(64);
  if (initial.text.length > 0) {
    return initial;
  }
  // Some providers (for example Moonshot Kimi and MiniMax M2.5) may emit
  // reasoning blocks first and only return text once the token budget is higher.
  return await attempt(256);
}
|
|
|
|
/**
 * Reports whether a model is one of the DeepSeek V4 ids handled by the replay
 * regression: provider `deepseek` with id `deepseek-v4-flash` or
 * `deepseek-v4-pro`.
 *
 * @param model Provider/id pair to check.
 */
function isDeepSeekV4Model(model: Pick<Model<Api>, "id" | "provider">): boolean {
  if (model.provider !== "deepseek") {
    return false;
  }
  return model.id === "deepseek-v4-flash" || model.id === "deepseek-v4-pro";
}
|
|
|
|
/**
 * Replay regression for DeepSeek V4: obtains a tool call, then replays that
 * assistant turn together with a tool result and expects a textual follow-up.
 *
 * The first turn is retried up to two more times with a stronger instruction
 * when the model does not produce a tool call.
 *
 * @param params.model Model to probe.
 * @param params.apiKey Credential forwarded to each call.
 * @param params.timeoutMs Per-call time limit in milliseconds.
 * @param params.progressLabel Prefix for progress lines.
 * @throws Error when no tool call is produced, or when the follow-up stops with an error.
 */
async function runDeepSeekV4ReplayRegression(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const noopTool = {
    name: "noop",
    description: "Return ok.",
    parameters: Type.Object({}, { additionalProperties: false }),
  };
  const callOptions = {
    apiKey: params.apiKey,
    reasoning: resolveTestReasoning(params.model),
    maxTokens: 256,
  };

  // Sends one single-message turn with the noop tool and reports any tool call.
  const requestToolCall = async (content: string, stage: string) => {
    const user = {
      role: "user" as const,
      content,
      timestamp: Date.now(),
    };
    const reply = await completeSimpleWithTimeout(
      params.model,
      { messages: [user], tools: [noopTool] },
      callOptions,
      params.timeoutMs,
      `${params.progressLabel}: DeepSeek V4 replay ${stage}`,
    );
    return { user, reply, toolCall: reply.content.find((block) => block.type === "toolCall") };
  };

  let turn = await requestToolCall(
    "Call the tool `noop` with {}. Do not write any other text.",
    "first call",
  );
  for (let retry = 1; retry <= 2 && !turn.toolCall; retry += 1) {
    turn = await requestToolCall(
      "Call the tool `noop` with {}. IMPORTANT: respond with the tool call.",
      `retry ${retry}`,
    );
  }

  const { user: firstUser, reply: first, toolCall } = turn;
  expect(toolCall).toBeTruthy();
  if (!toolCall || toolCall.type !== "toolCall") {
    throw new Error("expected DeepSeek V4 tool call");
  }

  // Replay the user turn, the assistant's tool-call turn, and a successful tool
  // result, then ask for a plain-text reply.
  const second = await completeSimpleWithTimeout(
    params.model,
    {
      messages: [
        firstUser,
        first,
        {
          role: "toolResult",
          toolCallId: toolCall.id,
          toolName: "noop",
          content: [{ type: "text", text: "ok" }],
          isError: false,
          timestamp: Date.now(),
        },
        {
          role: "user",
          content: "Reply with the word ok.",
          timestamp: Date.now(),
        },
      ],
    },
    callOptions,
    params.timeoutMs,
    `${params.progressLabel}: DeepSeek V4 replay followup`,
  );
  if (second.stopReason === "error") {
    throw new Error(second.errorMessage || "DeepSeek V4 replay followup returned error");
  }
  expect(extractAssistantText(second).length).toBeGreaterThan(0);
}
|
|
|
|
/**
 * Runs the optional file-read and image probes for a model.
 *
 * Each probe runs only when its flag (`LIVE_FILE_PROBE_ENABLED` /
 * `LIVE_IMAGE_PROBE_ENABLED`) is set and the model is not on the matching skip
 * list. The file probe is retried once with the retry context when the first
 * answer does not match. For both probes an empty answer is logged as skipped,
 * while a non-empty wrong answer is a failure.
 *
 * @param params.model Model to probe.
 * @param params.apiKey Credential forwarded to each call.
 * @param params.timeoutMs Per-call time limit in milliseconds.
 * @param params.progressLabel Prefix for progress lines.
 * @throws Error when a probe stops with an error or returns a non-empty wrong answer.
 */
async function runExtraTurnProbes(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const { model, progressLabel, timeoutMs } = params;

  if (shouldSkipLiveModelExtraProbes(model)) {
    logProgress(`${progressLabel}: extra probes skipped (known empty route)`);
    return;
  }

  const callOptions = {
    apiKey: params.apiKey,
    reasoning: resolveTestReasoning(model),
    maxTokens: 128,
  };
  const systemPrompt = resolveLiveSystemPrompt(model);

  if (LIVE_FILE_PROBE_ENABLED) {
    if (shouldSkipLiveModelFileProbe(model)) {
      logProgress(`${progressLabel}: file-read probe skipped (known empty route)`);
    } else {
      logProgress(`${progressLabel}: file-read probe`);
      const firstTry = await completeSimpleWithTimeout(
        model,
        buildLiveModelFileProbeContext({ systemPrompt }),
        callOptions,
        timeoutMs,
        `${progressLabel}: file-read probe`,
      );
      if (firstTry.stopReason === "error") {
        throw new Error(firstTry.errorMessage || "file-read probe returned error with no message");
      }

      let fileText = extractAssistantText(firstTry);
      if (!fileProbeTextMatches(fileText)) {
        logProgress(`${progressLabel}: file-read probe retry`);
        const secondTry = await completeSimpleWithTimeout(
          model,
          buildLiveModelFileProbeRetryContext({ systemPrompt }),
          callOptions,
          timeoutMs,
          `${progressLabel}: file-read probe retry`,
        );
        if (secondTry.stopReason === "error") {
          throw new Error(
            secondTry.errorMessage || "file-read probe retry returned error with no message",
          );
        }
        fileText = extractAssistantText(secondTry);
      }

      if (!fileProbeTextMatches(fileText)) {
        if (fileText.length > 0) {
          throw new Error(
            `file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`,
          );
        }
        logProgress(`${progressLabel}: file-read probe skipped (empty response)`);
      }
    }
  }

  if (!LIVE_IMAGE_PROBE_ENABLED) {
    return;
  }
  if (!modelSupportsImageInput(model)) {
    logProgress(`${progressLabel}: image probe skipped (no image input)`);
    return;
  }
  if (shouldSkipLiveModelImageProbe(model)) {
    logProgress(`${progressLabel}: image probe skipped (known empty route)`);
    return;
  }

  logProgress(`${progressLabel}: image probe`);
  const image = await completeSimpleWithTimeout(
    model,
    buildLiveModelImageProbeContext({ systemPrompt }),
    callOptions,
    timeoutMs,
    `${progressLabel}: image probe`,
  );
  if (image.stopReason === "error") {
    throw new Error(image.errorMessage || "image probe returned error with no message");
  }

  const imageText = extractAssistantText(image);
  if (imageProbeTextMatches(imageText)) {
    return;
  }
  if (imageText.length === 0) {
    logProgress(`${progressLabel}: image probe skipped (empty response)`);
    return;
  }
  throw new Error(`image probe did not return ok: ${imageText}`);
}
|
|
|
|
describeLive("live models (profile keys)", () => {
|
|
it(
|
|
"completes across selected models",
|
|
async () => {
|
|
logProgress("[live-models] loading config");
|
|
const cfg = await withLiveStageTimeout(
|
|
Promise.resolve().then(() => getRuntimeConfig()),
|
|
"[live-models] load config",
|
|
);
|
|
logProgress("[live-models] preparing models.json");
|
|
await withLiveStageTimeout(
|
|
ensureOpenClawModelsJson(cfg),
|
|
"[live-models] prepare models.json",
|
|
LIVE_MODELS_JSON_TIMEOUT_MS,
|
|
);
|
|
if (!DIRECT_ENABLED) {
|
|
logProgress(
|
|
"[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|<list>; all=modern)",
|
|
);
|
|
return;
|
|
}
|
|
const anthropicKeys = collectAnthropicApiKeys();
|
|
if (anthropicKeys.length > 0) {
|
|
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
|
|
logProgress(`[live-models] anthropic keys loaded: ${anthropicKeys.length}`);
|
|
}
|
|
|
|
const agentDir = resolveOpenClawAgentDir();
|
|
const authStorage = discoverAuthStorage(agentDir);
|
|
logProgress("[live-models] loading model registry");
|
|
const models = await withLiveStageTimeout(
|
|
Promise.resolve().then(() => discoverModels(authStorage, agentDir).getAll()),
|
|
"[live-models] load model registry",
|
|
);
|
|
|
|
const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim();
|
|
const useModern = rawModels === "modern" || rawModels === "all";
|
|
const useExplicit = Boolean(rawModels) && !useModern;
|
|
const filter = useExplicit ? parseModelFilter(rawModels) : null;
|
|
const allowNotFoundSkip = useModern;
|
|
const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS);
|
|
const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000);
|
|
const maxModels = resolveHighSignalLiveModelLimit({
|
|
rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS,
|
|
useExplicitModels: useExplicit,
|
|
});
|
|
const targetMatcher = createLiveTargetMatcher({
|
|
providerFilter: providers,
|
|
modelFilter: filter,
|
|
config: cfg,
|
|
env: process.env,
|
|
});
|
|
|
|
const failures: Array<{ model: string; error: string }> = [];
|
|
const skipped: Array<{ model: string; reason: string }> = [];
|
|
const candidates: Array<{
|
|
model: Model<Api>;
|
|
apiKeyInfo: Awaited<ReturnType<typeof getApiKeyForModel>>;
|
|
}> = [];
|
|
|
|
for (const model of models) {
|
|
if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
|
|
continue;
|
|
}
|
|
if (!targetMatcher.matchesProvider(model.provider)) {
|
|
continue;
|
|
}
|
|
const id = `${model.provider}/${model.id}`;
|
|
if (!targetMatcher.matchesModel(model.provider, model.id)) {
|
|
continue;
|
|
}
|
|
if (!filter && useModern) {
|
|
if (
|
|
shouldExcludeProviderFromDefaultHighSignalLiveSweep({
|
|
provider: model.provider,
|
|
useExplicitModels: useExplicit,
|
|
providerFilter: providers,
|
|
config: cfg,
|
|
env: process.env,
|
|
})
|
|
) {
|
|
continue;
|
|
}
|
|
if (!isHighSignalLiveModelRef({ provider: model.provider, id: model.id })) {
|
|
continue;
|
|
}
|
|
}
|
|
try {
|
|
const apiKeyInfo = await getApiKeyForModel({
|
|
model,
|
|
cfg,
|
|
credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
|
|
});
|
|
if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
|
|
skipped.push({
|
|
model: id,
|
|
reason: `non-profile credential source: ${apiKeyInfo.source}`,
|
|
});
|
|
continue;
|
|
}
|
|
candidates.push({ model, apiKeyInfo });
|
|
} catch (err) {
|
|
skipped.push({ model: id, reason: String(err) });
|
|
}
|
|
}
|
|
|
|
if (candidates.length === 0) {
|
|
logProgress("[live-models] no API keys found; skipping");
|
|
return;
|
|
}
|
|
|
|
const selectedCandidates = selectHighSignalLiveItems(
|
|
candidates,
|
|
maxModels > 0 ? maxModels : candidates.length,
|
|
(entry) => ({ provider: entry.model.provider, id: entry.model.id }),
|
|
(entry) => entry.model.provider,
|
|
);
|
|
logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
|
|
if (selectedCandidates.length < candidates.length) {
|
|
logProgress(
|
|
`[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,
|
|
);
|
|
}
|
|
logProgress(`[live-models] running ${selectedCandidates.length} models`);
|
|
logProgress(
|
|
`[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)} concurrency=${LIVE_MODEL_CONCURRENCY}`,
|
|
);
|
|
const total = selectedCandidates.length;
|
|
|
|
const tasks = selectedCandidates.map((entry, index) => async () => {
|
|
const { model, apiKeyInfo } = entry;
|
|
const id = `${model.provider}/${model.id}`;
|
|
const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
|
|
const attemptMax =
|
|
model.provider === "anthropic" && anthropicKeys.length > 0 ? anthropicKeys.length : 1;
|
|
for (let attempt = 0; attempt < attemptMax; attempt += 1) {
|
|
if (model.provider === "anthropic" && anthropicKeys.length > 0) {
|
|
process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt];
|
|
}
|
|
const apiKey =
|
|
model.provider === "anthropic" && anthropicKeys.length > 0
|
|
? anthropicKeys[attempt]
|
|
: requireApiKey(apiKeyInfo, model.provider);
|
|
try {
|
|
// Special regression: OpenAI requires replayed `reasoning` items for tool-only turns.
|
|
if (
|
|
model.provider === "openai" &&
|
|
model.api === "openai-responses" &&
|
|
model.id === "gpt-5.2"
|
|
) {
|
|
logProgress(`${progressLabel}: tool-only regression`);
|
|
const noopTool = {
|
|
name: "noop",
|
|
description: "Return ok.",
|
|
parameters: Type.Object({}, { additionalProperties: false }),
|
|
};
|
|
|
|
let firstUserContent = "Call the tool `noop` with {}. Do not write any other text.";
|
|
let firstUser = {
|
|
role: "user" as const,
|
|
content: firstUserContent,
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
let first = await completeSimpleWithTimeout(
|
|
model,
|
|
{ messages: [firstUser], tools: [noopTool] },
|
|
{
|
|
apiKey,
|
|
reasoning: resolveTestReasoning(model),
|
|
maxTokens: 128,
|
|
},
|
|
perModelTimeoutMs,
|
|
`${progressLabel}: tool-only regression first call`,
|
|
);
|
|
|
|
let toolCall = first.content.find((b) => b.type === "toolCall");
|
|
let firstText = first.content
|
|
.filter((b) => b.type === "text")
|
|
.map((b) => b.text.trim())
|
|
.join(" ")
|
|
.trim();
|
|
|
|
// Occasional flake: model answers in text instead of tool call (or adds text).
|
|
// Retry a couple times with a stronger instruction so we still exercise the tool-only replay path.
|
|
for (let i = 0; i < 2 && (!toolCall || firstText.length > 0); i += 1) {
|
|
firstUserContent =
|
|
"Call the tool `noop` with {}. IMPORTANT: respond ONLY with the tool call; no other text.";
|
|
firstUser = {
|
|
role: "user" as const,
|
|
content: firstUserContent,
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
first = await completeSimpleWithTimeout(
|
|
model,
|
|
{ messages: [firstUser], tools: [noopTool] },
|
|
{
|
|
apiKey,
|
|
reasoning: resolveTestReasoning(model),
|
|
maxTokens: 128,
|
|
},
|
|
perModelTimeoutMs,
|
|
`${progressLabel}: tool-only regression retry ${i + 1}`,
|
|
);
|
|
|
|
toolCall = first.content.find((b) => b.type === "toolCall");
|
|
firstText = first.content
|
|
.filter((b) => b.type === "text")
|
|
.map((b) => b.text.trim())
|
|
.join(" ")
|
|
.trim();
|
|
}
|
|
|
|
expect(toolCall).toBeTruthy();
|
|
expect(firstText.length).toBe(0);
|
|
if (!toolCall || toolCall.type !== "toolCall") {
|
|
throw new Error("expected tool call");
|
|
}
|
|
|
|
const second = await completeSimpleWithTimeout(
|
|
model,
|
|
{
|
|
messages: [
|
|
firstUser,
|
|
first,
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: toolCall.id,
|
|
toolName: "noop",
|
|
content: [{ type: "text", text: "ok" }],
|
|
isError: false,
|
|
timestamp: Date.now(),
|
|
},
|
|
{
|
|
role: "user",
|
|
content: "Reply with the word ok.",
|
|
timestamp: Date.now(),
|
|
},
|
|
],
|
|
},
|
|
{
|
|
apiKey,
|
|
reasoning: resolveTestReasoning(model),
|
|
// Headroom: reasoning summary can consume most of the output budget.
|
|
maxTokens: 256,
|
|
},
|
|
perModelTimeoutMs,
|
|
`${progressLabel}: tool-only regression followup`,
|
|
);
|
|
|
|
const secondText = second.content
|
|
.filter((b) => b.type === "text")
|
|
.map((b) => b.text.trim())
|
|
.join(" ");
|
|
expect(secondText.length).toBeGreaterThan(0);
|
|
await runExtraTurnProbes({
|
|
model,
|
|
apiKey,
|
|
timeoutMs: perModelTimeoutMs,
|
|
progressLabel,
|
|
});
|
|
logProgress(`${progressLabel}: done`);
|
|
break;
|
|
}
|
|
|
|
if (isDeepSeekV4Model(model)) {
|
|
logProgress(`${progressLabel}: DeepSeek V4 replay regression`);
|
|
await runDeepSeekV4ReplayRegression({
|
|
model,
|
|
apiKey,
|
|
timeoutMs: perModelTimeoutMs,
|
|
progressLabel,
|
|
});
|
|
await runExtraTurnProbes({
|
|
model,
|
|
apiKey,
|
|
timeoutMs: perModelTimeoutMs,
|
|
progressLabel,
|
|
});
|
|
logProgress(`${progressLabel}: done`);
|
|
break;
|
|
}
|
|
|
|
logProgress(`${progressLabel}: prompt`);
|
|
const ok = await completeOkWithRetry({
|
|
model,
|
|
apiKey,
|
|
timeoutMs: perModelTimeoutMs,
|
|
progressLabel,
|
|
});
|
|
|
|
if (ok.res.stopReason === "error") {
|
|
const msg = ok.res.errorMessage ?? "";
|
|
if (allowNotFoundSkip && isModelNotFoundErrorMessage(msg)) {
|
|
skipped.push({ model: id, reason: msg });
|
|
logProgress(`${progressLabel}: skip (model not found)`);
|
|
break;
|
|
}
|
|
throw new Error(msg || "model returned error with no message");
|
|
}
|
|
|
|
if (
|
|
ok.text.length === 0 &&
|
|
(model.provider === "google" || model.provider === "google-gemini-cli")
|
|
) {
|
|
skipped.push({
|
|
model: id,
|
|
reason: "no text returned (likely unavailable model id)",
|
|
});
|
|
logProgress(`${progressLabel}: skip (google model not found)`);
|
|
break;
|
|
}
|
|
if (
|
|
ok.text.length === 0 &&
|
|
(model.provider === "openrouter" ||
|
|
model.provider === "opencode" ||
|
|
model.provider === "opencode-go")
|
|
) {
|
|
skipped.push({
|
|
model: id,
|
|
reason: "no text returned (provider returned empty content)",
|
|
});
|
|
logProgress(`${progressLabel}: skip (empty response)`);
|
|
break;
|
|
}
|
|
if (
|
|
ok.text.length === 0 &&
|
|
allowNotFoundSkip &&
|
|
(model.provider === "fireworks" ||
|
|
model.provider === "google-antigravity" ||
|
|
model.provider === "minimax" ||
|
|
model.provider === "openai-codex" ||
|
|
model.provider === "xai" ||
|
|
model.provider === "zai")
|
|
) {
|
|
skipped.push({
|
|
model: id,
|
|
reason: "no text returned (provider returned empty content)",
|
|
});
|
|
logProgress(`${progressLabel}: skip (empty response)`);
|
|
break;
|
|
}
|
|
expect(ok.text.length).toBeGreaterThan(0);
|
|
await runExtraTurnProbes({
|
|
model,
|
|
apiKey,
|
|
timeoutMs: perModelTimeoutMs,
|
|
progressLabel,
|
|
});
|
|
logProgress(`${progressLabel}: done`);
|
|
break;
|
|
} catch (err) {
|
|
const message = String(err);
|
|
if (
|
|
model.provider === "anthropic" &&
|
|
isAnthropicRateLimitError(message) &&
|
|
attempt + 1 < attemptMax
|
|
) {
|
|
logProgress(`${progressLabel}: rate limit, retrying with next key`);
|
|
continue;
|
|
}
|
|
if (model.provider === "anthropic" && isAnthropicRateLimitError(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (anthropic rate limit)`);
|
|
break;
|
|
}
|
|
if (model.provider === "anthropic" && isAnthropicBillingError(message)) {
|
|
if (attempt + 1 < attemptMax) {
|
|
logProgress(`${progressLabel}: billing issue, retrying with next key`);
|
|
continue;
|
|
}
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (anthropic billing)`);
|
|
break;
|
|
}
|
|
if (
|
|
(model.provider === "google" || model.provider === "google-gemini-cli") &&
|
|
isGoogleModelNotFoundError(err)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (google model not found)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "minimax" &&
|
|
message.includes("request ended without sending any chunks")
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (minimax empty response)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
(model.provider === "minimax" ||
|
|
model.provider === "zai" ||
|
|
model.provider === "openrouter") &&
|
|
isRateLimitErrorMessage(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (rate limit)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
(model.provider === "opencode" || model.provider === "opencode-go") &&
|
|
isRateLimitErrorMessage(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (rate limit)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openai-codex" &&
|
|
isRefreshTokenReused(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (codex refresh token reused)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openai-codex" &&
|
|
isAccountIdExtractionError(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (codex account id extraction)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openai-codex" &&
|
|
isChatGPTUsageLimitErrorMessage(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (chatgpt usage limit)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openai-codex" &&
|
|
isInstructionsRequiredError(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (instructions required)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openai-codex" &&
|
|
isOpenAiCodexHtmlInterruption(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (codex html interruption)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isModelTimeoutError(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (timeout)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isProviderUnavailableErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (provider unavailable)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "openrouter" &&
|
|
isOpenRouterOpaqueBadRequestErrorMessage(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (openrouter upstream bad request)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isModelNotFoundErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (model not found)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isAudioOnlyModelErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (audio-only model)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isUnsupportedReasoningEffortErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (reasoning unsupported)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isUnsupportedThinkingToggleErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (thinking toggle unsupported)`);
|
|
break;
|
|
}
|
|
if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (plan unsupported)`);
|
|
break;
|
|
}
|
|
if (
|
|
allowNotFoundSkip &&
|
|
model.provider === "ollama" &&
|
|
isOllamaUnavailableErrorMessage(message)
|
|
) {
|
|
skipped.push({ model: id, reason: message });
|
|
logProgress(`${progressLabel}: skip (ollama unavailable)`);
|
|
break;
|
|
}
|
|
logProgress(`${progressLabel}: failed`);
|
|
failures.push({ model: id, error: message });
|
|
break;
|
|
}
|
|
}
|
|
});
|
|
|
|
await runTasksWithConcurrency({
|
|
tasks,
|
|
limit: LIVE_MODEL_CONCURRENCY,
|
|
});
|
|
|
|
if (failures.length > 0) {
|
|
const preview = formatFailurePreview(failures, 20);
|
|
throw new Error(
|
|
`live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`,
|
|
);
|
|
}
|
|
|
|
void skipped;
|
|
},
|
|
LIVE_TEST_TIMEOUT_MS,
|
|
);
|
|
});
|