openclaw/src/agents/models.profiles.live.test.ts

import { type Api, completeSimple, type Model } from "@mariozechner/pi-ai";
import {
  discoverAuthStorage,
  discoverModels,
} from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { resolveClawdbotAgentDir } from "./agent-paths.js";
import { getApiKeyForModel } from "./model-auth.js";
import { ensureClawdbotModelsJson } from "./models-config.js";

const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
const ALL_MODELS =
  process.env.CLAWDBOT_LIVE_ALL_MODELS === "1" ||
  process.env.CLAWDBOT_LIVE_MODELS === "all";
const REQUIRE_PROFILE_KEYS =
  process.env.CLAWDBOT_LIVE_REQUIRE_PROFILE_KEYS === "1";

const describeLive = LIVE && ALL_MODELS ? describe : describe.skip;

function parseModelFilter(raw?: string): Set<string> | null {
  const trimmed = raw?.trim();
  if (!trimmed || trimmed === "all") return null;
  const ids = trimmed
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  return ids.length ? new Set(ids) : null;
}

describeLive("live models (profile keys)", () => {
  it(
    "completes across configured models",
    async () => {
      const cfg = loadConfig();
      await ensureClawdbotModelsJson(cfg);

      const agentDir = resolveClawdbotAgentDir();
      const authStorage = discoverAuthStorage(agentDir);
      const modelRegistry = discoverModels(authStorage, agentDir);
      const models = modelRegistry.getAll() as Array<Model<Api>>;

      const filter = parseModelFilter(process.env.CLAWDBOT_LIVE_MODELS);

      const failures: Array<{ model: string; error: string }> = [];
      const skipped: Array<{ model: string; reason: string }> = [];

      for (const model of models) {
        const id = `${model.provider}/${model.id}`;
        if (filter && !filter.has(id)) continue;

        let apiKeyInfo: Awaited<ReturnType<typeof getApiKeyForModel>>;
        try {
          apiKeyInfo = await getApiKeyForModel({ model, cfg });
        } catch (err) {
          skipped.push({ model: id, reason: String(err) });
          continue;
        }

        if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
          skipped.push({
            model: id,
            reason: `non-profile credential source: ${apiKeyInfo.source}`,
          });
          continue;
        }

        try {
          // Special regression: OpenAI rejects replayed `reasoning` items for tool-only turns.
          if (
            model.provider === "openai" &&
            model.api === "openai-responses" &&
            model.id === "gpt-5.2"
          ) {
            const noopTool = {
              name: "noop",
              description: "Return ok.",
              parameters: Type.Object({}, { additionalProperties: false }),
            };

            const first = await completeSimple(
              model,
              {
                messages: [
                  {
                    role: "user",
                    content:
                      "Call the tool `noop` with {}. Do not write any other text.",
                    timestamp: Date.now(),
                  },
                ],
                tools: [noopTool],
              },
              {
                apiKey: apiKeyInfo.apiKey,
                reasoning: model.reasoning ? "low" : undefined,
                maxTokens: 128,
                temperature: 0,
              },
            );

            const toolCall = first.content.find((b) => b.type === "toolCall");
            expect(toolCall).toBeTruthy();
            if (!toolCall || toolCall.type !== "toolCall") {
              throw new Error("expected tool call");
            }

            const second = await completeSimple(
              model,
              {
                messages: [
                  {
                    role: "user",
                    content:
                      "Call the tool `noop` with {}. Do not write any other text.",
                    timestamp: Date.now(),
                  },
                  first,
                  {
                    role: "toolResult",
                    toolCallId: toolCall.id,
                    toolName: "noop",
                    content: [{ type: "text", text: "ok" }],
                    isError: false,
                    timestamp: Date.now(),
                  },
                  {
                    role: "user",
                    content: "Reply with the word ok.",
                    timestamp: Date.now(),
                  },
                ],
              },
              {
                apiKey: apiKeyInfo.apiKey,
                reasoning: model.reasoning ? "low" : undefined,
                maxTokens: 64,
                temperature: 0,
              },
            );

            const secondText = second.content
              .filter((b) => b.type === "text")
              .map((b) => b.text.trim())
              .join(" ");
            expect(secondText.length).toBeGreaterThan(0);
            continue;
          }

          const res = await completeSimple(
            model,
            {
              messages: [
                {
                  role: "user",
                  content: "Reply with the word ok.",
                  timestamp: Date.now(),
                },
              ],
            },
            {
              apiKey: apiKeyInfo.apiKey,
              reasoning: model.reasoning ? "low" : undefined,
              maxTokens: 64,
              temperature: 0,
            },
          );

          const text = res.content
            .filter((block) => block.type === "text")
            .map((block) => block.text.trim())
            .join(" ");
          expect(text.length).toBeGreaterThan(0);
        } catch (err) {
          failures.push({ model: id, error: String(err) });
        }
      }

      if (failures.length > 0) {
        const preview = failures
          .slice(0, 10)
          .map((f) => `- ${f.model}: ${f.error}`)
          .join("\n");
        throw new Error(
          `live model failures (${failures.length}):\n${preview}`,
        );
      }

      // Keep one assertion so the test fails loudly if we somehow ran nothing.
      expect(models.length).toBeGreaterThan(0);
      void skipped;
    },
    15 * 60 * 1000,
  );
});