openclaw/src/agents/pi-embedded-runner-extraparams.live.test.ts

import type { Model } from "@mariozechner/pi-ai";
import { getModel, streamSimple } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { isLiveTestEnabled } from "./live-test-helpers.js";
import { applyExtraParamsToAgent } from "./pi-embedded-runner.js";

const OPENAI_KEY = process.env.OPENAI_API_KEY ?? "";
const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY ?? "";
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? "";
const LIVE = isLiveTestEnabled(["OPENAI_LIVE_TEST"]);
const ANTHROPIC_LIVE = isLiveTestEnabled(["ANTHROPIC_LIVE_TEST"]);
const GEMINI_LIVE = isLiveTestEnabled(["GEMINI_LIVE_TEST"]);

const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip;
const describeAnthropicLive = ANTHROPIC_LIVE && ANTHROPIC_KEY ? describe : describe.skip;
const describeGeminiLive = GEMINI_LIVE && GEMINI_KEY ? describe : describe.skip;

describeLive("pi embedded extra params (live)", () => {
  it("applies config maxTokens to openai streamFn", async () => {
    const model = getModel("openai", "gpt-5.2") as unknown as Model<"openai-completions">;

    const cfg: OpenClawConfig = {
      agents: {
        defaults: {
          models: {
            "openai/gpt-5.2": {
              // OpenAI Responses enforces a minimum max_output_tokens of 16.
              params: {
                maxTokens: 16,
              },
            },
          },
        },
      },
    };

    const agent = { streamFn: streamSimple };

    applyExtraParamsToAgent(agent, cfg, "openai", model.id);

    const stream = agent.streamFn(
      model,
      {
        messages: [
          {
            role: "user",
            content: "Write the alphabet letters A through Z as words separated by commas.",
            timestamp: Date.now(),
          },
        ],
      },
      { apiKey: OPENAI_KEY },
    );

    let stopReason: string | undefined;
    let outputTokens: number | undefined;
    for await (const event of stream) {
      if (event.type === "done") {
        stopReason = event.reason;
        outputTokens = event.message.usage.output;
      }
    }

    expect(stopReason).toBeDefined();
    expect(outputTokens).toBeDefined();
    // Should respect maxTokens from config (16) — allow a small buffer for provider rounding.
    expect(outputTokens ?? 0).toBeLessThanOrEqual(20);
  }, 30_000);

  it("verifies OpenAI fast-mode service_tier semantics against the live API", async () => {
    const headers = {
      "content-type": "application/json",
      authorization: `Bearer ${OPENAI_KEY}`,
    };

    const runProbe = async (serviceTier: "default" | "priority") => {
      const res = await fetch("https://api.openai.com/v1/responses", {
        method: "POST",
        headers,
        body: JSON.stringify({
          model: "gpt-5.4",
          input: "Reply with OK.",
          max_output_tokens: 32,
          service_tier: serviceTier,
        }),
      });
      const json = (await res.json()) as {
        error?: { message?: string };
        service_tier?: string;
        status?: string;
      };
      expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true);
      return json;
    };

    const standard = await runProbe("default");
    expect(standard.service_tier).toBe("default");
    expect(standard.status).toBe("completed");

    const fast = await runProbe("priority");
    expect(fast.service_tier).toBe("priority");
    expect(fast.status).toBe("completed");
  }, 45_000);
});

describeAnthropicLive("pi embedded extra params (anthropic live)", () => {
  it("verifies Anthropic fast-mode service_tier semantics against the live API", async () => {
    const headers = {
      "content-type": "application/json",
      "x-api-key": ANTHROPIC_KEY,
      "anthropic-version": "2023-06-01",
    };

    const runProbe = async (serviceTier: "auto" | "standard_only") => {
      const res = await fetch("https://api.anthropic.com/v1/messages", {
        method: "POST",
        headers,
        body: JSON.stringify({
          model: "claude-sonnet-4-5",
          max_tokens: 32,
          service_tier: serviceTier,
          messages: [{ role: "user", content: "Reply with OK." }],
        }),
      });
      const json = (await res.json()) as {
        error?: { message?: string };
        stop_reason?: string;
        usage?: { service_tier?: string };
      };
      expect(res.ok, json.error?.message ?? `HTTP ${res.status}`).toBe(true);
      return json;
    };

    const standard = await runProbe("standard_only");
    expect(standard.usage?.service_tier).toBe("standard");
    expect(standard.stop_reason).toBe("end_turn");

    const fast = await runProbe("auto");
    expect(["standard", "priority"]).toContain(fast.usage?.service_tier);
    expect(fast.stop_reason).toBe("end_turn");
  }, 45_000);
});

describeGeminiLive("pi embedded extra params (gemini live)", () => {
  function buildGeminiPayloadThroughWrapper(params: {
    model: Model<"google-generative-ai">;
    oneByOneRedPngBase64: string;
    includeImage?: boolean;
    prompt: string;
  }): Record<string, unknown> {
    const userContent: Array<
      { type: "text"; text: string } | { type: "image"; mimeType: string; data: string }
    > = [{ type: "text", text: params.prompt }];
    if (params.includeImage ?? true) {
      userContent.push({
        type: "image",
        mimeType: "image/png",
        data: params.oneByOneRedPngBase64,
      });
    }

    const payload: Record<string, unknown> = {
      model: params.model.id,
      contents: [{ role: "user", parts: userContent.map(mapGeminiContentPart) }],
      config: {
        maxOutputTokens: 64,
        thinkingConfig: {
          includeThoughts: true,
          thinkingBudget: 32768,
        },
      },
    };

    const baseStreamFn = (
      _model: Model<"google-generative-ai">,
      _context: unknown,
      options?: {
        onPayload?: (payload: unknown) => unknown;
      },
    ) => {
      options?.onPayload?.(payload);
      return {} as ReturnType<typeof streamSimple>;
    };
    const agent = { streamFn: baseStreamFn as typeof streamSimple };
    applyExtraParamsToAgent(agent, undefined, "google", params.model.id, undefined, "high");
    void agent.streamFn(
      params.model,
      { messages: [] },
      {
        reasoning: "high",
        maxTokens: 64,
      },
    );
    return payload;
  }

  function mapGeminiContentPart(
    part: { type: "text"; text: string } | { type: "image"; mimeType: string; data: string },
  ): { text: string } | { inlineData: { mimeType: string; data: string } } {
    if (part.type === "text") {
      return { text: part.text };
    }
    return {
      inlineData: {
        mimeType: part.mimeType,
        data: part.data,
      },
    };
  }

  // Payload mutation is covered by extra-params.google.test.ts, and Gemini
  // roundtrips are exercised by the dedicated live gateway/model suites. This
  // direct live test currently flakes on Vitest timeout teardown without
  // providing unique signal.
  it.skip("sanitizes Gemini thinking payload and keeps image parts with reasoning enabled", async () => {
    const model = getModel("google", "gemini-2.5-pro") as unknown as Model<"google-generative-ai">;

    const oneByOneRedPngBase64 =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4zwAAAgIBAJBzWgkAAAAASUVORK5CYII=";

    const capturedPayload = buildGeminiPayloadThroughWrapper({
      model,
      oneByOneRedPngBase64,
      includeImage: true,
      prompt: "What color is this image? Reply with one word.",
    });

    expect(capturedPayload).toBeDefined();
    const thinkingConfig = (
      capturedPayload?.config as { thinkingConfig?: Record<string, unknown> } | undefined
    )?.thinkingConfig;
    const thinkingBudget = thinkingConfig?.thinkingBudget;
    if (thinkingBudget !== undefined) {
      expect(typeof thinkingBudget).toBe("number");
      expect(thinkingBudget).toBeGreaterThanOrEqual(0);
    }
    // Gemini 3.1-specific thinkingLevel fill is covered by
    // extra-params.google.test.ts. The live probe uses the stable 2.5 model and
    // only verifies that we never forward an invalid negative budget.

    const imagePart = (
      capturedPayload?.contents as
        | Array<{ parts?: Array<{ inlineData?: { mimeType?: string; data?: string } }> }>
        | undefined
    )?.[0]?.parts?.find((part) => part.inlineData !== undefined)?.inlineData;
    expect(imagePart).toEqual({
      mimeType: "image/png",
      data: oneByOneRedPngBase64,
    });

    // End-to-end Gemini roundtrips are already covered elsewhere. This live
    // check stays focused on the request payload we generate for those suites.
  }, 60_000);
});