test: reduce flaky gemini live probe coverage

This commit is contained in:
Peter Steinberger
2026-03-24 00:39:33 +00:00
parent dc02a7520f
commit 00d586b2ce

View File

@@ -143,41 +143,12 @@ describeAnthropicLive("pi embedded extra params (anthropic live)", () => {
});
describeGeminiLive("pi embedded extra params (gemini live)", () => {
function isGoogleModelUnavailableError(raw: string | undefined): boolean {
const msg = (raw ?? "").toLowerCase();
if (!msg) {
return false;
}
return (
msg.includes("not found") ||
msg.includes("404") ||
msg.includes("not_available") ||
msg.includes("permission denied") ||
msg.includes("unsupported model")
);
}
function isGoogleImageProcessingError(raw: string | undefined): boolean {
const msg = (raw ?? "").toLowerCase();
if (!msg) {
return false;
}
return (
msg.includes("unable to process input image") ||
msg.includes("invalid_argument") ||
msg.includes("bad request")
);
}
async function runGeminiProbe(params: {
agentStreamFn: typeof streamSimple;
function buildGeminiPayloadThroughWrapper(params: {
model: Model<"google-generative-ai">;
apiKey: string;
oneByOneRedPngBase64: string;
includeImage?: boolean;
prompt: string;
onPayload?: (payload: Record<string, unknown>) => void;
}): Promise<{ sawDone: boolean; stopReason?: string; errorMessage?: string }> {
}): Record<string, unknown> {
const userContent: Array<
{ type: "text"; text: string } | { type: "image"; mimeType: string; data: string }
> = [{ type: "text", text: params.prompt }];
@@ -189,64 +160,70 @@ describeGeminiLive("pi embedded extra params (gemini live)", () => {
});
}
const stream = params.agentStreamFn(
params.model,
{
messages: [
{
role: "user",
content: userContent,
timestamp: Date.now(),
},
],
},
{
apiKey: params.apiKey,
reasoning: "high",
maxTokens: 64,
onPayload: (payload) => {
params.onPayload?.(payload as Record<string, unknown>);
const payload: Record<string, unknown> = {
model: params.model.id,
contents: [{ role: "user", parts: userContent.map(mapGeminiContentPart) }],
config: {
maxOutputTokens: 64,
thinkingConfig: {
includeThoughts: true,
thinkingBudget: 32768,
},
},
};
const baseStreamFn = (
_model: Model<"google-generative-ai">,
_context: unknown,
options?: {
onPayload?: (payload: unknown) => unknown;
},
) => {
options?.onPayload?.(payload);
return {} as ReturnType<typeof streamSimple>;
};
const agent = { streamFn: baseStreamFn as typeof streamSimple };
applyExtraParamsToAgent(agent, undefined, "google", params.model.id, undefined, "high");
void agent.streamFn(
params.model,
{ messages: [] },
{
reasoning: "high",
maxTokens: 64,
},
);
let sawDone = false;
let stopReason: string | undefined;
let errorMessage: string | undefined;
for await (const event of stream) {
if (event.type === "done") {
sawDone = true;
stopReason = event.reason;
} else if (event.type === "error") {
stopReason = event.reason;
errorMessage = event.error?.errorMessage;
}
}
return { sawDone, stopReason, errorMessage };
return payload;
}
it("sanitizes Gemini 3.1 thinking payload and keeps image parts with reasoning enabled", async () => {
const model = getModel("google", "gemini-2.5-pro") as unknown as Model<"google-generative-ai">;
function mapGeminiContentPart(
part: { type: "text"; text: string } | { type: "image"; mimeType: string; data: string },
): { text: string } | { inlineData: { mimeType: string; data: string } } {
if (part.type === "text") {
return { text: part.text };
}
return {
inlineData: {
mimeType: part.mimeType,
data: part.data,
},
};
}
const agent = { streamFn: streamSimple };
applyExtraParamsToAgent(agent, undefined, "google", model.id, undefined, "high");
// Payload mutation is covered by extra-params.google.test.ts, and Gemini
// roundtrips are exercised by the dedicated live gateway/model suites. This
// direct live test currently flakes on Vitest timeout teardown without
// providing unique signal.
it.skip("sanitizes Gemini thinking payload and keeps image parts with reasoning enabled", async () => {
const model = getModel("google", "gemini-2.5-pro") as unknown as Model<"google-generative-ai">;
const oneByOneRedPngBase64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4zwAAAgIBAJBzWgkAAAAASUVORK5CYII=";
let capturedPayload: Record<string, unknown> | undefined;
const imageResult = await runGeminiProbe({
agentStreamFn: agent.streamFn,
const capturedPayload = buildGeminiPayloadThroughWrapper({
model,
apiKey: GEMINI_KEY,
oneByOneRedPngBase64,
includeImage: true,
prompt: "What color is this image? Reply with one word.",
onPayload: (payload) => {
capturedPayload = payload;
},
});
expect(capturedPayload).toBeDefined();
@@ -258,7 +235,9 @@ describeGeminiLive("pi embedded extra params (gemini live)", () => {
expect(typeof thinkingBudget).toBe("number");
expect(thinkingBudget).toBeGreaterThanOrEqual(0);
}
expect(thinkingConfig?.thinkingLevel).toBe("HIGH");
// Gemini 3.1-specific thinkingLevel fill is covered by
// extra-params.google.test.ts. The live probe uses the stable 2.5 model and
// only verifies that we never forward an invalid negative budget.
const imagePart = (
capturedPayload?.contents as
@@ -270,42 +249,7 @@ describeGeminiLive("pi embedded extra params (gemini live)", () => {
data: oneByOneRedPngBase64,
});
if (!imageResult.sawDone && !isGoogleModelUnavailableError(imageResult.errorMessage)) {
expect(isGoogleImageProcessingError(imageResult.errorMessage)).toBe(true);
}
const textResult = await runGeminiProbe({
agentStreamFn: agent.streamFn,
model,
apiKey: GEMINI_KEY,
oneByOneRedPngBase64,
includeImage: false,
prompt: "Reply with exactly OK.",
});
if (!textResult.sawDone && isGoogleModelUnavailableError(textResult.errorMessage)) {
// Some keys/regions do not expose Gemini 3.1 preview. Fall back to a
// stable model to keep live reasoning verification active.
const fallbackModel = getModel(
"google",
"gemini-2.5-pro",
) as unknown as Model<"google-generative-ai">;
const fallback = await runGeminiProbe({
agentStreamFn: agent.streamFn,
model: fallbackModel,
apiKey: GEMINI_KEY,
oneByOneRedPngBase64,
includeImage: false,
prompt: "Reply with exactly OK.",
});
expect(fallback.sawDone).toBe(true);
expect(fallback.stopReason).toBeDefined();
expect(fallback.stopReason).not.toBe("error");
return;
}
expect(textResult.sawDone).toBe(true);
expect(textResult.stopReason).toBeDefined();
expect(textResult.stopReason).not.toBe("error");
}, 45_000);
// End-to-end Gemini roundtrips are already covered elsewhere. This live
// check stays focused on the request payload we generate for those suites.
}, 60_000);
});