test: add live model file and image probes

This commit is contained in:
Peter Steinberger
2026-04-23 15:09:08 +01:00
parent 4a4e56e8f3
commit e050e18945
6 changed files with 266 additions and 1 deletions

View File

@@ -40,6 +40,10 @@ When debugging real providers/models (requires real creds):
- Live suite (models + gateway tool/image probes): `pnpm test:live`
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
- Docker live model sweep: `pnpm test:docker:live-models`
- Each selected model runs a text turn plus a small file-read probe (a file
excerpt sent as a text block). Models whose metadata advertises `image` input
also run a tiny image turn. Disable the extra probes with
`OPENCLAW_LIVE_MODEL_FILE_PROBE=0` or `OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0`
(`false`, `no`, and `off` also work) when isolating provider failures.
- CI coverage: daily `OpenClaw Scheduled Live And E2E Checks` and manual
`OpenClaw Release Checks` both call the reusable live/E2E workflow with
`include_live_suites: true`, which includes separate Docker live model

View File

@@ -235,6 +235,9 @@ const TOOLING_TEST_TARGETS = new Map([
["test/scripts/vitest-local-scheduling.test.ts"],
],
]);
// Explicit source-file -> test-file routing table. When a changed path is a key here,
// resolveChangedTestTargetPlan substitutes the listed test files for the changed path
// instead of applying the generic routable-target check to it.
const SOURCE_TEST_TARGETS = new Map([
["src/agents/live-model-turn-probes.ts", ["src/agents/live-model-turn-probes.test.ts"]],
]);
const GENERATED_CHANGED_TEST_TARGETS = new Set([
"src/canvas-host/a2ui/.bundle.hash",
"src/canvas-host/a2ui/a2ui.bundle.js",
@@ -511,7 +514,13 @@ export function resolveChangedTestTargetPlan(changedPaths) {
if (changedLanes.lanes.all) {
return { mode: "broad", targets: [] };
}
const targets = changedPaths.filter(isRoutableChangedTarget);
const targets = changedPaths.flatMap((changedPath) => {
const mappedTargets = SOURCE_TEST_TARGETS.get(changedPath);
if (mappedTargets) {
return mappedTargets;
}
return isRoutableChangedTarget(changedPath) ? [changedPath] : [];
});
if (changedLanes.extensionImpactFromCore) {
targets.push("extensions");
}

View File

@@ -0,0 +1,72 @@
import { describe, expect, it } from "vitest";
import {
buildLiveModelFileProbeContext,
buildLiveModelImageProbeContext,
extractAssistantText,
fileProbeTextMatches,
imageProbeTextMatches,
isLiveModelProbeEnabled,
LIVE_MODEL_FILE_PROBE_TOKEN,
modelSupportsImageInput,
} from "./live-model-turn-probes.js";
// Unit coverage for the pure helpers behind the live-model file/image probes.
// Nothing here talks to a provider; every case exercises plain data in / data out.
describe("live model turn probes", () => {
  const IMAGE_FLAG = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";

  it("defaults probes on and accepts common opt-out values", () => {
    // Resolve the image flag against an env that either lacks it or sets it to `value`.
    const enabledWith = (value?: string): boolean => {
      const env: Record<string, string | undefined> =
        value === undefined ? {} : { [IMAGE_FLAG]: value };
      return isLiveModelProbeEnabled(env, IMAGE_FLAG);
    };

    expect(enabledWith()).toBe(true);
    expect(enabledWith("false")).toBe(false);
    expect(enabledWith("1")).toBe(true);
  });

  it("builds a text-block file read probe", () => {
    const { systemPrompt, messages } = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
    const tokenLine = `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`;

    expect(systemPrompt).toBe("sys");
    expect(messages[0]?.content).toEqual([
      expect.objectContaining({
        type: "text",
        text: expect.stringContaining(tokenLine),
      }),
    ]);
  });

  it("builds an image probe with native image content", () => {
    const { messages } = buildLiveModelImageProbeContext({});
    const expectedBlocks = [
      expect.objectContaining({ type: "text" }),
      expect.objectContaining({ type: "image", mimeType: "image/png" }),
    ];

    expect(messages[0]?.content).toEqual(expectedBlocks);
  });

  it("extracts assistant text blocks only", () => {
    // Thinking and tool-call blocks must be dropped; the text block is trimmed.
    const extracted = extractAssistantText({
      content: [
        { type: "thinking", thinking: "hidden" },
        { type: "text", text: " ok " },
        { type: "toolCall", id: "1", name: "noop", arguments: {} },
      ],
    });

    expect(extracted).toBe("ok");
  });

  it("detects image input support from model metadata", () => {
    const withImage = modelSupportsImageInput({ input: ["text", "image"] });
    const textOnly = modelSupportsImageInput({ input: ["text"] });

    expect(withImage).toBe(true);
    expect(textOnly).toBe(false);
  });

  it("matches expected probe replies", () => {
    const replyWithToken = `The value is ${LIVE_MODEL_FILE_PROBE_TOKEN}.`;

    expect(fileProbeTextMatches(replyWithToken)).toBe(true);
    expect(fileProbeTextMatches("OPAL-731")).toBe(false);
    expect(imageProbeTextMatches("OK")).toBe(true);
    expect(imageProbeTextMatches("blue")).toBe(false);
  });
});

View File

@@ -0,0 +1,85 @@
import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
/** Sentinel value embedded in the file probe prompt; a passing reply must contain it. */
export const LIVE_MODEL_FILE_PROBE_TOKEN = "OPAL_731";
/** Name of the environment variable that toggles the file-read probe. */
export const LIVE_MODEL_FILE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_FILE_PROBE";
/** Name of the environment variable that toggles the image probe. */
export const LIVE_MODEL_IMAGE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";
// Base64-encoded PNG sent as the image block of the image probe (declared as "image/png").
const PROBE_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
/**
 * Resolves an opt-out probe flag from an environment-like record.
 *
 * The flag is treated as enabled unless its value, after trimming and
 * lower-casing, is one of `0`, `false`, `no`, or `off`. A missing, empty, or
 * whitespace-only value therefore counts as enabled.
 *
 * @param env - Environment-style lookup (for example `process.env`).
 * @param key - Name of the variable to inspect.
 * @returns `false` only for an explicit opt-out value; `true` otherwise.
 */
export function isLiveModelProbeEnabled(
  env: Record<string, string | undefined>,
  key: string,
): boolean {
  const normalized = (env[key] ?? "").trim().toLowerCase();
  switch (normalized) {
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      // Covers the unset/blank case as well as any unrecognized value.
      return true;
  }
}
/**
 * Collects the visible text of an assistant message.
 *
 * Only blocks whose `type` is `"text"` contribute; each is trimmed, blocks that
 * are empty after trimming are dropped, and the survivors are joined with a
 * single space.
 *
 * @param message - Object carrying the assistant message `content` blocks.
 * @returns The joined text, or an empty string when there is no text content.
 */
export function extractAssistantText(message: Pick<AssistantMessage, "content">): string {
  const pieces: string[] = [];
  for (const block of message.content) {
    if (block.type !== "text") {
      continue;
    }
    const trimmed = block.text.trim();
    if (trimmed) {
      pieces.push(trimmed);
    }
  }
  return pieces.join(" ");
}
/**
 * Tells whether a model's metadata lists `"image"` among its input kinds.
 *
 * @param model - Object carrying the model's `input` list.
 * @returns `true` when `"image"` is present in `model.input`.
 */
export function modelSupportsImageInput(model: Pick<Model<Api>, "input">): boolean {
  const { input } = model;
  return input.indexOf("image") !== -1;
}
/**
 * Builds the single-turn context for the file-read probe.
 *
 * The user message is one text block: an instruction followed by a `<file>`
 * excerpt whose only line is `LIVE_FILE_TOKEN=<token>`. The message timestamp
 * is taken from `Date.now()` at call time.
 *
 * @param params - Optional system prompt to forward unchanged.
 * @returns A context holding exactly one user message.
 */
export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }): Context {
  const { systemPrompt } = params;
  const fileExcerpt = [
    '<file path="live-model-probe.txt" mime="text/plain">',
    `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
    "</file>",
  ].join("\n");
  const prompt = `Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n${fileExcerpt}`;
  return {
    systemPrompt,
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: prompt }],
        timestamp: Date.now(),
      },
    ],
  };
}
/**
 * Builds the single-turn context for the image probe.
 *
 * The user message carries two blocks in order: a text instruction asking for
 * the word OK, then the embedded probe PNG as an `image/png` image block. The
 * message timestamp is taken from `Date.now()` at call time.
 *
 * @param params - Optional system prompt to forward unchanged.
 * @returns A context holding exactly one user message.
 */
export function buildLiveModelImageProbeContext(params: { systemPrompt?: string }): Context {
  const { systemPrompt } = params;
  const instruction = "Reply with exactly the word OK if you received this image.";
  const userTurn: Context["messages"][number] = {
    role: "user",
    content: [
      { type: "text", text: instruction },
      { type: "image", data: PROBE_PNG_BASE64, mimeType: "image/png" },
    ],
    timestamp: Date.now(),
  };
  return { systemPrompt, messages: [userTurn] };
}
/**
 * Checks whether a file-read probe reply contains the expected token.
 *
 * The comparison is a case-insensitive substring match: both the reply and
 * `LIVE_MODEL_FILE_PROBE_TOKEN` are upper-cased before comparing. Normalizing
 * the token as well keeps the check correct if the token constant is ever
 * changed to a value containing lower-case letters; upper-casing only the
 * reply would make such a token impossible to match.
 *
 * @param text - Assistant reply text to inspect.
 * @returns `true` when the token occurs anywhere in `text`, ignoring case.
 */
export function fileProbeTextMatches(text: string): boolean {
  const expected = LIVE_MODEL_FILE_PROBE_TOKEN.toUpperCase();
  return text.toUpperCase().includes(expected);
}
/**
 * Checks whether an image probe reply contains the standalone word "ok".
 *
 * The match is case-insensitive and bounded by word boundaries, so "OK." and
 * "ok" pass while "okay" and "token" do not.
 *
 * @param text - Assistant reply text to inspect.
 * @returns `true` when "ok" appears as its own word anywhere in `text`.
 */
export function imageProbeTextMatches(text: string): boolean {
  const standaloneOk = /\bok\b/i;
  return text.search(standaloneOk) !== -1;
}

View File

@@ -16,6 +16,18 @@ import {
selectHighSignalLiveItems,
shouldExcludeProviderFromDefaultHighSignalLiveSweep,
} from "./live-model-filter.js";
import {
buildLiveModelFileProbeContext,
buildLiveModelImageProbeContext,
extractAssistantText,
fileProbeTextMatches,
imageProbeTextMatches,
isLiveModelProbeEnabled,
LIVE_MODEL_FILE_PROBE_ENV,
LIVE_MODEL_FILE_PROBE_TOKEN,
LIVE_MODEL_IMAGE_PROBE_ENV,
modelSupportsImageInput,
} from "./live-model-turn-probes.js";
import { createLiveTargetMatcher } from "./live-target-matcher.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
@@ -37,6 +49,8 @@ const LIVE_SETUP_TIMEOUT_MS = Math.max(
toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000),
);
const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs();
// Extra per-model probes are opt-out: each flag is resolved once at module load from
// process.env and is only false when the corresponding variable holds an opt-out value.
const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
const describeLive = LIVE ? describe : describe.skip;
@@ -432,6 +446,60 @@ async function completeOkWithRetry(params: {
return await runOnce(256);
}
/**
 * Runs the optional follow-up probes for one live model.
 *
 * The file-read probe runs first when LIVE_FILE_PROBE_ENABLED is set. The image
 * probe runs afterwards when LIVE_IMAGE_PROBE_ENABLED is set and the model's
 * metadata lists image input; a model without image input only logs a skip line.
 * Both probes share the same request options (API key, test reasoning setting,
 * 64 max tokens) and the same per-call timeout.
 *
 * @param params.model - Model under test.
 * @param params.apiKey - API key forwarded with each probe request.
 * @param params.timeoutMs - Timeout applied to each probe request.
 * @param params.progressLabel - Prefix for progress log lines and timeout labels.
 * @throws Error when a probe stops with `stopReason === "error"` or when its reply
 *   text does not satisfy the probe's matcher.
 */
async function runExtraTurnProbes(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}): Promise<void> {
  const options = {
    apiKey: params.apiKey,
    reasoning: resolveTestReasoning(params.model),
    maxTokens: 64,
  };

  // One probe turn: log, send the built context, then validate stop reason and reply text.
  // Both probes follow this exact sequence, so it lives in a single place.
  const runProbe = async (probe: {
    name: string;
    expected: string;
    buildContext: typeof buildLiveModelFileProbeContext;
    matches: (text: string) => boolean;
  }): Promise<void> => {
    const label = `${params.progressLabel}: ${probe.name} probe`;
    logProgress(label);
    const reply = await completeSimpleWithTimeout(
      params.model,
      probe.buildContext({ systemPrompt: resolveLiveSystemPrompt(params.model) }),
      options,
      params.timeoutMs,
      label,
    );
    if (reply.stopReason === "error") {
      // `||` on purpose: an empty errorMessage should also fall back to the generic text.
      throw new Error(reply.errorMessage || `${probe.name} probe returned error with no message`);
    }
    const replyText = extractAssistantText(reply);
    if (!probe.matches(replyText)) {
      throw new Error(`${probe.name} probe did not return ${probe.expected}: ${replyText}`);
    }
  };

  if (LIVE_FILE_PROBE_ENABLED) {
    await runProbe({
      name: "file-read",
      expected: LIVE_MODEL_FILE_PROBE_TOKEN,
      buildContext: buildLiveModelFileProbeContext,
      matches: fileProbeTextMatches,
    });
  }

  if (!LIVE_IMAGE_PROBE_ENABLED) {
    return;
  }
  if (!modelSupportsImageInput(params.model)) {
    logProgress(`${params.progressLabel}: image probe skipped (no image input)`);
    return;
  }
  await runProbe({
    name: "image",
    expected: "ok",
    buildContext: buildLiveModelImageProbeContext,
    matches: imageProbeTextMatches,
  });
}
describeLive("live models (profile keys)", () => {
it(
"completes across selected models",
@@ -688,6 +756,12 @@ describeLive("live models (profile keys)", () => {
.map((b) => b.text.trim())
.join(" ");
expect(secondText.length).toBeGreaterThan(0);
await runExtraTurnProbes({
model,
apiKey,
timeoutMs: perModelTimeoutMs,
progressLabel,
});
logProgress(`${progressLabel}: done`);
break;
}
@@ -761,6 +835,12 @@ describeLive("live models (profile keys)", () => {
break;
}
expect(ok.text.length).toBeGreaterThan(0);
await runExtraTurnProbes({
model,
apiKey,
timeoutMs: perModelTimeoutMs,
progressLabel,
});
logProgress(`${progressLabel}: done`);
break;
} catch (err) {

View File

@@ -228,6 +228,21 @@ describe("scripts/test-projects changed-target routing", () => {
]);
});
// A changed source file with an explicit mapping must be routed to its sibling test
// file rather than being passed through as a target itself.
it("routes changed source files to sibling tests when present", () => {
  const changedSource = "src/agents/live-model-turn-probes.ts";
  const siblingTest = "src/agents/live-model-turn-probes.test.ts";
  const listChangedPaths = () => [changedSource];

  const runPlans = buildVitestRunPlans(
    ["--changed", "origin/main"],
    process.cwd(),
    listChangedPaths,
  );

  const expectedPlan = {
    config: "test/vitest/vitest.unit-fast.config.ts",
    forwardedArgs: [],
    includePatterns: [siblingTest],
    watchMode: false,
  };
  expect(runPlans).toEqual([expectedPlan]);
});
it("routes changed utils and shared files to their light scoped lanes", () => {
const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), () => [
"src/shared/string-normalization.ts",