mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:30:43 +00:00
test: add live model file and image probes
This commit is contained in:
@@ -40,6 +40,10 @@ When debugging real providers/models (requires real creds):
|
||||
- Live suite (models + gateway tool/image probes): `pnpm test:live`
|
||||
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
|
||||
- Docker live model sweep: `pnpm test:docker:live-models`
|
||||
- Each selected model now runs a text turn plus a small file-read-style text
|
||||
block probe. Models whose metadata advertises `image` input also run a tiny
|
||||
image turn. Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0`
|
||||
or `OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
|
||||
- CI coverage: daily `OpenClaw Scheduled Live And E2E Checks` and manual
|
||||
`OpenClaw Release Checks` both call the reusable live/E2E workflow with
|
||||
`include_live_suites: true`, which includes separate Docker live model
|
||||
|
||||
@@ -235,6 +235,9 @@ const TOOLING_TEST_TARGETS = new Map([
|
||||
["test/scripts/vitest-local-scheduling.test.ts"],
|
||||
],
|
||||
]);
|
||||
// Lookup table from a changed source path to the explicit list of test files
// that should run for it. Keys are repository-relative source paths; values
// are arrays of repository-relative test paths.
const SOURCE_TEST_TARGETS = new Map(
  Object.entries({
    "src/agents/live-model-turn-probes.ts": ["src/agents/live-model-turn-probes.test.ts"],
  }),
);
|
||||
const GENERATED_CHANGED_TEST_TARGETS = new Set([
|
||||
"src/canvas-host/a2ui/.bundle.hash",
|
||||
"src/canvas-host/a2ui/a2ui.bundle.js",
|
||||
@@ -511,7 +514,13 @@ export function resolveChangedTestTargetPlan(changedPaths) {
|
||||
if (changedLanes.lanes.all) {
|
||||
return { mode: "broad", targets: [] };
|
||||
}
|
||||
const targets = changedPaths.filter(isRoutableChangedTarget);
|
||||
const targets = changedPaths.flatMap((changedPath) => {
|
||||
const mappedTargets = SOURCE_TEST_TARGETS.get(changedPath);
|
||||
if (mappedTargets) {
|
||||
return mappedTargets;
|
||||
}
|
||||
return isRoutableChangedTarget(changedPath) ? [changedPath] : [];
|
||||
});
|
||||
if (changedLanes.extensionImpactFromCore) {
|
||||
targets.push("extensions");
|
||||
}
|
||||
|
||||
72
src/agents/live-model-turn-probes.test.ts
Normal file
72
src/agents/live-model-turn-probes.test.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildLiveModelFileProbeContext,
|
||||
buildLiveModelImageProbeContext,
|
||||
extractAssistantText,
|
||||
fileProbeTextMatches,
|
||||
imageProbeTextMatches,
|
||||
isLiveModelProbeEnabled,
|
||||
LIVE_MODEL_FILE_PROBE_TOKEN,
|
||||
modelSupportsImageInput,
|
||||
} from "./live-model-turn-probes.js";
|
||||
|
||||
// Unit coverage for the pure helpers behind the live-model file/image probes.
describe("live model turn probes", () => {
  it("defaults probes on and accepts common opt-out values", () => {
    const key = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";

    // Unset variable: the probe stays enabled.
    expect(isLiveModelProbeEnabled({}, key)).toBe(true);
    // Explicit opt-out value.
    expect(isLiveModelProbeEnabled({ [key]: "false" }, key)).toBe(false);
    // Any value outside the opt-out list keeps the probe enabled.
    expect(isLiveModelProbeEnabled({ [key]: "1" }, key)).toBe(true);
  });

  it("builds a text-block file read probe", () => {
    const { systemPrompt, messages } = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
    const expectedBlock = expect.objectContaining({
      type: "text",
      text: expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
    });

    expect(systemPrompt).toBe("sys");
    expect(messages[0]?.content).toEqual([expectedBlock]);
  });

  it("builds an image probe with native image content", () => {
    const { messages } = buildLiveModelImageProbeContext({});
    const textBlock = expect.objectContaining({ type: "text" });
    const imageBlock = expect.objectContaining({ type: "image", mimeType: "image/png" });

    expect(messages[0]?.content).toEqual([textBlock, imageBlock]);
  });

  it("extracts assistant text blocks only", () => {
    // Thinking and tool-call blocks must be ignored; text is trimmed.
    const extracted = extractAssistantText({
      content: [
        { type: "thinking", thinking: "hidden" },
        { type: "text", text: " ok " },
        { type: "toolCall", id: "1", name: "noop", arguments: {} },
      ],
    });

    expect(extracted).toBe("ok");
  });

  it("detects image input support from model metadata", () => {
    const withImage = modelSupportsImageInput({ input: ["text", "image"] });
    const textOnly = modelSupportsImageInput({ input: ["text"] });

    expect(withImage).toBe(true);
    expect(textOnly).toBe(false);
  });

  it("matches expected probe replies", () => {
    // File probe: the exact token must appear somewhere in the reply.
    expect(fileProbeTextMatches(`The value is ${LIVE_MODEL_FILE_PROBE_TOKEN}.`)).toBe(true);
    expect(fileProbeTextMatches("OPAL-731")).toBe(false);

    // Image probe: the reply must contain the standalone word "ok".
    expect(imageProbeTextMatches("OK")).toBe(true);
    expect(imageProbeTextMatches("blue")).toBe(false);
  });
});
|
||||
85
src/agents/live-model-turn-probes.ts
Normal file
85
src/agents/live-model-turn-probes.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
|
||||
|
||||
/** Sentinel value embedded in the file-read probe prompt; a passing reply must contain it. */
export const LIVE_MODEL_FILE_PROBE_TOKEN = "OPAL_731";

/** Name of the environment variable that toggles the file-read probe. */
export const LIVE_MODEL_FILE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_FILE_PROBE";

/** Name of the environment variable that toggles the image probe. */
export const LIVE_MODEL_IMAGE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";

/** Base64-encoded PNG payload attached to the image probe turn. */
const PROBE_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
|
||||
|
||||
/**
 * Reports whether the probe controlled by `key` should run.
 *
 * Probes are on by default: a missing, empty, or whitespace-only value counts
 * as enabled. Only the case-insensitive values `0`, `false`, `no`, and `off`
 * (after trimming) switch a probe off.
 *
 * @param env - Environment-like map to read from (for example `process.env`).
 * @param key - Name of the variable that toggles the probe.
 * @returns `false` only when the variable holds one of the opt-out values.
 */
export function isLiveModelProbeEnabled(
  env: Record<string, string | undefined>,
  key: string,
): boolean {
  const normalized = (env[key] ?? "").trim().toLowerCase();
  switch (normalized) {
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      // Covers unset/blank values as well as anything not on the opt-out list.
      return true;
  }
}
|
||||
|
||||
/**
 * Collects the visible text of an assistant message.
 *
 * Only blocks whose `type` is `"text"` contribute; each one is trimmed, blocks
 * that are empty after trimming are dropped, and the rest are joined with a
 * single space.
 *
 * @param message - Any object carrying an assistant `content` block list.
 * @returns The joined text, or an empty string when there is no non-blank text block.
 */
export function extractAssistantText(message: Pick<AssistantMessage, "content">): string {
  const pieces: string[] = [];
  for (const block of message.content) {
    if (block.type !== "text") {
      continue;
    }
    const trimmed = block.text.trim();
    if (trimmed) {
      pieces.push(trimmed);
    }
  }
  return pieces.join(" ");
}
|
||||
|
||||
/**
 * Tells whether a model's metadata lists `"image"` among its input kinds.
 *
 * @param model - Any object exposing the model's `input` list.
 * @returns `true` when `"image"` appears in `model.input`.
 */
export function modelSupportsImageInput(model: Pick<Model<Api>, "input">): boolean {
  const { input } = model;
  return input.some((kind) => kind === "image");
}
|
||||
|
||||
/**
 * Builds the single-turn context for the file-read probe.
 *
 * The user message is one text block holding an instruction followed by an
 * inline `<file>` excerpt whose only line is
 * `LIVE_FILE_TOKEN=<LIVE_MODEL_FILE_PROBE_TOKEN>`.
 *
 * @param params - `systemPrompt` is copied onto the context unchanged (may be `undefined`).
 * @returns A context with exactly one user message, timestamped with `Date.now()`.
 */
export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }): Context {
  const { systemPrompt } = params;
  const probeText =
    "Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
    '<file path="live-model-probe.txt" mime="text/plain">\n' +
    `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}\n` +
    "</file>";

  return {
    systemPrompt,
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: probeText }],
        timestamp: Date.now(),
      },
    ],
  };
}
|
||||
|
||||
/**
 * Builds the single-turn context for the image probe.
 *
 * The user message carries two blocks in order: a text instruction asking for
 * the word OK, then an `image/png` block holding the embedded probe image.
 *
 * @param params - `systemPrompt` is copied onto the context unchanged (may be `undefined`).
 * @returns A context with exactly one user message, timestamped with `Date.now()`.
 */
export function buildLiveModelImageProbeContext(params: { systemPrompt?: string }): Context {
  const { systemPrompt } = params;
  const instruction = "Reply with exactly the word OK if you received this image.";

  return {
    systemPrompt,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: instruction },
          { type: "image", data: PROBE_PNG_BASE64, mimeType: "image/png" },
        ],
        timestamp: Date.now(),
      },
    ],
  };
}
|
||||
|
||||
/**
 * Checks a file-read probe reply for the expected token.
 *
 * The reply is upper-cased before the search, so the token's letters match in
 * any case; the token may be surrounded by other text.
 *
 * @param text - Assistant reply text.
 * @returns `true` when the upper-cased reply contains `LIVE_MODEL_FILE_PROBE_TOKEN`.
 */
export function fileProbeTextMatches(text: string): boolean {
  const upperCased = text.toUpperCase();
  return upperCased.indexOf(LIVE_MODEL_FILE_PROBE_TOKEN) !== -1;
}
|
||||
|
||||
/**
 * Checks an image probe reply for the standalone word "ok".
 *
 * The match is case-insensitive and bounded by word boundaries, so "OK." and
 * "ok, got it" pass while "book" or "okay" do not.
 *
 * @param text - Assistant reply text.
 * @returns `true` when the reply contains "ok" as a whole word.
 */
export function imageProbeTextMatches(text: string): boolean {
  return text.search(/\bok\b/i) !== -1;
}
|
||||
@@ -16,6 +16,18 @@ import {
|
||||
selectHighSignalLiveItems,
|
||||
shouldExcludeProviderFromDefaultHighSignalLiveSweep,
|
||||
} from "./live-model-filter.js";
|
||||
import {
|
||||
buildLiveModelFileProbeContext,
|
||||
buildLiveModelImageProbeContext,
|
||||
extractAssistantText,
|
||||
fileProbeTextMatches,
|
||||
imageProbeTextMatches,
|
||||
isLiveModelProbeEnabled,
|
||||
LIVE_MODEL_FILE_PROBE_ENV,
|
||||
LIVE_MODEL_FILE_PROBE_TOKEN,
|
||||
LIVE_MODEL_IMAGE_PROBE_ENV,
|
||||
modelSupportsImageInput,
|
||||
} from "./live-model-turn-probes.js";
|
||||
import { createLiveTargetMatcher } from "./live-target-matcher.js";
|
||||
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
|
||||
@@ -37,6 +49,8 @@ const LIVE_SETUP_TIMEOUT_MS = Math.max(
|
||||
toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000),
|
||||
);
|
||||
const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs();
|
||||
const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
|
||||
const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
|
||||
|
||||
const describeLive = LIVE ? describe : describe.skip;
|
||||
|
||||
@@ -432,6 +446,60 @@ async function completeOkWithRetry(params: {
|
||||
return await runOnce(256);
|
||||
}
|
||||
|
||||
/**
 * Runs the optional follow-up probes (file-read, then image) against one model.
 *
 * The file-read probe runs when `LIVE_FILE_PROBE_ENABLED` is set. The image
 * probe runs when `LIVE_IMAGE_PROBE_ENABLED` is set and the model's metadata
 * lists image input; otherwise it is skipped (with a progress line when the
 * model lacks image input). Both probes share one options object capped at
 * 64 max tokens.
 *
 * @param params.model - Model to probe.
 * @param params.apiKey - Credential forwarded to each completion call.
 * @param params.timeoutMs - Timeout passed to each completion call.
 * @param params.progressLabel - Prefix for progress log lines and timeout labels.
 * @throws Error when a probe completes with `stopReason === "error"` or when
 *   its reply text does not satisfy the matching `*ProbeTextMatches` check.
 */
async function runExtraTurnProbes(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const { model, apiKey, timeoutMs, progressLabel } = params;
  const options = {
    apiKey,
    reasoning: resolveTestReasoning(model),
    maxTokens: 64,
  };

  if (LIVE_FILE_PROBE_ENABLED) {
    const fileLabel = `${progressLabel}: file-read probe`;
    logProgress(fileLabel);
    const fileContext = buildLiveModelFileProbeContext({
      systemPrompt: resolveLiveSystemPrompt(model),
    });
    const fileReply = await completeSimpleWithTimeout(
      model,
      fileContext,
      options,
      timeoutMs,
      fileLabel,
    );
    if (fileReply.stopReason === "error") {
      throw new Error(fileReply.errorMessage || "file-read probe returned error with no message");
    }
    const fileText = extractAssistantText(fileReply);
    if (!fileProbeTextMatches(fileText)) {
      throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
    }
  }

  if (LIVE_IMAGE_PROBE_ENABLED) {
    if (!modelSupportsImageInput(model)) {
      logProgress(`${progressLabel}: image probe skipped (no image input)`);
      return;
    }

    const imageLabel = `${progressLabel}: image probe`;
    logProgress(imageLabel);
    const imageContext = buildLiveModelImageProbeContext({
      systemPrompt: resolveLiveSystemPrompt(model),
    });
    const imageReply = await completeSimpleWithTimeout(
      model,
      imageContext,
      options,
      timeoutMs,
      imageLabel,
    );
    if (imageReply.stopReason === "error") {
      throw new Error(imageReply.errorMessage || "image probe returned error with no message");
    }
    const imageText = extractAssistantText(imageReply);
    if (!imageProbeTextMatches(imageText)) {
      throw new Error(`image probe did not return ok: ${imageText}`);
    }
  }
}
|
||||
|
||||
describeLive("live models (profile keys)", () => {
|
||||
it(
|
||||
"completes across selected models",
|
||||
@@ -688,6 +756,12 @@ describeLive("live models (profile keys)", () => {
|
||||
.map((b) => b.text.trim())
|
||||
.join(" ");
|
||||
expect(secondText.length).toBeGreaterThan(0);
|
||||
await runExtraTurnProbes({
|
||||
model,
|
||||
apiKey,
|
||||
timeoutMs: perModelTimeoutMs,
|
||||
progressLabel,
|
||||
});
|
||||
logProgress(`${progressLabel}: done`);
|
||||
break;
|
||||
}
|
||||
@@ -761,6 +835,12 @@ describeLive("live models (profile keys)", () => {
|
||||
break;
|
||||
}
|
||||
expect(ok.text.length).toBeGreaterThan(0);
|
||||
await runExtraTurnProbes({
|
||||
model,
|
||||
apiKey,
|
||||
timeoutMs: perModelTimeoutMs,
|
||||
progressLabel,
|
||||
});
|
||||
logProgress(`${progressLabel}: done`);
|
||||
break;
|
||||
} catch (err) {
|
||||
|
||||
@@ -228,6 +228,21 @@ describe("scripts/test-projects changed-target routing", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// A changed source file with an explicit mapping should be routed to its
// mapped test file rather than being passed through as a target itself.
it("routes changed source files to sibling tests when present", () => {
  const listChangedPaths = () => ["src/agents/live-model-turn-probes.ts"];
  const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), listChangedPaths);

  const expectedPlan = {
    config: "test/vitest/vitest.unit-fast.config.ts",
    forwardedArgs: [],
    includePatterns: ["src/agents/live-model-turn-probes.test.ts"],
    watchMode: false,
  };
  expect(plans).toEqual([expectedPlan]);
});
|
||||
|
||||
it("routes changed utils and shared files to their light scoped lanes", () => {
|
||||
const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), () => [
|
||||
"src/shared/string-normalization.ts",
|
||||
|
||||
Reference in New Issue
Block a user