diff --git a/docs/help/testing.md b/docs/help/testing.md
index b69bb0fd074..55d5bfa59a3 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -40,6 +40,10 @@ When debugging real providers/models (requires real creds):
 - Live suite (models + gateway tool/image probes): `pnpm test:live`
 - Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
 - Docker live model sweep: `pnpm test:docker:live-models`
+  - Each selected model now runs a text turn plus a small file-read-style text
+    block probe. Models whose metadata advertises `image` input also run a tiny
+    image turn. Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0`
+    or `OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
   - CI coverage: daily `OpenClaw Scheduled Live And E2E Checks` and manual
     `OpenClaw Release Checks` both call the reusable live/E2E workflow with
     `include_live_suites: true`, which includes separate Docker live model
diff --git a/scripts/test-projects.test-support.mjs b/scripts/test-projects.test-support.mjs
index 6ea75fae636..3449ed27c77 100644
--- a/scripts/test-projects.test-support.mjs
+++ b/scripts/test-projects.test-support.mjs
@@ -235,6 +235,9 @@ const TOOLING_TEST_TARGETS = new Map([
     ["test/scripts/vitest-local-scheduling.test.ts"],
   ],
 ]);
+const SOURCE_TEST_TARGETS = new Map([
+  ["src/agents/live-model-turn-probes.ts", ["src/agents/live-model-turn-probes.test.ts"]],
+]);
 const GENERATED_CHANGED_TEST_TARGETS = new Set([
   "src/canvas-host/a2ui/.bundle.hash",
   "src/canvas-host/a2ui/a2ui.bundle.js",
@@ -511,7 +514,13 @@ export function resolveChangedTestTargetPlan(changedPaths) {
   if (changedLanes.lanes.all) {
     return { mode: "broad", targets: [] };
   }
-  const targets = changedPaths.filter(isRoutableChangedTarget);
+  const targets = changedPaths.flatMap((changedPath) => {
+    const mappedTargets = SOURCE_TEST_TARGETS.get(changedPath);
+    if (mappedTargets) {
+      return mappedTargets;
+    }
+    return isRoutableChangedTarget(changedPath) ? [changedPath] : [];
+  });
   if (changedLanes.extensionImpactFromCore) {
     targets.push("extensions");
   }
diff --git a/src/agents/live-model-turn-probes.test.ts b/src/agents/live-model-turn-probes.test.ts
new file mode 100644
index 00000000000..17c910049bb
--- /dev/null
+++ b/src/agents/live-model-turn-probes.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from "vitest";
+import {
+  buildLiveModelFileProbeContext,
+  buildLiveModelImageProbeContext,
+  extractAssistantText,
+  fileProbeTextMatches,
+  imageProbeTextMatches,
+  isLiveModelProbeEnabled,
+  LIVE_MODEL_FILE_PROBE_TOKEN,
+  modelSupportsImageInput,
+} from "./live-model-turn-probes.js";
+
+describe("live model turn probes", () => {
+  it("defaults probes on and accepts common opt-out values", () => {
+    expect(isLiveModelProbeEnabled({}, "OPENCLAW_LIVE_MODEL_IMAGE_PROBE")).toBe(true);
+    expect(
+      isLiveModelProbeEnabled(
+        { OPENCLAW_LIVE_MODEL_IMAGE_PROBE: "false" },
+        "OPENCLAW_LIVE_MODEL_IMAGE_PROBE",
+      ),
+    ).toBe(false);
+    expect(
+      isLiveModelProbeEnabled(
+        { OPENCLAW_LIVE_MODEL_IMAGE_PROBE: "1" },
+        "OPENCLAW_LIVE_MODEL_IMAGE_PROBE",
+      ),
+    ).toBe(true);
+  });
+
+  it("builds a text-block file read probe", () => {
+    const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
+    expect(context.systemPrompt).toBe("sys");
+    expect(context.messages[0]?.content).toEqual([
+      expect.objectContaining({
+        type: "text",
+        text: expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
+      }),
+    ]);
+  });
+
+  it("builds an image probe with native image content", () => {
+    const context = buildLiveModelImageProbeContext({});
+    expect(context.messages[0]?.content).toEqual([
+      expect.objectContaining({ type: "text" }),
+      expect.objectContaining({ type: "image", mimeType: "image/png" }),
+    ]);
+  });
+
+  it("extracts assistant text blocks only", () => {
+    expect(
+      extractAssistantText({
+        content: [
+          { type: "thinking", thinking: "hidden" },
+          { type: "text", text: " ok " },
+          { type: "toolCall", id: "1", name: "noop", arguments: {} },
+        ],
+      }),
+    ).toBe("ok");
+  });
+
+  it("detects image input support from model metadata", () => {
+    expect(modelSupportsImageInput({ input: ["text", "image"] })).toBe(true);
+    expect(modelSupportsImageInput({ input: ["text"] })).toBe(false);
+  });
+
+  it("matches expected probe replies", () => {
+    expect(fileProbeTextMatches(`The value is ${LIVE_MODEL_FILE_PROBE_TOKEN}.`)).toBe(true);
+    expect(fileProbeTextMatches("OPAL-731")).toBe(false);
+    expect(imageProbeTextMatches("OK")).toBe(true);
+    expect(imageProbeTextMatches("blue")).toBe(false);
+  });
+});
diff --git a/src/agents/live-model-turn-probes.ts b/src/agents/live-model-turn-probes.ts
new file mode 100644
index 00000000000..92642dd9fb4
--- /dev/null
+++ b/src/agents/live-model-turn-probes.ts
@@ -0,0 +1,85 @@
+import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
+
+export const LIVE_MODEL_FILE_PROBE_TOKEN = "OPAL_731";
+
+export const LIVE_MODEL_FILE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_FILE_PROBE";
+export const LIVE_MODEL_IMAGE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";
+
+const PROBE_PNG_BASE64 =
+  "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
+
+export function isLiveModelProbeEnabled(
+  env: Record<string, string | undefined>,
+  key: string,
+): boolean {
+  const raw = env[key]?.trim().toLowerCase();
+  if (!raw) {
+    return true;
+  }
+  return !["0", "false", "no", "off"].includes(raw);
+}
+
+export function extractAssistantText(message: Pick<AssistantMessage, "content">): string {
+  return message.content
+    .filter((block) => block.type === "text")
+    .map((block) => block.text.trim())
+    .filter(Boolean)
+    .join(" ");
+}
+
+export function modelSupportsImageInput(model: Pick<Model<Api>, "input">): boolean {
+  return model.input.includes("image");
+}
+
+export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }): Context {
+  return {
+    systemPrompt: params.systemPrompt,
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "text",
+            text:
+              "Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
+              '\n' +
+              `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}\n` +
+              "",
+          },
+        ],
+        timestamp: Date.now(),
+      },
+    ],
+  };
+}
+
+export function buildLiveModelImageProbeContext(params: { systemPrompt?: string }): Context {
+  return {
+    systemPrompt: params.systemPrompt,
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "text",
+            text: "Reply with exactly the word OK if you received this image.",
+          },
+          {
+            type: "image",
+            data: PROBE_PNG_BASE64,
+            mimeType: "image/png",
+          },
+        ],
+        timestamp: Date.now(),
+      },
+    ],
+  };
+}
+
+export function fileProbeTextMatches(text: string): boolean {
+  return text.toUpperCase().includes(LIVE_MODEL_FILE_PROBE_TOKEN);
+}
+
+export function imageProbeTextMatches(text: string): boolean {
+  return /\bok\b/i.test(text);
+}
diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts
index b2cc1bdf0e7..1a2c0111276 100644
--- a/src/agents/models.profiles.live.test.ts
+++ b/src/agents/models.profiles.live.test.ts
@@ -16,6 +16,18 @@ import {
   selectHighSignalLiveItems,
   shouldExcludeProviderFromDefaultHighSignalLiveSweep,
 } from "./live-model-filter.js";
+import {
+  buildLiveModelFileProbeContext,
+  buildLiveModelImageProbeContext,
+  extractAssistantText,
+  fileProbeTextMatches,
+  imageProbeTextMatches,
+  isLiveModelProbeEnabled,
+  LIVE_MODEL_FILE_PROBE_ENV,
+  LIVE_MODEL_FILE_PROBE_TOKEN,
+  LIVE_MODEL_IMAGE_PROBE_ENV,
+  modelSupportsImageInput,
+} from "./live-model-turn-probes.js";
 import { createLiveTargetMatcher } from "./live-target-matcher.js";
 import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
 import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
@@ -37,6 +49,8 @@ const LIVE_SETUP_TIMEOUT_MS = Math.max(
   toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000),
 );
 const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs();
+const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
+const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);
 
 const describeLive = LIVE ? describe : describe.skip;
 
@@ -432,6 +446,60 @@ async function completeOkWithRetry(params: {
   return await runOnce(256);
 }
 
+async function runExtraTurnProbes(params: {
+  model: Model<Api>;
+  apiKey: string;
+  timeoutMs: number;
+  progressLabel: string;
+}) {
+  const options = {
+    apiKey: params.apiKey,
+    reasoning: resolveTestReasoning(params.model),
+    maxTokens: 64,
+  };
+  if (LIVE_FILE_PROBE_ENABLED) {
+    logProgress(`${params.progressLabel}: file-read probe`);
+    const file = await completeSimpleWithTimeout(
+      params.model,
+      buildLiveModelFileProbeContext({ systemPrompt: resolveLiveSystemPrompt(params.model) }),
+      options,
+      params.timeoutMs,
+      `${params.progressLabel}: file-read probe`,
+    );
+    if (file.stopReason === "error") {
+      throw new Error(file.errorMessage || "file-read probe returned error with no message");
+    }
+    const fileText = extractAssistantText(file);
+    if (!fileProbeTextMatches(fileText)) {
+      throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
+    }
+  }
+
+  if (!LIVE_IMAGE_PROBE_ENABLED) {
+    return;
+  }
+  if (!modelSupportsImageInput(params.model)) {
+    logProgress(`${params.progressLabel}: image probe skipped (no image input)`);
+    return;
+  }
+
+  logProgress(`${params.progressLabel}: image probe`);
+  const image = await completeSimpleWithTimeout(
+    params.model,
+    buildLiveModelImageProbeContext({ systemPrompt: resolveLiveSystemPrompt(params.model) }),
+    options,
+    params.timeoutMs,
+    `${params.progressLabel}: image probe`,
+  );
+  if (image.stopReason === "error") {
+    throw new Error(image.errorMessage || "image probe returned error with no message");
+  }
+  const imageText = extractAssistantText(image);
+  if (!imageProbeTextMatches(imageText)) {
+    throw new Error(`image probe did not return ok: ${imageText}`);
+  }
+}
+
 describeLive("live models (profile keys)", () => {
   it(
     "completes across selected models",
@@ -688,6 +756,12 @@ describeLive("live models (profile keys)", () => {
                 .map((b) => b.text.trim())
                 .join(" ");
               expect(secondText.length).toBeGreaterThan(0);
+              await runExtraTurnProbes({
+                model,
+                apiKey,
+                timeoutMs: perModelTimeoutMs,
+                progressLabel,
+              });
               logProgress(`${progressLabel}: done`);
               break;
             }
@@ -761,6 +835,12 @@ describeLive("live models (profile keys)", () => {
               break;
             }
             expect(ok.text.length).toBeGreaterThan(0);
+            await runExtraTurnProbes({
+              model,
+              apiKey,
+              timeoutMs: perModelTimeoutMs,
+              progressLabel,
+            });
             logProgress(`${progressLabel}: done`);
             break;
           } catch (err) {
diff --git a/test/scripts/test-projects.test.ts b/test/scripts/test-projects.test.ts
index 6d23bfd4777..60b904ce486 100644
--- a/test/scripts/test-projects.test.ts
+++ b/test/scripts/test-projects.test.ts
@@ -228,6 +228,21 @@ describe("scripts/test-projects changed-target routing", () => {
     ]);
   });
 
+  it("routes changed source files to sibling tests when present", () => {
+    const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), () => [
+      "src/agents/live-model-turn-probes.ts",
+    ]);
+
+    expect(plans).toEqual([
+      {
+        config: "test/vitest/vitest.unit-fast.config.ts",
+        forwardedArgs: [],
+        includePatterns: ["src/agents/live-model-turn-probes.test.ts"],
+        watchMode: false,
+      },
+    ]);
+  });
+
   it("routes changed utils and shared files to their light scoped lanes", () => {
     const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), () => [
       "src/shared/string-normalization.ts",