test: harden live model extra probes

This commit is contained in:
Peter Steinberger
2026-04-23 15:27:22 +01:00
parent 6532ee0c39
commit 1e1aaa51e1
4 changed files with 91 additions and 27 deletions

View File

@@ -40,10 +40,10 @@ When debugging real providers/models (requires real creds):
- Live suite (models + gateway tool/image probes): `pnpm test:live`
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
- Docker live model sweep: `pnpm test:docker:live-models`
- Each selected model now runs a text turn plus a small file-read-style text
block probe. Models whose metadata advertises `image` input also run a tiny
image turn. Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0`
or `OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
- Each selected model now runs a text turn plus a small file-read-style probe.
Models whose metadata advertises `image` input also run a tiny image turn.
Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0` or
`OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
- CI coverage: daily `OpenClaw Scheduled Live And E2E Checks` and manual
`OpenClaw Release Checks` both call the reusable live/E2E workflow with
`include_live_suites: true`, which includes separate Docker live model

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import {
buildLiveModelFileProbeContext,
buildLiveModelFileProbeRetryContext,
buildLiveModelImageProbeContext,
extractAssistantText,
fileProbeTextMatches,
@@ -8,6 +9,7 @@ import {
isLiveModelProbeEnabled,
LIVE_MODEL_FILE_PROBE_TOKEN,
modelSupportsImageInput,
shouldSkipLiveModelExtraProbes,
} from "./live-model-turn-probes.js";
describe("live model turn probes", () => {
@@ -27,15 +29,19 @@ describe("live model turn probes", () => {
).toBe(true);
});
it("builds a text file read probe", () => {
  // The file probe is expected to arrive as a single plain-string user message
  // that embeds the LIVE_FILE_TOKEN line, not as an array of content blocks.
  const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
  expect(context.systemPrompt).toBe("sys");
  expect(context.messages[0]?.content).toEqual(
    expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
  );
});
it("builds a stricter file read retry probe", () => {
  // The retry prompt must spell out the exact reply it wants from the model.
  const retryContext = buildLiveModelFileProbeRetryContext({});
  const firstMessage = retryContext.messages[0];
  const expectedInstruction = `Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}`;
  expect(firstMessage?.content).toEqual(expect.stringContaining(expectedInstruction));
});
it("builds an image probe with native image content", () => {
@@ -63,9 +69,24 @@ describe("live model turn probes", () => {
expect(modelSupportsImageInput({ input: ["text"] })).toBe(false);
});
it("skips known stale extra probe routes", () => {
  // Only the exact provider/id pair on the skip list is skipped; a sibling
  // model id under the same provider must still run the extra probes.
  const skippedRoute = { provider: "openrouter", id: "amazon/nova-2-lite-v1" };
  const probedRoute = { provider: "openrouter", id: "amazon/nova-lite-v1" };
  expect(shouldSkipLiveModelExtraProbes(skippedRoute)).toBe(true);
  expect(shouldSkipLiveModelExtraProbes(probedRoute)).toBe(false);
});
it("matches expected probe replies", () => {
  // A reply that merely contains the token counts as a match.
  expect(fileProbeTextMatches(`The value is ${LIVE_MODEL_FILE_PROBE_TOKEN}.`)).toBe(true);
  // An unrelated word must not match the file probe.
  expect(fileProbeTextMatches("amber")).toBe(false);
  expect(imageProbeTextMatches("OK")).toBe(true);
  expect(imageProbeTextMatches("blue")).toBe(false);
});

View File

@@ -1,12 +1,14 @@
import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
/** Token the file-read probe embeds in its prompt and expects back in the reply. */
export const LIVE_MODEL_FILE_PROBE_TOKEN = "opal";
/** Environment variable that toggles the file-read probe. */
export const LIVE_MODEL_FILE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_FILE_PROBE";
/** Environment variable that toggles the image probe. */
export const LIVE_MODEL_IMAGE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";
// Base64-encoded PNG; presumably the image attached by the image probe
// (NOTE(review): confirm this is the intended fixture of the two candidates).
const PROBE_PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALUlEQVR4nO3OIQEAAAwCMPrnod8fAzMxv7S9pQgICAgICAgICAgICAgICKwDD+yWbLXSniMNAAAAAElFTkSuQmCC";
// "<provider>/<id>" keys of models for which the extra probes are skipped.
const KNOWN_EMPTY_EXTRA_PROBE_MODELS = new Set(["openrouter/amazon/nova-2-lite-v1"]);
export function isLiveModelProbeEnabled(
env: Record<string, string | undefined>,
@@ -31,22 +33,39 @@ export function modelSupportsImageInput(model: Pick<Model<Api>, "input">): boole
return model.input.includes("image");
}
/**
 * Tells whether the extra turn probes should be skipped for a model.
 *
 * @param model - Model identity; only `provider` and `id` are read.
 * @returns `true` when `<provider>/<id>` is listed in
 *   `KNOWN_EMPTY_EXTRA_PROBE_MODELS`, otherwise `false`.
 */
export function shouldSkipLiveModelExtraProbes(
  model: Pick<Model<Api>, "id" | "provider">,
): boolean {
  const { provider, id } = model;
  const routeKey = `${provider}/${id}`;
  return KNOWN_EMPTY_EXTRA_PROBE_MODELS.has(routeKey);
}
/**
 * Builds the single-turn context for the file-read probe.
 *
 * The user message is a plain string that presents a small "file" (name, MIME
 * type, and one `LIVE_FILE_TOKEN=<token>` line) and asks the model to reply
 * with only the value after `LIVE_FILE_TOKEN`.
 *
 * @param params.systemPrompt - Optional system prompt, passed through unchanged.
 * @returns A context holding exactly one user message stamped with the current time.
 */
export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }): Context {
  // The message carries exactly one `content` value, the plain-string prompt;
  // the message object literal must not repeat the `content` key.
  const prompt = [
    "Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.",
    "",
    "File: live-model-probe.txt",
    "MIME: text/plain",
    "",
    `LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
  ].join("\n");
  return {
    systemPrompt: params.systemPrompt,
    messages: [
      {
        role: "user",
        content: prompt,
        timestamp: Date.now(),
      },
    ],
  };
}
export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: string }): Context {
return {
systemPrompt: params.systemPrompt,
messages: [
{
role: "user",
content:
"The file live-model-probe.txt contains exactly this token:\n\n" +
`${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` +
`Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`,
timestamp: Date.now(),
},
],
@@ -77,7 +96,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string
}
/**
 * Checks whether a model reply contains the file probe token.
 *
 * Both the reply and the token are lower-cased before comparing, so the match
 * is case-insensitive regardless of how the token constant is spelled.
 *
 * @param text - Assistant reply text to inspect.
 * @returns `true` when the token occurs anywhere in `text`.
 */
export function fileProbeTextMatches(text: string): boolean {
  const expectedToken = LIVE_MODEL_FILE_PROBE_TOKEN.toLowerCase();
  return text.toLowerCase().includes(expectedToken);
}
export function imageProbeTextMatches(text: string): boolean {

View File

@@ -18,6 +18,7 @@ import {
} from "./live-model-filter.js";
import {
buildLiveModelFileProbeContext,
buildLiveModelFileProbeRetryContext,
buildLiveModelImageProbeContext,
extractAssistantText,
fileProbeTextMatches,
@@ -27,6 +28,7 @@ import {
LIVE_MODEL_FILE_PROBE_TOKEN,
LIVE_MODEL_IMAGE_PROBE_ENV,
modelSupportsImageInput,
shouldSkipLiveModelExtraProbes,
} from "./live-model-turn-probes.js";
import { createLiveTargetMatcher } from "./live-target-matcher.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
@@ -452,10 +454,14 @@ async function runExtraTurnProbes(params: {
timeoutMs: number;
progressLabel: string;
}) {
if (shouldSkipLiveModelExtraProbes(params.model)) {
logProgress(`${params.progressLabel}: extra probes skipped (known empty route)`);
return;
}
const options = {
apiKey: params.apiKey,
reasoning: resolveTestReasoning(params.model),
maxTokens: 64,
maxTokens: 128,
};
if (LIVE_FILE_PROBE_ENABLED) {
logProgress(`${params.progressLabel}: file-read probe`);
@@ -469,7 +475,25 @@ async function runExtraTurnProbes(params: {
if (file.stopReason === "error") {
throw new Error(file.errorMessage || "file-read probe returned error with no message");
}
const fileText = extractAssistantText(file);
let fileText = extractAssistantText(file);
if (!fileProbeTextMatches(fileText)) {
logProgress(`${params.progressLabel}: file-read probe retry`);
const retry = await completeSimpleWithTimeout(
params.model,
buildLiveModelFileProbeRetryContext({
systemPrompt: resolveLiveSystemPrompt(params.model),
}),
options,
params.timeoutMs,
`${params.progressLabel}: file-read probe retry`,
);
if (retry.stopReason === "error") {
throw new Error(
retry.errorMessage || "file-read probe retry returned error with no message",
);
}
fileText = extractAssistantText(retry);
}
if (!fileProbeTextMatches(fileText)) {
throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
}