mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:30:43 +00:00
test: harden live model extra probes
This commit is contained in:
@@ -40,10 +40,10 @@ When debugging real providers/models (requires real creds):
|
||||
- Live suite (models + gateway tool/image probes): `pnpm test:live`
|
||||
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
|
||||
- Docker live model sweep: `pnpm test:docker:live-models`
|
||||
- Each selected model now runs a text turn plus a small file-read-style text
|
||||
block probe. Models whose metadata advertises `image` input also run a tiny
|
||||
image turn. Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0`
|
||||
or `OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
|
||||
- Each selected model now runs a text turn plus a small file-read-style probe.
|
||||
Models whose metadata advertises `image` input also run a tiny image turn.
|
||||
Disable the extra probes with `OPENCLAW_LIVE_MODEL_FILE_PROBE=0` or
|
||||
`OPENCLAW_LIVE_MODEL_IMAGE_PROBE=0` when isolating provider failures.
|
||||
- CI coverage: daily `OpenClaw Scheduled Live And E2E Checks` and manual
|
||||
`OpenClaw Release Checks` both call the reusable live/E2E workflow with
|
||||
`include_live_suites: true`, which includes separate Docker live model
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildLiveModelFileProbeContext,
|
||||
buildLiveModelFileProbeRetryContext,
|
||||
buildLiveModelImageProbeContext,
|
||||
extractAssistantText,
|
||||
fileProbeTextMatches,
|
||||
@@ -8,6 +9,7 @@ import {
|
||||
isLiveModelProbeEnabled,
|
||||
LIVE_MODEL_FILE_PROBE_TOKEN,
|
||||
modelSupportsImageInput,
|
||||
shouldSkipLiveModelExtraProbes,
|
||||
} from "./live-model-turn-probes.js";
|
||||
|
||||
describe("live model turn probes", () => {
|
||||
@@ -27,15 +29,19 @@ describe("live model turn probes", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("builds a text-block file read probe", () => {
|
||||
it("builds a text file read probe", () => {
|
||||
const context = buildLiveModelFileProbeContext({ systemPrompt: "sys" });
|
||||
expect(context.systemPrompt).toBe("sys");
|
||||
expect(context.messages[0]?.content).toEqual([
|
||||
expect.objectContaining({
|
||||
type: "text",
|
||||
text: expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
|
||||
}),
|
||||
]);
|
||||
expect(context.messages[0]?.content).toEqual(
|
||||
expect.stringContaining(`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`),
|
||||
);
|
||||
});
|
||||
|
||||
it("builds a stricter file read retry probe", () => {
|
||||
const context = buildLiveModelFileProbeRetryContext({});
|
||||
expect(context.messages[0]?.content).toEqual(
|
||||
expect.stringContaining(`Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}`),
|
||||
);
|
||||
});
|
||||
|
||||
it("builds an image probe with native image content", () => {
|
||||
@@ -63,9 +69,24 @@ describe("live model turn probes", () => {
|
||||
expect(modelSupportsImageInput({ input: ["text"] })).toBe(false);
|
||||
});
|
||||
|
||||
it("skips known stale extra probe routes", () => {
|
||||
expect(
|
||||
shouldSkipLiveModelExtraProbes({
|
||||
provider: "openrouter",
|
||||
id: "amazon/nova-2-lite-v1",
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
shouldSkipLiveModelExtraProbes({
|
||||
provider: "openrouter",
|
||||
id: "amazon/nova-lite-v1",
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("matches expected probe replies", () => {
|
||||
expect(fileProbeTextMatches(`The value is ${LIVE_MODEL_FILE_PROBE_TOKEN}.`)).toBe(true);
|
||||
expect(fileProbeTextMatches("OPAL-731")).toBe(false);
|
||||
expect(fileProbeTextMatches("amber")).toBe(false);
|
||||
expect(imageProbeTextMatches("OK")).toBe(true);
|
||||
expect(imageProbeTextMatches("blue")).toBe(false);
|
||||
});
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
|
||||
|
||||
export const LIVE_MODEL_FILE_PROBE_TOKEN = "OPAL_731";
|
||||
export const LIVE_MODEL_FILE_PROBE_TOKEN = "opal";
|
||||
|
||||
export const LIVE_MODEL_FILE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_FILE_PROBE";
|
||||
export const LIVE_MODEL_IMAGE_PROBE_ENV = "OPENCLAW_LIVE_MODEL_IMAGE_PROBE";
|
||||
|
||||
const PROBE_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALklEQVR4nO3OoQEAAAyDsP7/9HYGJgJNdtuVDQAAAAAAACAHxH8AAAAAAACAHvBX0fhq85dN7QAAAABJRU5ErkJggg==";
|
||||
"iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAALUlEQVR4nO3OIQEAAAwCMPrnod8fAzMxv7S9pQgICAgICAgICAgICAgICKwDD+yWbLXSniMNAAAAAElFTkSuQmCC";
|
||||
|
||||
const KNOWN_EMPTY_EXTRA_PROBE_MODELS = new Set(["openrouter/amazon/nova-2-lite-v1"]);
|
||||
|
||||
export function isLiveModelProbeEnabled(
|
||||
env: Record<string, string | undefined>,
|
||||
@@ -31,22 +33,39 @@ export function modelSupportsImageInput(model: Pick<Model<Api>, "input">): boole
|
||||
return model.input.includes("image");
|
||||
}
|
||||
|
||||
export function shouldSkipLiveModelExtraProbes(
|
||||
model: Pick<Model<Api>, "id" | "provider">,
|
||||
): boolean {
|
||||
return KNOWN_EMPTY_EXTRA_PROBE_MODELS.has(`${model.provider}/${model.id}`);
|
||||
}
|
||||
|
||||
export function buildLiveModelFileProbeContext(params: { systemPrompt?: string }): Context {
|
||||
return {
|
||||
systemPrompt: params.systemPrompt,
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text:
|
||||
"Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
|
||||
'<file path="live-model-probe.txt" mime="text/plain">\n' +
|
||||
`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}\n` +
|
||||
"</file>",
|
||||
},
|
||||
],
|
||||
content:
|
||||
"Read this file excerpt and reply with only the value after LIVE_FILE_TOKEN.\n\n" +
|
||||
"File: live-model-probe.txt\n" +
|
||||
"MIME: text/plain\n\n" +
|
||||
`LIVE_FILE_TOKEN=${LIVE_MODEL_FILE_PROBE_TOKEN}`,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
export function buildLiveModelFileProbeRetryContext(params: { systemPrompt?: string }): Context {
|
||||
return {
|
||||
systemPrompt: params.systemPrompt,
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content:
|
||||
"The file live-model-probe.txt contains exactly this token:\n\n" +
|
||||
`${LIVE_MODEL_FILE_PROBE_TOKEN}\n\n` +
|
||||
`Reply with exactly ${LIVE_MODEL_FILE_PROBE_TOKEN}.`,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
@@ -77,7 +96,7 @@ export function buildLiveModelImageProbeContext(params: { systemPrompt?: string
|
||||
}
|
||||
|
||||
export function fileProbeTextMatches(text: string): boolean {
|
||||
return text.toUpperCase().includes(LIVE_MODEL_FILE_PROBE_TOKEN);
|
||||
return text.toLowerCase().includes(LIVE_MODEL_FILE_PROBE_TOKEN.toLowerCase());
|
||||
}
|
||||
|
||||
export function imageProbeTextMatches(text: string): boolean {
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
} from "./live-model-filter.js";
|
||||
import {
|
||||
buildLiveModelFileProbeContext,
|
||||
buildLiveModelFileProbeRetryContext,
|
||||
buildLiveModelImageProbeContext,
|
||||
extractAssistantText,
|
||||
fileProbeTextMatches,
|
||||
@@ -27,6 +28,7 @@ import {
|
||||
LIVE_MODEL_FILE_PROBE_TOKEN,
|
||||
LIVE_MODEL_IMAGE_PROBE_ENV,
|
||||
modelSupportsImageInput,
|
||||
shouldSkipLiveModelExtraProbes,
|
||||
} from "./live-model-turn-probes.js";
|
||||
import { createLiveTargetMatcher } from "./live-target-matcher.js";
|
||||
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
|
||||
@@ -452,10 +454,14 @@ async function runExtraTurnProbes(params: {
|
||||
timeoutMs: number;
|
||||
progressLabel: string;
|
||||
}) {
|
||||
if (shouldSkipLiveModelExtraProbes(params.model)) {
|
||||
logProgress(`${params.progressLabel}: extra probes skipped (known empty route)`);
|
||||
return;
|
||||
}
|
||||
const options = {
|
||||
apiKey: params.apiKey,
|
||||
reasoning: resolveTestReasoning(params.model),
|
||||
maxTokens: 64,
|
||||
maxTokens: 128,
|
||||
};
|
||||
if (LIVE_FILE_PROBE_ENABLED) {
|
||||
logProgress(`${params.progressLabel}: file-read probe`);
|
||||
@@ -469,7 +475,25 @@ async function runExtraTurnProbes(params: {
|
||||
if (file.stopReason === "error") {
|
||||
throw new Error(file.errorMessage || "file-read probe returned error with no message");
|
||||
}
|
||||
const fileText = extractAssistantText(file);
|
||||
let fileText = extractAssistantText(file);
|
||||
if (!fileProbeTextMatches(fileText)) {
|
||||
logProgress(`${params.progressLabel}: file-read probe retry`);
|
||||
const retry = await completeSimpleWithTimeout(
|
||||
params.model,
|
||||
buildLiveModelFileProbeRetryContext({
|
||||
systemPrompt: resolveLiveSystemPrompt(params.model),
|
||||
}),
|
||||
options,
|
||||
params.timeoutMs,
|
||||
`${params.progressLabel}: file-read probe retry`,
|
||||
);
|
||||
if (retry.stopReason === "error") {
|
||||
throw new Error(
|
||||
retry.errorMessage || "file-read probe retry returned error with no message",
|
||||
);
|
||||
}
|
||||
fileText = extractAssistantText(retry);
|
||||
}
|
||||
if (!fileProbeTextMatches(fileText)) {
|
||||
throw new Error(`file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user