mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 18:50:42 +00:00
test: filter live qa scenario lanes
This commit is contained in:
@@ -26,6 +26,14 @@ describe("qa model-switch evaluation", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("accepts concise handed-off phrasing from live models", () => {
|
||||
expect(
|
||||
hasModelSwitchContinuityEvidence(
|
||||
"The harness has handed off to the alternate model for this turn, and the read tool confirms continued access to the QA scenario pack mission.",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("accepts concise paraphrases of the kickoff task after a handoff", () => {
|
||||
expect(
|
||||
hasModelSwitchContinuityEvidence(
|
||||
|
||||
@@ -3,7 +3,11 @@ import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtim
|
||||
export function hasModelSwitchContinuityEvidence(text: string) {
|
||||
const lower = normalizeLowercaseStringOrEmpty(text);
|
||||
const mentionsHandoff =
|
||||
lower.includes("handoff") || lower.includes("model switch") || lower.includes("switched");
|
||||
lower.includes("handoff") ||
|
||||
lower.includes("handed off") ||
|
||||
lower.includes("handed-off") ||
|
||||
lower.includes("model switch") ||
|
||||
lower.includes("switched");
|
||||
const mentionsKickoffTask =
|
||||
lower.includes("qa_kickoff_task") ||
|
||||
lower.includes("qa/scenarios/index.md") ||
|
||||
|
||||
@@ -127,8 +127,8 @@ describe("qa scenario catalog", () => {
|
||||
const scenario = readQaScenarioById("gpt54-thinking-visibility-switch");
|
||||
const config = readQaScenarioExecutionConfig("gpt54-thinking-visibility-switch") as
|
||||
| {
|
||||
requiredLiveProvider?: string;
|
||||
requiredLiveModel?: string;
|
||||
requiredProvider?: string;
|
||||
requiredModel?: string;
|
||||
offDirective?: string;
|
||||
maxDirective?: string;
|
||||
reasoningDirective?: string;
|
||||
@@ -136,8 +136,8 @@ describe("qa scenario catalog", () => {
|
||||
| undefined;
|
||||
|
||||
expect(scenario.sourcePath).toBe("qa/scenarios/models/gpt54-thinking-visibility-switch.md");
|
||||
expect(config?.requiredLiveProvider).toBe("openai");
|
||||
expect(config?.requiredLiveModel).toBe("gpt-5.4");
|
||||
expect(config?.requiredProvider).toBe("openai");
|
||||
expect(config?.requiredModel).toBe("gpt-5.4");
|
||||
expect(config?.offDirective).toBe("/think off");
|
||||
expect(config?.maxDirective).toBe("/think max");
|
||||
expect(config?.reasoningDirective).toBe("/reasoning on");
|
||||
|
||||
@@ -250,4 +250,38 @@ describe("qa suite planning helpers", () => {
|
||||
}).map((scenario) => scenario.id),
|
||||
).toEqual(["generic", "claude-subscription"]);
|
||||
});
|
||||
|
||||
it("filters env-gated scenarios from an implicit live lane", () => {
|
||||
const previous = process.env.OPENCLAW_LIVE_SETUP_TOKEN_VALUE;
|
||||
delete process.env.OPENCLAW_LIVE_SETUP_TOKEN_VALUE;
|
||||
try {
|
||||
const scenarios = [
|
||||
makeQaSuiteTestScenario("generic"),
|
||||
makeQaSuiteTestScenario("anthropic-api-key", {
|
||||
config: { requiredProvider: "anthropic", requiredModel: "claude-opus-4-6" },
|
||||
}),
|
||||
makeQaSuiteTestScenario("anthropic-setup-token", {
|
||||
config: {
|
||||
requiredProvider: "anthropic",
|
||||
requiredModel: "claude-opus-4-6",
|
||||
requiredEnv: "OPENCLAW_LIVE_SETUP_TOKEN_VALUE",
|
||||
},
|
||||
}),
|
||||
];
|
||||
|
||||
expect(
|
||||
selectQaSuiteScenarios({
|
||||
scenarios,
|
||||
providerMode: "live-frontier",
|
||||
primaryModel: "anthropic/claude-opus-4-6",
|
||||
}).map((scenario) => scenario.id),
|
||||
).toEqual(["generic", "anthropic-api-key"]);
|
||||
} finally {
|
||||
if (previous === undefined) {
|
||||
delete process.env.OPENCLAW_LIVE_SETUP_TOKEN_VALUE;
|
||||
} else {
|
||||
process.env.OPENCLAW_LIVE_SETUP_TOKEN_VALUE = previous;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -32,10 +32,12 @@ function scenarioMatchesLiveLane(params: {
|
||||
primaryModel: string;
|
||||
providerMode: QaProviderMode;
|
||||
claudeCliAuthMode?: QaCliBackendAuthMode;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
}) {
|
||||
if (getQaProvider(params.providerMode).kind !== "live") {
|
||||
return true;
|
||||
}
|
||||
const env = params.env ?? process.env;
|
||||
const selected = splitModelRef(params.primaryModel);
|
||||
const config = params.scenario.execution.config ?? {};
|
||||
const requiredProvider = normalizeQaConfigString(config.requiredProvider);
|
||||
@@ -50,6 +52,10 @@ function scenarioMatchesLiveLane(params: {
|
||||
if (requiredAuthMode && params.claudeCliAuthMode !== requiredAuthMode) {
|
||||
return false;
|
||||
}
|
||||
const requiredEnv = normalizeQaConfigString(config.requiredEnv);
|
||||
if (requiredEnv && !env[requiredEnv]?.trim()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user