fix(qa): isolate patched suite scenarios

This commit is contained in:
Vincent Koc
2026-05-22 10:53:47 +02:00
parent a80476fbe9
commit beccdde5bf
9 changed files with 235 additions and 22 deletions

View File

@@ -29,6 +29,8 @@ Docs: https://docs.openclaw.ai
- fix(integrations): enforce channel read target allowlists [AI]. (#84982) Thanks @pgondhi987.
- Agents/code-mode: expose outer code-mode `exec` source through the `command` hook alias with `toolKind`/`toolInputKind` discriminators so exec-shaped policies can distinguish code-mode cells. (#83483) Thanks @Kaspre.
- QA-Lab: isolate multi-scenario suite workers when scenarios need startup config patches, preventing message-routing config from leaking into unrelated scenarios.
- QA-Lab: make the commitments heartbeat-target-none scenario request an immediate heartbeat instead of waiting for the next scheduled heartbeat.
- Gateway CLI: surface local post-challenge connect assembly failures immediately instead of waiting for the wrapper timeout. Fixes #68944. (#85253) Thanks @samzong.
- Agents/exec: treat denied exec approvals as terminal instead of feeding them back into agent follow-up work, and recognize Chinese stop phrases in abort handling. Fixes #69386. (#85194) Thanks @samzong.
- CLI/agents: abort accepted Gateway-backed `openclaw agent` runs on SIGINT/SIGTERM so cron and supervisor timeouts do not leave remote agent work alive. Fixes #71710. (#84381) Thanks @Kaspre.

View File

@@ -547,6 +547,7 @@ export async function startQaLabServer(
const { runQaSuite } = await import("./suite.js");
const result = await runQaSuite({
lab: labHandle ?? undefined,
startLab: startQaLabServer,
outputDir: createQaRunOutputDir(repoRoot),
providerMode: selection.providerMode,
primaryModel: selection.primaryModel,

View File

@@ -102,7 +102,7 @@ function createQaChannelReportNotes(params: QaTransportReportParams) {
provider.kind === "mock"
? `Runs against qa-channel + qa-lab bus + real gateway child + ${params.providerMode} provider.`
: `Runs against qa-channel + qa-lab bus + real gateway child + live frontier models (${params.primaryModel}, ${params.alternateModel})${params.fastMode ? " with fast mode enabled" : ""}.`,
params.concurrency > 1
params.isolatedWorkers === true
? `Scenarios run in isolated gateway workers with concurrency ${params.concurrency}.`
: "Scenarios run serially in one gateway worker.",
"Cron uses a one-minute schedule assertion plus forced execution for fast verification.",

View File

@@ -30,6 +30,7 @@ export type QaTransportReportParams = {
alternateModel: string;
fastMode: boolean;
concurrency: number;
isolatedWorkers?: boolean;
};
export type QaTransportGatewayConfig = Pick<OpenClawConfig, "channels" | "messages">;

View File

@@ -13,6 +13,7 @@ import {
resolveQaSuiteOutputDir,
scenarioRequiresControlUi,
selectQaSuiteScenarios,
shouldUseIsolatedQaSuiteScenarioWorkers,
} from "./suite-planning.js";
import { makeQaSuiteTestScenario } from "./suite-test-helpers.js";
@@ -302,6 +303,46 @@ describe("qa suite planning helpers", () => {
});
});
it("isolates multi-scenario serial runs when a scenario needs startup config", () => {
  // One plain scenario plus one carrying a gateway startup config patch.
  const baselineScenario = makeQaSuiteTestScenario("baseline");
  const patchedScenario = makeQaSuiteTestScenario("message-tool-mode", {
    gatewayConfigPatch: {
      messages: {
        groupChat: {
          visibleReplies: "message_tool",
        },
      },
    },
  });

  // Even at concurrency 1 the patched scenario should force isolated workers.
  const useIsolatedWorkers = shouldUseIsolatedQaSuiteScenarioWorkers({
    scenarios: [baselineScenario, patchedScenario],
    concurrency: 1,
  });

  expect(useIsolatedWorkers).toBe(true);
});
it("does not isolate plain serial scenario runs", () => {
  // No scenario carries a gateway config patch and the run is serial.
  const plainScenarios = [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")];

  const useIsolatedWorkers = shouldUseIsolatedQaSuiteScenarioWorkers({
    scenarios: plainScenarios,
    concurrency: 1,
  });

  expect(useIsolatedWorkers).toBe(false);
});
it("keeps concurrent runs on isolated workers", () => {
  // Concurrency above 1 with several scenarios isolates even without config patches.
  const plainScenarios = [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")];

  const useIsolatedWorkers = shouldUseIsolatedQaSuiteScenarioWorkers({
    scenarios: plainScenarios,
    concurrency: 2,
  });

  expect(useIsolatedWorkers).toBe(true);
});
it("enables Control UI only for Control UI scenario workers", () => {
expect(
scenarioRequiresControlUi(

View File

@@ -152,6 +152,17 @@ function collectQaSuiteGatewayRuntimeOptions(
return forwardHostHome ? { forwardHostHome: true } : undefined;
}
/**
 * Decides whether a suite run should give each scenario its own worker.
 *
 * Isolation only applies when more than one scenario is selected, and then
 * only if the run is concurrent or at least one scenario declares a
 * plain-object `gatewayConfigPatch`.
 *
 * @param params.scenarios - Scenarios selected for the run.
 * @param params.concurrency - Requested scenario concurrency.
 * @returns `true` when scenarios should run in isolated workers.
 */
function shouldUseIsolatedQaSuiteScenarioWorkers(params: {
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"];
  concurrency: number;
}) {
  const { scenarios, concurrency } = params;
  // A single scenario (or none) has nothing to be isolated from.
  if (!(scenarios.length > 1)) {
    return false;
  }
  // Concurrent multi-scenario runs always use isolated workers.
  if (concurrency > 1) {
    return true;
  }
  // Serial runs isolate only when some scenario carries a gateway config patch.
  return scenarios.some((candidate) => isQaPlainObject(candidate.gatewayConfigPatch));
}
/**
 * Reports whether a scenario targets the Control UI surface.
 *
 * The scenario's `surface` is normalized with
 * `normalizeLowercaseStringOrEmpty` before being compared to `"control-ui"`.
 */
function scenarioRequiresControlUi(scenario: QaSeedScenario) {
  const normalizedSurface = normalizeLowercaseStringOrEmpty(scenario.surface);
  return normalizedSurface === "control-ui";
}
@@ -268,5 +279,6 @@ export {
resolveQaSuiteOutputDir,
scenarioRequiresControlUi,
selectQaSuiteScenarios,
shouldUseIsolatedQaSuiteScenarioWorkers,
splitModelRef,
};

View File

@@ -1,4 +1,6 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { QaLabServerHandle } from "./lab-server.types.js";
import { makeQaSuiteTestScenario } from "./suite-test-helpers.js";
import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());
@@ -12,6 +14,19 @@ afterEach(() => {
vi.useRealTimers();
});
/**
 * Builds a stub `QaLabServerHandle` for tests.
 *
 * Both URLs point at the same fixed loopback address, `state` is an empty
 * placeholder cast to the handle's state type, and every method is a vitest
 * mock (`runSelfCheck` resolves to an empty object, `stop` resolves to nothing).
 */
function makeQaSuiteTestLabHandle(): QaLabServerHandle {
  const labUrl = "http://127.0.0.1:43123";
  const handle: QaLabServerHandle = {
    baseUrl: labUrl,
    listenUrl: labUrl,
    state: {} as QaLabServerHandle["state"],
    setControlUi: vi.fn(),
    setScenarioRun: vi.fn(),
    setLatestReport: vi.fn(),
    runSelfCheck: vi.fn(async () => {
      // A fresh empty result per call, cast to the self-check result type.
      return {} as Awaited<ReturnType<QaLabServerHandle["runSelfCheck"]>>;
    }),
    stop: vi.fn(async () => {}),
  };
  return handle;
}
describe("qa suite", () => {
it("rejects unsupported transport ids before starting the lab", async () => {
const startLab = vi.fn();
@@ -254,6 +269,84 @@ describe("qa suite", () => {
});
});
it("forwards run options into isolated scenario worker params", () => {
  const startLab = vi.fn();
  // A Control UI scenario that also carries a gateway startup config patch.
  const scenario = makeQaSuiteTestScenario("patched-control-ui", {
    surface: "control-ui",
    gatewayConfigPatch: {
      messages: {
        groupChat: {
          visibleReplies: "message_tool",
        },
      },
    },
  });

  const workerParams = qaSuiteProgressTesting.buildQaIsolatedScenarioWorkerParams({
    repoRoot: "/repo",
    outputDir: "/repo/.artifacts/qa-e2e/scenarios/patched-control-ui",
    providerMode: "mock-openai",
    transportId: "qa-channel",
    primaryModel: "mock-openai/gpt-5.5",
    alternateModel: "mock-openai/gpt-5.5-alt",
    fastMode: true,
    scenario,
    startLab,
    input: {
      thinkingDefault: "minimal",
      claudeCliAuthMode: "subscription",
      enabledPluginIds: ["acpx"],
      transportReadyTimeoutMs: 180_000,
      forcedRuntime: "codex",
    },
  });

  // The worker runs exactly this scenario serially and inherits the caller's options.
  expect(workerParams).toMatchObject({
    scenarioIds: ["patched-control-ui"],
    concurrency: 1,
    startLab,
    controlUiEnabled: true,
    thinkingDefault: "minimal",
    claudeCliAuthMode: "subscription",
    enabledPluginIds: ["acpx"],
    transportReadyTimeoutMs: 180_000,
    forcedRuntime: "codex",
  });
});
it("keeps caller-owned serial labs on shared workers without a launcher", () => {
  const scenarios = [
    makeQaSuiteTestScenario("baseline"),
    makeQaSuiteTestScenario("message-tool-mode", {
      gatewayConfigPatch: {
        messages: {
          groupChat: {
            visibleReplies: "message_tool",
          },
        },
      },
    }),
  ];
  const lab = makeQaSuiteTestLabHandle();
  const startLab = vi.fn();
  // Serial run against a lab supplied by the caller.
  const callerOwnedSerialRun = { scenarios, concurrency: 1, lab };

  // Without a launcher the run stays on the shared worker.
  const withoutLauncher = qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers({
    ...callerOwnedSerialRun,
  });
  expect(withoutLauncher).toBe(false);

  // Supplying a launcher enables isolated workers for the same run.
  const withLauncher = qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers({
    ...callerOwnedSerialRun,
    startLab,
  });
  expect(withLauncher).toBe(true);
});
it("remaps mock-openai model refs onto the app-server OpenAI provider for codex cells only", () => {
expect(
qaSuiteProgressTesting.remapModelRefForForcedRuntime({

View File

@@ -51,6 +51,7 @@ import {
resolveQaSuiteOutputDir,
scenarioRequiresControlUi,
selectQaSuiteScenarios,
shouldUseIsolatedQaSuiteScenarioWorkers,
splitModelRef,
} from "./suite-planning.js";
import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js";
@@ -214,6 +215,28 @@ function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuit
);
}
/**
 * Decides whether this suite run should actually use isolated scenario workers.
 *
 * Starts from the planning decision (`shouldUseIsolatedQaSuiteScenarioWorkers`)
 * and then opts out for a serial run (`concurrency === 1`) where the caller
 * supplied a lab handle but no `startLab` launcher.
 *
 * @param params.scenarios - Scenarios selected for the run.
 * @param params.concurrency - Effective scenario concurrency.
 * @param params.lab - Lab handle supplied by the caller, if any.
 * @param params.startLab - Lab launcher supplied by the caller, if any.
 * @returns `true` when the run should use isolated scenario workers.
 */
function shouldRunQaSuiteWithIsolatedScenarioWorkers(params: {
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"];
  concurrency: number;
  lab?: QaLabServerHandle;
  startLab?: QaSuiteStartLabFn;
}) {
  const { scenarios, concurrency, lab, startLab } = params;
  const planWantsIsolation = shouldUseIsolatedQaSuiteScenarioWorkers({ scenarios, concurrency });
  // A caller-owned lab on a serial run with no launcher stays on the shared worker.
  const isSerialCallerLabWithoutLauncher = concurrency === 1 && Boolean(lab) && !startLab;
  return planWantsIsolation && !isSerialCallerLabWithoutLauncher;
}
const QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
@@ -378,10 +401,43 @@ function createQaSuiteReportNotes(params: {
alternateModel: string;
fastMode: boolean;
concurrency: number;
isolatedWorkers?: boolean;
}) {
return params.transport.createReportNotes(params);
}
/**
 * Builds the `runQaSuite` parameters for one isolated scenario worker.
 *
 * The worker runs exactly one scenario (`scenarioIds: [scenario.id]`) with
 * `concurrency: 1`. Resolved run settings come from `params`; optional caller
 * settings (thinking default, Claude CLI auth mode, enabled plugin ids,
 * transport ready timeout, forced runtime) are forwarded from `params.input`
 * and are `undefined` when no input is given.
 *
 * @param params - Resolved run settings, the scenario to run, the lab
 *   launcher, and the caller's original run params (`input`).
 * @returns Parameters for the nested single-scenario suite run.
 */
function buildQaIsolatedScenarioWorkerParams(params: {
  repoRoot: string;
  outputDir: string;
  providerMode: QaProviderMode;
  transportId: QaTransportId;
  primaryModel: string;
  alternateModel: string;
  fastMode: boolean;
  scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number];
  input?: QaSuiteRunParams;
  startLab: QaSuiteStartLabFn;
}): QaSuiteRunParams {
  const { scenario, input, startLab } = params;
  const workerParams: QaSuiteRunParams = {
    repoRoot: params.repoRoot,
    outputDir: params.outputDir,
    providerMode: params.providerMode,
    transportId: params.transportId,
    primaryModel: params.primaryModel,
    alternateModel: params.alternateModel,
    fastMode: params.fastMode,
    thinkingDefault: input?.thinkingDefault,
    claudeCliAuthMode: input?.claudeCliAuthMode,
    scenarioIds: [scenario.id],
    enabledPluginIds: input?.enabledPluginIds,
    concurrency: 1,
    startLab,
    // Most isolated workers do not need their own Control UI proxy.
    // Control UI scenarios do, because they open the worker's
    // gateway-backed app directly.
    controlUiEnabled: scenarioRequiresControlUi(scenario),
    transportReadyTimeoutMs: input?.transportReadyTimeoutMs,
    forcedRuntime: input?.forcedRuntime,
  };
  return workerParams;
}
function normalizeQaSuiteModelRef(input: string | undefined, fallback: string) {
const model = input?.trim();
return model && model.length > 0 ? model : fallback;
@@ -770,6 +826,7 @@ async function writeQaSuiteArtifacts(params: {
alternateModel: string;
fastMode: boolean;
concurrency: number;
isolatedWorkers?: boolean;
scenarioIds?: readonly string[];
runtimePair?: [RuntimeId, RuntimeId];
}) {
@@ -974,6 +1031,12 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
progressEnabled,
`run start: scenarios=${selectedCatalogScenarios.length} concurrency=${concurrency} transport=${transportId}`,
);
const useIsolatedScenarioWorkers = shouldRunQaSuiteWithIsolatedScenarioWorkers({
scenarios: selectedCatalogScenarios,
concurrency,
lab: params?.lab,
startLab: params?.startLab,
});
if (params?.runtimePair) {
return await runQaRuntimeParitySuite({
@@ -998,7 +1061,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
});
}
if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
if (useIsolatedScenarioWorkers) {
const ownsLab = !params?.lab;
const startLab = requireQaSuiteStartLab(params?.startLab);
const lab =
@@ -1052,6 +1115,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
alternateModel,
fastMode,
concurrency,
isolatedWorkers: true,
scenarioIds:
params?.scenarioIds && params.scenarioIds.length > 0
? selectedCatalogScenarios.map((scenario) => scenario.id)
@@ -1093,25 +1157,20 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
updateScenarioRun();
try {
const scenarioOutputDir = path.join(outputDir, "scenarios", scenario.id);
const result: QaSuiteResult = await runQaSuite({
repoRoot,
outputDir: scenarioOutputDir,
providerMode,
transportId,
primaryModel,
alternateModel,
fastMode,
thinkingDefault: params?.thinkingDefault,
claudeCliAuthMode: params?.claudeCliAuthMode,
scenarioIds: [scenario.id],
enabledPluginIds: params?.enabledPluginIds,
concurrency: 1,
startLab,
// Most isolated workers do not need their own Control UI proxy.
// Control UI scenarios do, because they open the worker's
// gateway-backed app directly.
controlUiEnabled: scenarioRequiresControlUi(scenario),
});
const result: QaSuiteResult = await runQaSuite(
buildQaIsolatedScenarioWorkerParams({
repoRoot,
outputDir: scenarioOutputDir,
providerMode,
transportId,
primaryModel,
alternateModel,
fastMode,
startLab,
scenario,
input: params,
}),
);
const scenarioResult: QaSuiteScenarioResult =
result.scenarios[0] ??
({
@@ -1199,6 +1258,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
alternateModel,
fastMode,
concurrency,
isolatedWorkers: true,
// When the caller supplied an explicit non-empty --scenario filter,
// record the executed (post-selectQaSuiteScenarios-normalized) ids
// so the summary matches what actually ran. When the caller passed
@@ -1459,6 +1519,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
alternateModel,
fastMode,
concurrency,
isolatedWorkers: false,
// Same "filtered → executed list, unfiltered → null" convention as
// the concurrent-path writeQaSuiteArtifacts call above.
scenarioIds:
@@ -1512,6 +1573,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
export const qaSuiteProgressTesting = {
appendNodeOption,
buildQaGatewayHeapCheckpointRuntimeEnvPatch,
buildQaIsolatedScenarioWorkerParams,
buildQaSuiteRuntimeMetrics,
buildQaRuntimeEnvPatch,
mergeQaRuntimeEnvPatches,
@@ -1519,6 +1581,7 @@ export const qaSuiteProgressTesting = {
remapModelRefForForcedRuntime,
resolveQaSuiteTransportReadyTimeoutMs,
sanitizeQaSuiteProgressValue,
shouldRunQaSuiteWithIsolatedScenarioWorkers,
shouldLogQaSuiteProgress,
waitForQaLabReadyOrStopOwned,
};

View File

@@ -94,7 +94,7 @@ steps:
- call: env.gateway.call
args:
- wake
- mode: next-heartbeat
- mode: now
text: Commitments target none QA wake
- timeoutMs: 30000
- call: waitForCondition