mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-28 01:13:03 +00:00
fix(qa): isolate patched suite scenarios
This commit is contained in:
@@ -29,6 +29,8 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- fix(integrations): enforce channel read target allowlists [AI]. (#84982) Thanks @pgondhi987.
|
||||
- Agents/code-mode: expose outer code-mode `exec` source through the `command` hook alias with `toolKind`/`toolInputKind` discriminators so exec-shaped policies can distinguish code-mode cells. (#83483) Thanks @Kaspre.
|
||||
- QA-Lab: isolate multi-scenario suite workers when scenarios need startup config patches, preventing message-routing config from leaking into unrelated scenarios.
|
||||
- QA-Lab: make the commitments heartbeat-target-none scenario request an immediate heartbeat instead of waiting for the next scheduled heartbeat.
|
||||
- Gateway CLI: surface local post-challenge connect assembly failures immediately instead of waiting for the wrapper timeout. Fixes #68944. (#85253) Thanks @samzong.
|
||||
- Agents/exec: treat denied exec approvals as terminal instead of feeding them back into agent follow-up work, and recognize Chinese stop phrases in abort handling. Fixes #69386. (#85194) Thanks @samzong.
|
||||
- CLI/agents: abort accepted Gateway-backed `openclaw agent` runs on SIGINT/SIGTERM so cron and supervisor timeouts do not leave remote agent work alive. Fixes #71710. (#84381) Thanks @Kaspre.
|
||||
|
||||
@@ -547,6 +547,7 @@ export async function startQaLabServer(
|
||||
const { runQaSuite } = await import("./suite.js");
|
||||
const result = await runQaSuite({
|
||||
lab: labHandle ?? undefined,
|
||||
startLab: startQaLabServer,
|
||||
outputDir: createQaRunOutputDir(repoRoot),
|
||||
providerMode: selection.providerMode,
|
||||
primaryModel: selection.primaryModel,
|
||||
|
||||
@@ -102,7 +102,7 @@ function createQaChannelReportNotes(params: QaTransportReportParams) {
|
||||
provider.kind === "mock"
|
||||
? `Runs against qa-channel + qa-lab bus + real gateway child + ${params.providerMode} provider.`
|
||||
: `Runs against qa-channel + qa-lab bus + real gateway child + live frontier models (${params.primaryModel}, ${params.alternateModel})${params.fastMode ? " with fast mode enabled" : ""}.`,
|
||||
params.concurrency > 1
|
||||
params.isolatedWorkers === true
|
||||
? `Scenarios run in isolated gateway workers with concurrency ${params.concurrency}.`
|
||||
: "Scenarios run serially in one gateway worker.",
|
||||
"Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
|
||||
|
||||
@@ -30,6 +30,7 @@ export type QaTransportReportParams = {
|
||||
alternateModel: string;
|
||||
fastMode: boolean;
|
||||
concurrency: number;
|
||||
isolatedWorkers?: boolean;
|
||||
};
|
||||
|
||||
/** Slice of `OpenClawConfig` restricted to its `channels` and `messages` keys. */
export type QaTransportGatewayConfig = Pick<OpenClawConfig, "channels" | "messages">;
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
resolveQaSuiteOutputDir,
|
||||
scenarioRequiresControlUi,
|
||||
selectQaSuiteScenarios,
|
||||
shouldUseIsolatedQaSuiteScenarioWorkers,
|
||||
} from "./suite-planning.js";
|
||||
import { makeQaSuiteTestScenario } from "./suite-test-helpers.js";
|
||||
|
||||
@@ -302,6 +303,46 @@ describe("qa suite planning helpers", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("isolates multi-scenario serial runs when a scenario needs startup config", () => {
  // One plain scenario followed by one that carries a gateway config patch.
  const baselineScenario = makeQaSuiteTestScenario("baseline");
  const patchedScenario = makeQaSuiteTestScenario("message-tool-mode", {
    gatewayConfigPatch: {
      messages: { groupChat: { visibleReplies: "message_tool" } },
    },
  });
  const scenarios = [baselineScenario, patchedScenario];

  // Serial (concurrency 1) run: the patch alone must trigger isolation.
  const isolated = shouldUseIsolatedQaSuiteScenarioWorkers({ scenarios, concurrency: 1 });

  expect(isolated).toBe(true);
});
|
||||
|
||||
it("does not isolate plain serial scenario runs", () => {
  // Two scenarios, neither with a gateway config patch, run serially.
  const scenarios = [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")];

  const isolated = shouldUseIsolatedQaSuiteScenarioWorkers({ scenarios, concurrency: 1 });

  expect(isolated).toBe(false);
});
|
||||
|
||||
it("keeps concurrent runs on isolated workers", () => {
  // Same unpatched scenarios as the serial case, but with concurrency above one.
  const scenarios = [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")];

  const isolated = shouldUseIsolatedQaSuiteScenarioWorkers({ scenarios, concurrency: 2 });

  expect(isolated).toBe(true);
});
|
||||
|
||||
it("enables Control UI only for Control UI scenario workers", () => {
|
||||
expect(
|
||||
scenarioRequiresControlUi(
|
||||
|
||||
@@ -152,6 +152,17 @@ function collectQaSuiteGatewayRuntimeOptions(
|
||||
return forwardHostHome ? { forwardHostHome: true } : undefined;
|
||||
}
|
||||
|
||||
/**
 * Decides whether the selected scenarios should each run in an isolated worker.
 *
 * Isolation is only considered when more than one scenario is selected. It is
 * then chosen if the run is concurrent, or if any scenario carries a
 * `gatewayConfigPatch` accepted by `isQaPlainObject`.
 *
 * @param params.scenarios - Scenarios selected for this run.
 * @param params.concurrency - Requested worker concurrency.
 * @returns `true` when scenarios should run in isolated workers.
 */
function shouldUseIsolatedQaSuiteScenarioWorkers(params: {
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"];
  concurrency: number;
}) {
  const { scenarios, concurrency } = params;

  // A single scenario (or none) never needs isolation.
  if (scenarios.length <= 1) {
    return false;
  }

  // Concurrent runs always use isolated workers.
  if (concurrency > 1) {
    return true;
  }

  // Serial runs isolate only when some scenario brings a gateway config patch.
  return scenarios.some((scenario) => isQaPlainObject(scenario.gatewayConfigPatch));
}
|
||||
|
||||
/**
 * Reports whether a scenario targets the Control UI surface.
 *
 * @param scenario - Scenario whose `surface` field is inspected.
 * @returns `true` when the normalized surface equals `"control-ui"`.
 */
function scenarioRequiresControlUi(scenario: QaSeedScenario) {
  const surface = normalizeLowercaseStringOrEmpty(scenario.surface);
  return surface === "control-ui";
}
|
||||
@@ -268,5 +279,6 @@ export {
|
||||
resolveQaSuiteOutputDir,
|
||||
scenarioRequiresControlUi,
|
||||
selectQaSuiteScenarios,
|
||||
shouldUseIsolatedQaSuiteScenarioWorkers,
|
||||
splitModelRef,
|
||||
};
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { QaLabServerHandle } from "./lab-server.types.js";
|
||||
import { makeQaSuiteTestScenario } from "./suite-test-helpers.js";
|
||||
import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
|
||||
|
||||
const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());
|
||||
@@ -12,6 +14,19 @@ afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
/**
 * Builds a stub `QaLabServerHandle` for tests: fixed loopback URLs, an empty
 * `state` placeholder, and `vi.fn()` mocks for every method.
 */
function makeQaSuiteTestLabHandle(): QaLabServerHandle {
  // Both URL fields point at the same fixed loopback address.
  const labUrl = "http://127.0.0.1:43123";

  const handle: QaLabServerHandle = {
    baseUrl: labUrl,
    listenUrl: labUrl,
    state: {} as QaLabServerHandle["state"],
    setControlUi: vi.fn(),
    setScenarioRun: vi.fn(),
    setLatestReport: vi.fn(),
    runSelfCheck: vi.fn(async () => ({}) as Awaited<ReturnType<QaLabServerHandle["runSelfCheck"]>>),
    stop: vi.fn(async () => {}),
  };

  return handle;
}
|
||||
|
||||
describe("qa suite", () => {
|
||||
it("rejects unsupported transport ids before starting the lab", async () => {
|
||||
const startLab = vi.fn();
|
||||
@@ -254,6 +269,84 @@ describe("qa suite", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("forwards run options into isolated scenario worker params", () => {
  const startLab = vi.fn();
  // Control UI scenario that also carries a gateway config patch.
  const scenario = makeQaSuiteTestScenario("patched-control-ui", {
    surface: "control-ui",
    gatewayConfigPatch: {
      messages: { groupChat: { visibleReplies: "message_tool" } },
    },
  });

  const workerParams = qaSuiteProgressTesting.buildQaIsolatedScenarioWorkerParams({
    repoRoot: "/repo",
    outputDir: "/repo/.artifacts/qa-e2e/scenarios/patched-control-ui",
    providerMode: "mock-openai",
    transportId: "qa-channel",
    primaryModel: "mock-openai/gpt-5.5",
    alternateModel: "mock-openai/gpt-5.5-alt",
    fastMode: true,
    scenario,
    startLab,
    // Caller-level run options that are expected to reach the worker unchanged.
    input: {
      thinkingDefault: "minimal",
      claudeCliAuthMode: "subscription",
      enabledPluginIds: ["acpx"],
      transportReadyTimeoutMs: 180_000,
      forcedRuntime: "codex",
    },
  });

  // The worker is pinned to this one scenario at concurrency 1, with the
  // Control UI enabled because the scenario's surface is "control-ui".
  expect(workerParams).toMatchObject({
    scenarioIds: ["patched-control-ui"],
    concurrency: 1,
    startLab,
    controlUiEnabled: true,
    thinkingDefault: "minimal",
    claudeCliAuthMode: "subscription",
    enabledPluginIds: ["acpx"],
    transportReadyTimeoutMs: 180_000,
    forcedRuntime: "codex",
  });
});
|
||||
|
||||
it("keeps caller-owned serial labs on shared workers without a launcher", () => {
  const baselineScenario = makeQaSuiteTestScenario("baseline");
  const patchedScenario = makeQaSuiteTestScenario("message-tool-mode", {
    gatewayConfigPatch: {
      messages: { groupChat: { visibleReplies: "message_tool" } },
    },
  });
  const scenarios = [baselineScenario, patchedScenario];
  const lab = makeQaSuiteTestLabHandle();
  const startLab = vi.fn();

  // Serial run against a caller-supplied lab; only the launcher differs below.
  const serialRunOnCallerLab = { scenarios, concurrency: 1, lab };

  // Without a launcher the run stays on the shared worker.
  const withoutLauncher =
    qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers(serialRunOnCallerLab);
  expect(withoutLauncher).toBe(false);

  // With a launcher the patched scenario set gets isolated workers.
  const withLauncher = qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers({
    ...serialRunOnCallerLab,
    startLab,
  });
  expect(withLauncher).toBe(true);
});
|
||||
|
||||
it("remaps mock-openai model refs onto the app-server OpenAI provider for codex cells only", () => {
|
||||
expect(
|
||||
qaSuiteProgressTesting.remapModelRefForForcedRuntime({
|
||||
|
||||
@@ -51,6 +51,7 @@ import {
|
||||
resolveQaSuiteOutputDir,
|
||||
scenarioRequiresControlUi,
|
||||
selectQaSuiteScenarios,
|
||||
shouldUseIsolatedQaSuiteScenarioWorkers,
|
||||
splitModelRef,
|
||||
} from "./suite-planning.js";
|
||||
import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js";
|
||||
@@ -214,6 +215,28 @@ function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuit
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Runtime decision on whether this suite run uses isolated scenario workers.
 *
 * Starts from the planning decision made by
 * `shouldUseIsolatedQaSuiteScenarioWorkers`, then opts out for one case: a
 * serial run (`concurrency === 1`) that was given a `lab` but no `startLab`.
 * Concurrent runs are never opted out here.
 *
 * @param params.scenarios - Scenarios selected for this run.
 * @param params.concurrency - Requested worker concurrency.
 * @param params.lab - Lab handle supplied by the caller, if any.
 * @param params.startLab - Lab launcher supplied by the caller, if any.
 * @returns `true` when the run should use isolated scenario workers.
 */
function shouldRunQaSuiteWithIsolatedScenarioWorkers(params: {
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"];
  concurrency: number;
  lab?: QaLabServerHandle;
  startLab?: QaSuiteStartLabFn;
}) {
  const { scenarios, concurrency, lab, startLab } = params;

  const planningWantsIsolation = shouldUseIsolatedQaSuiteScenarioWorkers({
    scenarios,
    concurrency,
  });
  if (!planningWantsIsolation) {
    return false;
  }

  // Serial run on a caller-provided lab with no launcher: keep the shared worker.
  const serialCallerLabWithoutLauncher = concurrency === 1 && Boolean(lab) && !startLab;
  return !serialCallerLabWithoutLauncher;
}
|
||||
|
||||
const QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
|
||||
|
||||
@@ -378,10 +401,43 @@ function createQaSuiteReportNotes(params: {
|
||||
alternateModel: string;
|
||||
fastMode: boolean;
|
||||
concurrency: number;
|
||||
isolatedWorkers?: boolean;
|
||||
}) {
|
||||
return params.transport.createReportNotes(params);
|
||||
}
|
||||
|
||||
/**
 * Assembles the `QaSuiteRunParams` for a single-scenario isolated worker run.
 *
 * Run-level values (repo root, output dir, provider, transport, models, fast
 * mode) are copied from `params`. Optional caller settings are read from
 * `params.input` and are `undefined` when `input` is absent. The worker is
 * pinned to the one scenario at concurrency 1.
 *
 * @param params.scenario - Scenario the worker will run.
 * @param params.input - Original suite run params whose optional settings are forwarded.
 * @param params.startLab - Lab launcher handed to the worker run.
 * @returns Params for the worker's suite run.
 */
function buildQaIsolatedScenarioWorkerParams(params: {
  repoRoot: string;
  outputDir: string;
  providerMode: QaProviderMode;
  transportId: QaTransportId;
  primaryModel: string;
  alternateModel: string;
  fastMode: boolean;
  scenario: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"][number];
  input?: QaSuiteRunParams;
  startLab: QaSuiteStartLabFn;
}): QaSuiteRunParams {
  const {
    repoRoot,
    outputDir,
    providerMode,
    transportId,
    primaryModel,
    alternateModel,
    fastMode,
    scenario,
    input,
    startLab,
  } = params;

  return {
    repoRoot,
    outputDir,
    providerMode,
    transportId,
    primaryModel,
    alternateModel,
    fastMode,
    thinkingDefault: input?.thinkingDefault,
    claudeCliAuthMode: input?.claudeCliAuthMode,
    scenarioIds: [scenario.id],
    enabledPluginIds: input?.enabledPluginIds,
    concurrency: 1,
    startLab,
    // Most isolated workers do not need their own Control UI proxy.
    // Control UI scenarios do, because they open the worker's
    // gateway-backed app directly.
    controlUiEnabled: scenarioRequiresControlUi(scenario),
    transportReadyTimeoutMs: input?.transportReadyTimeoutMs,
    forcedRuntime: input?.forcedRuntime,
  };
}
|
||||
|
||||
function normalizeQaSuiteModelRef(input: string | undefined, fallback: string) {
|
||||
const model = input?.trim();
|
||||
return model && model.length > 0 ? model : fallback;
|
||||
@@ -770,6 +826,7 @@ async function writeQaSuiteArtifacts(params: {
|
||||
alternateModel: string;
|
||||
fastMode: boolean;
|
||||
concurrency: number;
|
||||
isolatedWorkers?: boolean;
|
||||
scenarioIds?: readonly string[];
|
||||
runtimePair?: [RuntimeId, RuntimeId];
|
||||
}) {
|
||||
@@ -974,6 +1031,12 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
progressEnabled,
|
||||
`run start: scenarios=${selectedCatalogScenarios.length} concurrency=${concurrency} transport=${transportId}`,
|
||||
);
|
||||
const useIsolatedScenarioWorkers = shouldRunQaSuiteWithIsolatedScenarioWorkers({
|
||||
scenarios: selectedCatalogScenarios,
|
||||
concurrency,
|
||||
lab: params?.lab,
|
||||
startLab: params?.startLab,
|
||||
});
|
||||
|
||||
if (params?.runtimePair) {
|
||||
return await runQaRuntimeParitySuite({
|
||||
@@ -998,7 +1061,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
});
|
||||
}
|
||||
|
||||
if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
|
||||
if (useIsolatedScenarioWorkers) {
|
||||
const ownsLab = !params?.lab;
|
||||
const startLab = requireQaSuiteStartLab(params?.startLab);
|
||||
const lab =
|
||||
@@ -1052,6 +1115,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
alternateModel,
|
||||
fastMode,
|
||||
concurrency,
|
||||
isolatedWorkers: true,
|
||||
scenarioIds:
|
||||
params?.scenarioIds && params.scenarioIds.length > 0
|
||||
? selectedCatalogScenarios.map((scenario) => scenario.id)
|
||||
@@ -1093,25 +1157,20 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
updateScenarioRun();
|
||||
try {
|
||||
const scenarioOutputDir = path.join(outputDir, "scenarios", scenario.id);
|
||||
const result: QaSuiteResult = await runQaSuite({
|
||||
repoRoot,
|
||||
outputDir: scenarioOutputDir,
|
||||
providerMode,
|
||||
transportId,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
thinkingDefault: params?.thinkingDefault,
|
||||
claudeCliAuthMode: params?.claudeCliAuthMode,
|
||||
scenarioIds: [scenario.id],
|
||||
enabledPluginIds: params?.enabledPluginIds,
|
||||
concurrency: 1,
|
||||
startLab,
|
||||
// Most isolated workers do not need their own Control UI proxy.
|
||||
// Control UI scenarios do, because they open the worker's
|
||||
// gateway-backed app directly.
|
||||
controlUiEnabled: scenarioRequiresControlUi(scenario),
|
||||
});
|
||||
const result: QaSuiteResult = await runQaSuite(
|
||||
buildQaIsolatedScenarioWorkerParams({
|
||||
repoRoot,
|
||||
outputDir: scenarioOutputDir,
|
||||
providerMode,
|
||||
transportId,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
startLab,
|
||||
scenario,
|
||||
input: params,
|
||||
}),
|
||||
);
|
||||
const scenarioResult: QaSuiteScenarioResult =
|
||||
result.scenarios[0] ??
|
||||
({
|
||||
@@ -1199,6 +1258,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
alternateModel,
|
||||
fastMode,
|
||||
concurrency,
|
||||
isolatedWorkers: true,
|
||||
// When the caller supplied an explicit non-empty --scenario filter,
|
||||
// record the executed (post-selectQaSuiteScenarios-normalized) ids
|
||||
// so the summary matches what actually ran. When the caller passed
|
||||
@@ -1459,6 +1519,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
alternateModel,
|
||||
fastMode,
|
||||
concurrency,
|
||||
isolatedWorkers: false,
|
||||
// Same "filtered → executed list, unfiltered → null" convention as
|
||||
// the concurrent-path writeQaSuiteArtifacts call above.
|
||||
scenarioIds:
|
||||
@@ -1512,6 +1573,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
export const qaSuiteProgressTesting = {
|
||||
appendNodeOption,
|
||||
buildQaGatewayHeapCheckpointRuntimeEnvPatch,
|
||||
buildQaIsolatedScenarioWorkerParams,
|
||||
buildQaSuiteRuntimeMetrics,
|
||||
buildQaRuntimeEnvPatch,
|
||||
mergeQaRuntimeEnvPatches,
|
||||
@@ -1519,6 +1581,7 @@ export const qaSuiteProgressTesting = {
|
||||
remapModelRefForForcedRuntime,
|
||||
resolveQaSuiteTransportReadyTimeoutMs,
|
||||
sanitizeQaSuiteProgressValue,
|
||||
shouldRunQaSuiteWithIsolatedScenarioWorkers,
|
||||
shouldLogQaSuiteProgress,
|
||||
waitForQaLabReadyOrStopOwned,
|
||||
};
|
||||
|
||||
@@ -94,7 +94,7 @@ steps:
|
||||
- call: env.gateway.call
|
||||
args:
|
||||
- wake
|
||||
- mode: next-heartbeat
|
||||
- mode: now
|
||||
text: Commitments target none QA wake
|
||||
- timeoutMs: 30000
|
||||
- call: waitForCondition
|
||||
|
||||
Reference in New Issue
Block a user