diff --git a/CHANGELOG.md b/CHANGELOG.md index e505e43cd33..52f7666665d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ Docs: https://docs.openclaw.ai - fix(integrations): enforce channel read target allowlists [AI]. (#84982) Thanks @pgondhi987. - Agents/code-mode: expose outer code-mode `exec` source through the `command` hook alias with `toolKind`/`toolInputKind` discriminators so exec-shaped policies can distinguish code-mode cells. (#83483) Thanks @Kaspre. +- QA-Lab: isolate multi-scenario suite workers when scenarios need startup config patches, preventing message-routing config from leaking into unrelated scenarios. +- QA-Lab: make the commitments heartbeat-target-none scenario request an immediate heartbeat instead of waiting for the next scheduled heartbeat. - Gateway CLI: surface local post-challenge connect assembly failures immediately instead of waiting for the wrapper timeout. Fixes #68944. (#85253) Thanks @samzong. - Agents/exec: treat denied exec approvals as terminal instead of feeding them back into agent follow-up work, and recognize Chinese stop phrases in abort handling. Fixes #69386. (#85194) Thanks @samzong. - CLI/agents: abort accepted Gateway-backed `openclaw agent` runs on SIGINT/SIGTERM so cron and supervisor timeouts do not leave remote agent work alive. Fixes #71710. (#84381) Thanks @Kaspre. diff --git a/extensions/qa-lab/src/lab-server.ts b/extensions/qa-lab/src/lab-server.ts index 3c0766df384..32d1c6c2715 100644 --- a/extensions/qa-lab/src/lab-server.ts +++ b/extensions/qa-lab/src/lab-server.ts @@ -547,6 +547,7 @@ export async function startQaLabServer( const { runQaSuite } = await import("./suite.js"); const result = await runQaSuite({ lab: labHandle ?? undefined, + startLab: startQaLabServer, outputDir: createQaRunOutputDir(repoRoot), providerMode: selection.providerMode, primaryModel: selection.primaryModel, diff --git a/extensions/qa-lab/src/qa-channel-transport.ts b/extensions/qa-lab/src/qa-channel-transport.ts index 1612b4ee912..552a0adc811 100644 --- a/extensions/qa-lab/src/qa-channel-transport.ts +++ b/extensions/qa-lab/src/qa-channel-transport.ts @@ -102,7 +102,7 @@ function createQaChannelReportNotes(params: QaTransportReportParams) { provider.kind === "mock" ? `Runs against qa-channel + qa-lab bus + real gateway child + ${params.providerMode} provider.` : `Runs against qa-channel + qa-lab bus + real gateway child + live frontier models (${params.primaryModel}, ${params.alternateModel})${params.fastMode ? " with fast mode enabled" : ""}.`, - params.concurrency > 1 + params.isolatedWorkers === true ? `Scenarios run in isolated gateway workers with concurrency ${params.concurrency}.` : "Scenarios run serially in one gateway worker.", "Cron uses a one-minute schedule assertion plus forced execution for fast verification.", diff --git a/extensions/qa-lab/src/qa-transport.ts b/extensions/qa-lab/src/qa-transport.ts index 1c3a372951f..9de9a9e6cd8 100644 --- a/extensions/qa-lab/src/qa-transport.ts +++ b/extensions/qa-lab/src/qa-transport.ts @@ -30,6 +30,7 @@ export type QaTransportReportParams = { alternateModel: string; fastMode: boolean; concurrency: number; + isolatedWorkers?: boolean; }; export type QaTransportGatewayConfig = Pick; diff --git a/extensions/qa-lab/src/suite-planning.test.ts b/extensions/qa-lab/src/suite-planning.test.ts index 32b0604e829..142fcf8cb43 100644 --- a/extensions/qa-lab/src/suite-planning.test.ts +++ b/extensions/qa-lab/src/suite-planning.test.ts @@ -13,6 +13,7 @@ import { resolveQaSuiteOutputDir, scenarioRequiresControlUi, selectQaSuiteScenarios, + shouldUseIsolatedQaSuiteScenarioWorkers, } from "./suite-planning.js"; import { makeQaSuiteTestScenario } from "./suite-test-helpers.js"; @@ -302,6 +303,46 @@ describe("qa suite planning helpers", () => { }); }); + it("isolates multi-scenario serial runs when a scenario needs startup config", () => { + const scenarios = [ + makeQaSuiteTestScenario("baseline"), + makeQaSuiteTestScenario("message-tool-mode", { + gatewayConfigPatch: { + messages: { + groupChat: { + visibleReplies: "message_tool", + }, + }, + }, + }), + ]; + + expect( + shouldUseIsolatedQaSuiteScenarioWorkers({ + scenarios, + concurrency: 1, + }), + ).toBe(true); + }); + + it("does not isolate plain serial scenario runs", () => { + expect( + shouldUseIsolatedQaSuiteScenarioWorkers({ + scenarios: [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")], + concurrency: 1, + }), + ).toBe(false); + }); + + it("keeps concurrent runs on isolated workers", () => { + expect( + shouldUseIsolatedQaSuiteScenarioWorkers({ + scenarios: [makeQaSuiteTestScenario("first"), makeQaSuiteTestScenario("second")], + concurrency: 2, + }), + ).toBe(true); + }); + it("enables Control UI only for Control UI scenario workers", () => { expect( scenarioRequiresControlUi( diff --git a/extensions/qa-lab/src/suite-planning.ts b/extensions/qa-lab/src/suite-planning.ts index b7b4938b74f..441090eb93d 100644 --- a/extensions/qa-lab/src/suite-planning.ts +++ b/extensions/qa-lab/src/suite-planning.ts @@ -152,6 +152,17 @@ function collectQaSuiteGatewayRuntimeOptions( return forwardHostHome ? { forwardHostHome: true } : undefined; } +function shouldUseIsolatedQaSuiteScenarioWorkers(params: { + scenarios: ReturnType["scenarios"]; + concurrency: number; +}) { + return ( + params.scenarios.length > 1 && + (params.concurrency > 1 || + params.scenarios.some((scenario) => isQaPlainObject(scenario.gatewayConfigPatch))) + ); +} + function scenarioRequiresControlUi(scenario: QaSeedScenario) { return normalizeLowercaseStringOrEmpty(scenario.surface) === "control-ui"; } @@ -268,5 +279,6 @@ export { resolveQaSuiteOutputDir, scenarioRequiresControlUi, selectQaSuiteScenarios, + shouldUseIsolatedQaSuiteScenarioWorkers, splitModelRef, }; diff --git a/extensions/qa-lab/src/suite.test.ts b/extensions/qa-lab/src/suite.test.ts index e77efb207d1..2c277f8eddb 100644 --- a/extensions/qa-lab/src/suite.test.ts +++ b/extensions/qa-lab/src/suite.test.ts @@ -1,4 +1,6 @@ import { afterEach, describe, expect, it, vi } from "vitest"; +import type { QaLabServerHandle } from "./lab-server.types.js"; +import { makeQaSuiteTestScenario } from "./suite-test-helpers.js"; import { qaSuiteProgressTesting, runQaSuite } from "./suite.js"; const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn()); @@ -12,6 +14,19 @@ afterEach(() => { vi.useRealTimers(); }); +function makeQaSuiteTestLabHandle(): QaLabServerHandle { + return { + baseUrl: "http://127.0.0.1:43123", + listenUrl: "http://127.0.0.1:43123", + state: {} as QaLabServerHandle["state"], + setControlUi: vi.fn(), + setScenarioRun: vi.fn(), + setLatestReport: vi.fn(), + runSelfCheck: vi.fn(async () => ({}) as Awaited>), + stop: vi.fn(async () => {}), + }; +} + describe("qa suite", () => { it("rejects unsupported transport ids before starting the lab", async () => { const startLab = vi.fn(); @@ -254,6 +269,84 @@ describe("qa suite", () => { }); }); + it("forwards run options into isolated scenario worker params", () => { + const startLab = vi.fn(); + const scenario = makeQaSuiteTestScenario("patched-control-ui", { + surface: "control-ui", + gatewayConfigPatch: { + messages: { + groupChat: { + visibleReplies: "message_tool", + }, + }, + }, + }); + + expect( + qaSuiteProgressTesting.buildQaIsolatedScenarioWorkerParams({ + repoRoot: "/repo", + outputDir: "/repo/.artifacts/qa-e2e/scenarios/patched-control-ui", + providerMode: "mock-openai", + transportId: "qa-channel", + primaryModel: "mock-openai/gpt-5.5", + alternateModel: "mock-openai/gpt-5.5-alt", + fastMode: true, + scenario, + startLab, + input: { + thinkingDefault: "minimal", + claudeCliAuthMode: "subscription", + enabledPluginIds: ["acpx"], + transportReadyTimeoutMs: 180_000, + forcedRuntime: "codex", + }, + }), + ).toMatchObject({ + scenarioIds: ["patched-control-ui"], + concurrency: 1, + startLab, + controlUiEnabled: true, + thinkingDefault: "minimal", + claudeCliAuthMode: "subscription", + enabledPluginIds: ["acpx"], + transportReadyTimeoutMs: 180_000, + forcedRuntime: "codex", + }); + }); + + it("keeps caller-owned serial labs on shared workers without a launcher", () => { + const scenarios = [ + makeQaSuiteTestScenario("baseline"), + makeQaSuiteTestScenario("message-tool-mode", { + gatewayConfigPatch: { + messages: { + groupChat: { + visibleReplies: "message_tool", + }, + }, + }, + }), + ]; + const lab = makeQaSuiteTestLabHandle(); + const startLab = vi.fn(); + + expect( + qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers({ + scenarios, + concurrency: 1, + lab, + }), + ).toBe(false); + expect( + qaSuiteProgressTesting.shouldRunQaSuiteWithIsolatedScenarioWorkers({ + scenarios, + concurrency: 1, + lab, + startLab, + }), + ).toBe(true); + }); + it("remaps mock-openai model refs onto the app-server OpenAI provider for codex cells only", () => { expect( qaSuiteProgressTesting.remapModelRefForForcedRuntime({ diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index 41058ca8ec6..1c06ee26509 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -51,6 +51,7 @@ import { resolveQaSuiteOutputDir, scenarioRequiresControlUi, selectQaSuiteScenarios, + shouldUseIsolatedQaSuiteScenarioWorkers, splitModelRef, } from "./suite-planning.js"; import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js"; @@ -214,6 +215,28 @@ function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuit ); } +function shouldRunQaSuiteWithIsolatedScenarioWorkers(params: { + scenarios: ReturnType["scenarios"]; + concurrency: number; + lab?: QaLabServerHandle; + startLab?: QaSuiteStartLabFn; +}) { + if ( + !shouldUseIsolatedQaSuiteScenarioWorkers({ + scenarios: params.scenarios, + concurrency: params.concurrency, + }) + ) { + return false; + } + + if (params.concurrency === 1 && params.lab && !params.startLab) { + return false; + } + + return true; +} + const QA_IMAGE_UNDERSTANDING_PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII="; @@ -378,10 +401,43 @@ function createQaSuiteReportNotes(params: { alternateModel: string; fastMode: boolean; concurrency: number; + isolatedWorkers?: boolean; }) { return params.transport.createReportNotes(params); } +function buildQaIsolatedScenarioWorkerParams(params: { + repoRoot: string; + outputDir: string; + providerMode: QaProviderMode; + transportId: QaTransportId; + primaryModel: string; + alternateModel: string; + fastMode: boolean; + scenario: ReturnType["scenarios"][number]; + input?: QaSuiteRunParams; + startLab: QaSuiteStartLabFn; +}): QaSuiteRunParams { + return { + repoRoot: params.repoRoot, + outputDir: params.outputDir, + providerMode: params.providerMode, + transportId: params.transportId, + primaryModel: params.primaryModel, + alternateModel: params.alternateModel, + fastMode: params.fastMode, + thinkingDefault: params.input?.thinkingDefault, + claudeCliAuthMode: params.input?.claudeCliAuthMode, + scenarioIds: [params.scenario.id], + enabledPluginIds: params.input?.enabledPluginIds, + concurrency: 1, + startLab: params.startLab, + controlUiEnabled: scenarioRequiresControlUi(params.scenario), + transportReadyTimeoutMs: params.input?.transportReadyTimeoutMs, + forcedRuntime: params.input?.forcedRuntime, + }; +} + function normalizeQaSuiteModelRef(input: string | undefined, fallback: string) { const model = input?.trim(); return model && model.length > 0 ? model : fallback; @@ -770,6 +826,7 @@ async function writeQaSuiteArtifacts(params: { alternateModel: string; fastMode: boolean; concurrency: number; + isolatedWorkers?: boolean; scenarioIds?: readonly string[]; runtimePair?: [RuntimeId, RuntimeId]; }) { @@ -974,6 +1031,12 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise 1 && selectedCatalogScenarios.length > 1) { + if (useIsolatedScenarioWorkers) { const ownsLab = !params?.lab; const startLab = requireQaSuiteStartLab(params?.startLab); const lab = @@ -1052,6 +1115,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise 0 ? selectedCatalogScenarios.map((scenario) => scenario.id) @@ -1093,25 +1157,20 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise