feat(qa-lab): add control ui qa-channel roundtrip scenario

This commit is contained in:
Peter Steinberger
2026-04-12 19:40:48 -07:00
parent f682413f57
commit 20266c14cb
12 changed files with 472 additions and 5 deletions

View File

@@ -115,6 +115,25 @@ describe("buildQaRuntimeEnv", () => {
expect(env.OPENCLAW_STATE_DIR).toBe("/tmp/openclaw-qa/state");
});
// With `forwardHostHome: true`, HOME should be the HOME supplied in the base
// env, while OPENCLAW_HOME and OPENCLAW_STATE_DIR keep the fixed
// /tmp/openclaw-qa paths asserted below.
it("can forward host HOME for browser-backed QA runs while keeping OpenClaw home sandboxed", async () => {
  // Throwaway directory standing in for the host HOME; removed by the shared cleanup list.
  const tempHostHome = await mkdtemp(path.join(os.tmpdir(), "qa-host-home-"));
  cleanups.push(async () => {
    await rm(tempHostHome, { recursive: true, force: true });
  });

  const baseParams = createParams({ HOME: tempHostHome });
  const { HOME, OPENCLAW_HOME, OPENCLAW_STATE_DIR } = buildQaRuntimeEnv({
    ...baseParams,
    providerMode: "mock-openai",
    forwardHostHome: true,
  });

  expect(HOME).toBe(tempHostHome);
  expect(OPENCLAW_HOME).toBe("/tmp/openclaw-qa/home");
  expect(OPENCLAW_STATE_DIR).toBe("/tmp/openclaw-qa/state");
});
it("preserves the live Anthropic key for live Claude CLI runs without writing it into config", async () => {
const hostHome = await mkdtemp(path.join(os.tmpdir(), "qa-host-home-"));
cleanups.push(async () => {

View File

@@ -295,6 +295,7 @@ export function buildQaRuntimeEnv(params: {
configPath: string;
gatewayToken: string;
homeDir: string;
forwardHostHome?: boolean;
stateDir: string;
xdgConfigHome: string;
xdgDataHome: string;
@@ -307,9 +308,12 @@ export function buildQaRuntimeEnv(params: {
claudeCliAuthMode?: QaCliBackendAuthMode;
}) {
const baseEnv = params.baseEnv ?? process.env;
const forwardedHostHome = params.forwardHostHome
? baseEnv.HOME?.trim() || os.homedir()
: undefined;
const env: NodeJS.ProcessEnv = {
...baseEnv,
HOME: params.homeDir,
HOME: forwardedHostHome ?? params.homeDir,
...(params.providerMode === "live-frontier"
? resolveQaLiveCliAuthEnv(baseEnv, {
forwardHostHomeForClaudeCli: params.forwardHostHomeForClaudeCli,
@@ -837,6 +841,7 @@ export async function startQaGatewayChild(params: {
claudeCliAuthMode?: QaCliBackendAuthMode;
controlUiEnabled?: boolean;
enabledPluginIds?: string[];
forwardHostHome?: boolean;
mutateConfig?: (cfg: OpenClawConfig) => OpenClawConfig;
}) {
const tempRoot = await fs.mkdtemp(
@@ -969,6 +974,7 @@ export async function startQaGatewayChild(params: {
configPath,
gatewayToken,
homeDir,
forwardHostHome: params.forwardHostHome,
stateDir,
xdgConfigHome,
xdgDataHome,

View File

@@ -81,6 +81,12 @@ describe("qa scenario catalog", () => {
expect(fanoutConfig?.expectedReplyGroups?.flat()).toContain("subagent-2: ok");
});
// The control-ui roundtrip scenario is expected to declare
// `gatewayRuntime.forwardHostHome: true`, and the catalog reader must surface it.
it("loads scenario-declared gateway runtime options from markdown", () => {
  const { gatewayRuntime } = readQaScenarioById("control-ui-qa-channel-image-roundtrip");

  expect(gatewayRuntime?.forwardHostHome).toBe(true);
});
it("keeps the character eval scenario natural and task-shaped", () => {
const characterConfig = readQaScenarioExecutionConfig("character-vibes-gollum") as
| {

View File

@@ -51,6 +51,10 @@ const qaScenarioExecutionSchema = z.object({
config: qaScenarioConfigSchema.optional(),
});
// Schema for the optional per-scenario `gatewayRuntime` options object.
const qaScenarioGatewayRuntimeSchema = z.object({
// Optional boolean opt-in; presumably passed on to the gateway child as its
// `forwardHostHome` option — confirm against the suite runner.
forwardHostHome: z.boolean().optional(),
});
const qaFlowCallActionSchema = z.object({
call: z.string().trim().min(1),
args: z.array(z.unknown()).optional(),
@@ -137,6 +141,7 @@ const qaSeedScenarioSchema = z.object({
successCriteria: z.array(z.string().trim().min(1)).min(1),
plugins: z.array(z.string().trim().min(1)).optional(),
gatewayConfigPatch: z.record(z.string(), z.unknown()).optional(),
gatewayRuntime: qaScenarioGatewayRuntimeSchema.optional(),
docsRefs: z.array(z.string().trim().min(1)).optional(),
codeRefs: z.array(z.string().trim().min(1)).optional(),
execution: qaScenarioExecutionSchema.optional(),

View File

@@ -30,6 +30,16 @@ function createDeps(overrides?: Partial<QaScenarioRuntimeDeps>): QaScenarioRunti
waitForGatewayHealthy: fn,
waitForTransportReady: fn,
waitForQaChannelReady: fn,
browserRequest: fn,
waitForBrowserReady: fn,
browserOpenTab: fn,
browserSnapshot: fn,
browserAct: fn,
webOpenPage: fn,
webWait: fn,
webType: fn,
webSnapshot: fn,
webEvaluate: fn,
waitForConfigRestartSettle: fn,
patchConfig: fn,
applyConfig: fn,
@@ -130,6 +140,16 @@ describe("createQaScenarioRuntimeApi", () => {
expect(api.config).toEqual({ expected: "value" });
expect(api.waitForCondition).toBe(waitForCondition);
expect(api.waitForChannelReady).toBe(api.waitForTransportReady);
expect(api.browserRequest).toBeDefined();
expect(api.waitForBrowserReady).toBeDefined();
expect(api.browserOpenTab).toBeDefined();
expect(api.browserSnapshot).toBeDefined();
expect(api.browserAct).toBeDefined();
expect(api.webOpenPage).toBeDefined();
expect(api.webWait).toBeDefined();
expect(api.webType).toBeDefined();
expect(api.webSnapshot).toBeDefined();
expect(api.webEvaluate).toBeDefined();
expect(api.getTransportSnapshot()).toEqual(state.getSnapshot());
expect(api.imageUnderstandingPngBase64).toBe("png-small");

View File

@@ -42,6 +42,16 @@ export type QaScenarioRuntimeDeps = {
waitForGatewayHealthy: QaScenarioRuntimeFunction;
waitForTransportReady: QaScenarioRuntimeFunction;
waitForQaChannelReady: QaScenarioRuntimeFunction;
browserRequest: QaScenarioRuntimeFunction;
waitForBrowserReady: QaScenarioRuntimeFunction;
browserOpenTab: QaScenarioRuntimeFunction;
browserSnapshot: QaScenarioRuntimeFunction;
browserAct: QaScenarioRuntimeFunction;
webOpenPage: QaScenarioRuntimeFunction;
webWait: QaScenarioRuntimeFunction;
webType: QaScenarioRuntimeFunction;
webSnapshot: QaScenarioRuntimeFunction;
webEvaluate: QaScenarioRuntimeFunction;
waitForConfigRestartSettle: QaScenarioRuntimeFunction;
patchConfig: QaScenarioRuntimeFunction;
applyConfig: QaScenarioRuntimeFunction;
@@ -116,6 +126,16 @@ export type QaScenarioRuntimeApi<
waitForTransportReady: TDeps["waitForTransportReady"];
waitForChannelReady: TDeps["waitForTransportReady"];
waitForQaChannelReady: TDeps["waitForQaChannelReady"];
browserRequest: TDeps["browserRequest"];
waitForBrowserReady: TDeps["waitForBrowserReady"];
browserOpenTab: TDeps["browserOpenTab"];
browserSnapshot: TDeps["browserSnapshot"];
browserAct: TDeps["browserAct"];
webOpenPage: TDeps["webOpenPage"];
webWait: TDeps["webWait"];
webType: TDeps["webType"];
webSnapshot: TDeps["webSnapshot"];
webEvaluate: TDeps["webEvaluate"];
waitForConfigRestartSettle: TDeps["waitForConfigRestartSettle"];
patchConfig: TDeps["patchConfig"];
applyConfig: TDeps["applyConfig"];
@@ -205,6 +225,16 @@ export function createQaScenarioRuntimeApi<
waitForTransportReady: params.deps.waitForTransportReady,
waitForChannelReady: params.deps.waitForTransportReady,
waitForQaChannelReady: params.deps.waitForQaChannelReady,
browserRequest: params.deps.browserRequest,
waitForBrowserReady: params.deps.waitForBrowserReady,
browserOpenTab: params.deps.browserOpenTab,
browserSnapshot: params.deps.browserSnapshot,
browserAct: params.deps.browserAct,
webOpenPage: params.deps.webOpenPage,
webWait: params.deps.webWait,
webType: params.deps.webType,
webSnapshot: params.deps.webSnapshot,
webEvaluate: params.deps.webEvaluate,
waitForConfigRestartSettle: params.deps.waitForConfigRestartSettle,
patchConfig: params.deps.patchConfig,
applyConfig: params.deps.applyConfig,

View File

@@ -11,6 +11,7 @@ describe("qa suite failure reply handling", () => {
config?: Record<string, unknown>,
plugins?: string[],
gatewayConfigPatch?: Record<string, unknown>,
gatewayRuntime?: { forwardHostHome?: boolean },
): Parameters<typeof qaSuiteTesting.selectQaSuiteScenarios>[0]["scenarios"][number] =>
({
id,
@@ -20,6 +21,7 @@ describe("qa suite failure reply handling", () => {
successCriteria: ["test"],
plugins,
gatewayConfigPatch,
gatewayRuntime,
sourcePath: `qa/scenarios/${id}.md`,
execution: {
kind: "flow",
@@ -199,6 +201,19 @@ describe("qa suite failure reply handling", () => {
});
});
// Only one of the two scenarios opts in; the collected options must still
// report `forwardHostHome: true`.
it("collects gateway runtime options across selected scenarios", () => {
  const plainScenario = makeScenario("plain");
  const browserScenario = makeScenario("browser-ui", undefined, ["browser"], undefined, {
    forwardHostHome: true,
  });

  const collected = qaSuiteTesting.collectQaSuiteGatewayRuntimeOptions([
    plainScenario,
    browserScenario,
  ]);

  expect(collected).toEqual({ forwardHostHome: true });
});
it("filters provider-specific scenarios from an implicit live lane", () => {
const scenarios = [
makeScenario("generic"),

View File

@@ -15,6 +15,13 @@ import {
import { buildAgentSessionKey } from "openclaw/plugin-sdk/routing";
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import {
callQaBrowserRequest,
qaBrowserAct,
qaBrowserOpenTab,
qaBrowserSnapshot,
waitForQaBrowserReady,
} from "./browser-runtime.js";
import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "./cli-paths.js";
import { waitForCronRunCompletion } from "./cron-run-wait.js";
import {
@@ -60,6 +67,14 @@ import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
import { runScenarioFlow } from "./scenario-flow-runner.js";
import { createQaScenarioRuntimeApi } from "./scenario-runtime-api.js";
import {
closeAllQaWebSessions,
qaWebEvaluate,
qaWebOpenPage,
qaWebSnapshot,
qaWebType,
qaWebWait,
} from "./web-runtime.js";
type QaSuiteStep = {
name: string;
@@ -313,6 +328,18 @@ function collectQaSuiteGatewayConfigPatch(
return merged;
}
/**
 * Folds the per-scenario `gatewayRuntime` options of the given scenarios into
 * one options object.
 *
 * @param scenarios - Scenarios whose `gatewayRuntime.forwardHostHome` flags are inspected.
 * @returns `{ forwardHostHome: true }` when at least one scenario sets the flag
 *   to exactly `true`; otherwise `undefined`.
 */
function collectQaSuiteGatewayRuntimeOptions(
  scenarios: ReturnType<typeof readQaBootstrapScenarioCatalog>["scenarios"],
) {
  // A single opt-in is enough: the flag is OR-ed across all scenarios.
  const anyScenarioOptsIn = scenarios.some(
    (candidate) => candidate.gatewayRuntime?.forwardHostHome === true,
  );
  if (!anyScenarioOptsIn) {
    return undefined;
  }
  return { forwardHostHome: true };
}
/**
 * Thin wrapper: forwards both arguments to `resolveQaLiveTurnTimeoutMs` and
 * returns its result unchanged.
 *
 * @param env - Suite environment handed to the resolver.
 * @param fallbackMs - Fallback timeout in milliseconds handed to the resolver.
 */
function liveTurnTimeoutMs(env: QaSuiteEnvironment, fallbackMs: number) {
  const resolvedTimeoutMs = resolveQaLiveTurnTimeoutMs(env, fallbackMs);
  return resolvedTimeoutMs;
}
@@ -1236,6 +1263,16 @@ function createScenarioFlowApi(
waitForGatewayHealthy,
waitForTransportReady,
waitForQaChannelReady,
browserRequest: callQaBrowserRequest,
waitForBrowserReady: waitForQaBrowserReady,
browserOpenTab: qaBrowserOpenTab,
browserSnapshot: qaBrowserSnapshot,
browserAct: qaBrowserAct,
webOpenPage: qaWebOpenPage,
webWait: qaWebWait,
webType: qaWebType,
webSnapshot: qaWebSnapshot,
webEvaluate: qaWebEvaluate,
waitForConfigRestartSettle,
patchConfig,
applyConfig,
@@ -1284,6 +1321,7 @@ function createScenarioFlowApi(
export const qaSuiteTesting = {
collectQaSuiteGatewayConfigPatch,
collectQaSuiteGatewayRuntimeOptions,
collectQaSuitePluginIds,
createScenarioWaitForCondition,
findFailureOutboundMessage,
@@ -1397,6 +1435,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
});
const enabledPluginIds = collectQaSuitePluginIds(selectedCatalogScenarios);
const gatewayConfigPatch = collectQaSuiteGatewayConfigPatch(selectedCatalogScenarios);
const gatewayRuntimeOptions = collectQaSuiteGatewayRuntimeOptions(selectedCatalogScenarios);
const concurrency = normalizeQaSuiteConcurrency(
params?.concurrency,
selectedCatalogScenarios.length,
@@ -1594,6 +1633,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
claudeCliAuthMode: params?.claudeCliAuthMode,
controlUiEnabled: params?.controlUiEnabled ?? true,
enabledPluginIds,
forwardHostHome: gatewayRuntimeOptions?.forwardHostHome,
mutateConfig: gatewayConfigPatch
? (cfg) => applyQaMergePatch(cfg, gatewayConfigPatch) as OpenClawConfig
: undefined,
@@ -1606,9 +1646,9 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
lab,
mock,
gateway,
cfg: transport.createGatewayConfig({
baseUrl: lab.listenUrl,
}),
// Markdown scenarios should see the full staged gateway config, not just
// the transport fragment. Routing/session/plugin assertions depend on it.
cfg: gateway.cfg,
transport,
repoRoot,
providerMode,
@@ -1717,6 +1757,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
preserveGatewayRuntimeDir = path.join(outputDir, "artifacts", "gateway-runtime");
throw error;
} finally {
await closeAllQaWebSessions();
const keepTemp = process.env.OPENCLAW_QA_KEEP_TEMP === "1" || false;
await gateway.stop({
keepTemp,