mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
feat(qa): add repo-backed qa suite runner
This commit is contained in:
@@ -79,6 +79,12 @@ pnpm qa:lab:build
|
||||
pnpm openclaw qa ui
|
||||
```
|
||||
|
||||
Full repo-backed QA suite:
|
||||
|
||||
```bash
|
||||
pnpm openclaw qa suite
|
||||
```
|
||||
|
||||
`pnpm openclaw qa ui` launches the private QA debugger at a local URL, separate from the
|
||||
shipped Control UI bundle.
|
||||
|
||||
|
||||
@@ -15,3 +15,5 @@ export * from "./src/scenario.js";
|
||||
export * from "./src/scenario-catalog.js";
|
||||
export * from "./src/self-check-scenario.js";
|
||||
export * from "./src/self-check.js";
|
||||
export * from "./src/gateway-child.js";
|
||||
export * from "./src/suite.js";
|
||||
|
||||
@@ -2,6 +2,7 @@ import path from "node:path";
|
||||
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
|
||||
import { startQaLabServer } from "./lab-server.js";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
import { runQaSuite } from "./suite.js";
|
||||
|
||||
export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
|
||||
const server = await startQaLabServer({
|
||||
@@ -15,6 +16,14 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * CLI handler for `qa suite`.
 *
 * Resolves the optional output directory to an absolute path, runs the suite,
 * and prints where the markdown report and JSON summary were written.
 */
export async function runQaSuiteCommand(opts: { outputDir?: string }) {
  const outputDir = opts.outputDir ? path.resolve(opts.outputDir) : undefined;
  const { reportPath, summaryPath } = await runQaSuite({ outputDir });
  process.stdout.write(`QA suite report: ${reportPath}\n`);
  process.stdout.write(`QA suite summary: ${summaryPath}\n`);
}
|
||||
|
||||
export async function runQaLabUiCommand(opts: {
|
||||
host?: string;
|
||||
port?: number;
|
||||
|
||||
@@ -14,6 +14,11 @@ async function runQaSelfCheck(opts: { output?: string }) {
|
||||
await runtime.runQaLabSelfCheckCommand(opts);
|
||||
}
|
||||
|
||||
/** Lazily loads the QA lab CLI runtime and forwards the `qa suite` options to it. */
async function runQaSuite(opts: { outputDir?: string }) {
  const cliRuntime = await loadQaLabCliRuntime();
  await cliRuntime.runQaSuiteCommand(opts);
}
|
||||
|
||||
async function runQaUi(opts: {
|
||||
host?: string;
|
||||
port?: number;
|
||||
@@ -63,6 +68,13 @@ export function registerQaLabCli(program: Command) {
|
||||
await runQaSelfCheck(opts);
|
||||
});
|
||||
|
||||
qa.command("suite")
|
||||
.description("Run all repo-backed QA scenarios against the real QA gateway lane")
|
||||
.option("--output-dir <path>", "Suite artifact directory")
|
||||
.action(async (opts: { outputDir?: string }) => {
|
||||
await runQaSuite(opts);
|
||||
});
|
||||
|
||||
qa.command("ui")
|
||||
.description("Start the private QA debugger UI and local QA bus")
|
||||
.option("--host <host>", "Bind host", "127.0.0.1")
|
||||
|
||||
210
extensions/qa-lab/src/gateway-child.ts
Normal file
210
extensions/qa-lab/src/gateway-child.ts
Normal file
@@ -0,0 +1,210 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import net from "node:net";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
|
||||
import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
|
||||
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
|
||||
|
||||
/**
 * Asks the OS for an unused loopback TCP port by binding a throwaway server to
 * port 0 on 127.0.0.1, reading back the assigned port, and closing the server.
 *
 * Rejects if the server errors, reports no usable address, or fails to close.
 */
async function getFreePort() {
  const probe = net.createServer();
  return await new Promise<number>((resolve, reject) => {
    probe.once("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const bound = probe.address();
      if (bound === null || typeof bound === "string") {
        reject(new Error("failed to allocate port"));
        return;
      }
      const { port } = bound;
      probe.close((closeError) => {
        if (closeError) {
          reject(closeError);
        } else {
          resolve(port);
        }
      });
    });
  });
}
|
||||
|
||||
/**
 * Polls `${baseUrl}/readyz` until it answers with a 2xx status.
 *
 * Each probe carries its own abort deadline so a socket that accepts the
 * connection but never responds cannot stall the loop past `timeoutMs`.
 *
 * @param baseUrl   HTTP origin of the gateway (no trailing slash).
 * @param logs      Returns the gateway output captured so far; embedded in the error.
 * @param timeoutMs Overall budget for the gateway to become ready.
 * @throws Error containing the captured logs when the budget is exhausted.
 */
async function waitForGatewayReady(baseUrl: string, logs: () => string, timeoutMs = 30_000) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    // Bound every attempt by what is left of the overall budget (capped at 5s).
    const attemptBudgetMs = Math.max(1, Math.min(deadline - Date.now(), 5_000));
    try {
      const response = await fetch(`${baseUrl}/readyz`, {
        signal: AbortSignal.timeout(attemptBudgetMs),
      });
      if (response.ok) {
        return;
      }
      // Release the unused body so the connection is not held open.
      await response.body?.cancel();
    } catch {
      // connection refused / aborted: retry until timeout
    }
    await sleep(250);
  }
  throw new Error(`gateway failed to become healthy:\n${logs()}`);
}
|
||||
|
||||
/**
 * Runs the current Node binary with `params.args`, waits for it to finish, and
 * parses its stdout as JSON.
 *
 * Completion is taken from the `"close"` event rather than `"exit"`: `"exit"`
 * can fire while the stdio pipes still hold unread data, which would truncate
 * the captured JSON.
 *
 * @returns The parsed stdout, or `{}` when stdout is empty/whitespace.
 * @throws Error `gateway cli failed (<code|signal>): <stderr>` on a non-zero exit
 *         or when the process was killed by a signal.
 * @throws SyntaxError when stdout is not valid JSON (the raw text is included).
 */
async function runCliJson(params: { cwd: string; env: NodeJS.ProcessEnv; args: string[] }) {
  const stdout: Buffer[] = [];
  const stderr: Buffer[] = [];
  await new Promise<void>((resolve, reject) => {
    const child = spawn(process.execPath, params.args, {
      cwd: params.cwd,
      env: params.env,
      stdio: ["ignore", "pipe", "pipe"],
    });
    child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
    child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
    child.once("error", reject);
    // "close" is emitted only after the process ended AND both pipes were drained.
    child.once("close", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(
        new Error(
          `gateway cli failed (${code ?? signal ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8")}`,
        ),
      );
    });
  });
  const text = Buffer.concat(stdout).toString("utf8").trim();
  if (!text) {
    return {};
  }
  try {
    return JSON.parse(text) as unknown;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`gateway cli returned invalid JSON (${reason}): ${text}`);
  }
}
|
||||
|
||||
/**
 * Starts a gateway child process (`dist/index.js gateway run`) against an
 * isolated temp directory and waits until its `/readyz` endpoint answers.
 *
 * The temp root holds the seeded workspace, state, home and XDG directories and
 * the generated `openclaw.json`; the child's environment points at those paths.
 *
 * If any startup step fails, the child (if spawned) is terminated and the temp
 * root is removed before the error is rethrown.
 *
 * @returns Connection details plus:
 *   - `logs()`  — combined stdout/stderr captured from the child so far;
 *   - `call()`  — runs `gateway call <method>` through the CLI and returns its parsed JSON;
 *   - `stop()`  — terminates the child (SIGTERM, then SIGKILL after 5s) and removes the temp root.
 */
export async function startQaGatewayChild(params: {
  repoRoot: string;
  providerBaseUrl: string;
  qaBusBaseUrl: string;
}) {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-suite-"));
  const removeTempRoot = async () => {
    await fs.rm(tempRoot, { recursive: true, force: true });
  };
  const workspaceDir = path.join(tempRoot, "workspace");
  const stateDir = path.join(tempRoot, "state");
  const homeDir = path.join(tempRoot, "home");
  const xdgConfigHome = path.join(tempRoot, "xdg-config");
  const xdgDataHome = path.join(tempRoot, "xdg-data");
  const xdgCacheHome = path.join(tempRoot, "xdg-cache");
  const configPath = path.join(tempRoot, "openclaw.json");
  const gatewayToken = `qa-suite-${randomUUID()}`;

  // Everything that can fail before the child exists; on failure only the temp root needs cleanup.
  const prepareFiles = async () => {
    const port = await getFreePort();
    await seedQaAgentWorkspace({
      workspaceDir,
      repoRoot: params.repoRoot,
    });
    await Promise.all([
      fs.mkdir(stateDir, { recursive: true }),
      fs.mkdir(homeDir, { recursive: true }),
      fs.mkdir(xdgConfigHome, { recursive: true }),
      fs.mkdir(xdgDataHome, { recursive: true }),
      fs.mkdir(xdgCacheHome, { recursive: true }),
    ]);
    const config = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: port,
      gatewayToken,
      providerBaseUrl: params.providerBaseUrl,
      qaBusBaseUrl: params.qaBusBaseUrl,
      workspaceDir,
    });
    await fs.writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");
    return { gatewayPort: port, cfg: config };
  };
  const { gatewayPort, cfg } = await prepareFiles().catch(async (error: unknown) => {
    await removeTempRoot().catch(() => undefined); // best effort; the original failure wins
    throw error;
  });

  const stdout: Buffer[] = [];
  const stderr: Buffer[] = [];
  const env = {
    ...process.env,
    HOME: homeDir,
    OPENCLAW_HOME: homeDir,
    OPENCLAW_CONFIG_PATH: configPath,
    OPENCLAW_STATE_DIR: stateDir,
    OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
    OPENCLAW_GATEWAY_TOKEN: gatewayToken,
    OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1",
    OPENCLAW_SKIP_GMAIL_WATCHER: "1",
    OPENCLAW_SKIP_CANVAS_HOST: "1",
    OPENCLAW_NO_RESPAWN: "1",
    OPENCLAW_TEST_FAST: "1",
    XDG_CONFIG_HOME: xdgConfigHome,
    XDG_DATA_HOME: xdgDataHome,
    XDG_CACHE_HOME: xdgCacheHome,
  };

  const child = spawn(
    process.execPath,
    [
      "dist/index.js",
      "gateway",
      "run",
      "--port",
      String(gatewayPort),
      "--bind",
      "loopback",
      "--allow-unconfigured",
    ],
    {
      cwd: params.repoRoot,
      env,
      stdio: ["ignore", "pipe", "pipe"],
    },
  );
  child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
  child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
  // Without a listener a spawn failure is an unhandled "error" event; surface it through logs() instead.
  child.once("error", (error) => {
    stderr.push(Buffer.from(`failed to spawn gateway: ${error.message}\n`));
  });

  const baseUrl = `http://127.0.0.1:${gatewayPort}`;
  const wsUrl = `ws://127.0.0.1:${gatewayPort}`;
  const logs = () =>
    `${Buffer.concat(stdout).toString("utf8")}\n${Buffer.concat(stderr).toString("utf8")}`.trim();

  // `child.killed` only says a signal was sent, so use exitCode/signalCode to detect real termination.
  const hasExited = () => child.exitCode !== null || child.signalCode !== null;

  /** Sends SIGTERM, escalates to SIGKILL after 5s, and resolves once the child is gone. */
  const stopChild = async () => {
    if (child.pid === undefined || hasExited()) {
      return; // never started, or already gone
    }
    const exited = new Promise<void>((resolve) => child.once("exit", () => resolve()));
    child.kill("SIGTERM");
    const grace = new AbortController();
    const escalation = sleep(5_000, undefined, { signal: grace.signal }).then(
      () => {
        if (!hasExited()) {
          child.kill("SIGKILL");
        }
      },
      () => undefined, // grace timer cancelled because the child exited in time
    );
    await exited;
    grace.abort(); // do not keep the event loop alive for the rest of the grace period
    await escalation;
  };

  try {
    await waitForGatewayReady(baseUrl, logs);
  } catch (error) {
    await stopChild();
    await removeTempRoot().catch(() => undefined); // best effort; the readiness failure wins
    throw error;
  }

  return {
    cfg,
    baseUrl,
    wsUrl,
    token: gatewayToken,
    workspaceDir,
    tempRoot,
    configPath,
    logs,
    async call(
      method: string,
      rpcParams?: unknown,
      opts?: { expectFinal?: boolean; timeoutMs?: number },
    ) {
      return await runCliJson({
        cwd: params.repoRoot,
        env,
        args: [
          "dist/index.js",
          "gateway",
          "call",
          method,
          "--url",
          wsUrl,
          "--token",
          gatewayToken,
          "--json",
          "--timeout",
          String(opts?.timeoutMs ?? 20_000),
          ...(opts?.expectFinal ? ["--expect-final"] : []),
          "--params",
          JSON.stringify(rpcParams ?? {}),
        ],
      });
    },
    async stop() {
      await stopChild();
      await removeTempRoot();
    },
  };
}
|
||||
@@ -192,4 +192,34 @@ describe("qa-lab server", () => {
|
||||
expect(html).not.toContain("QA Lab UI not built");
|
||||
expect(html).toContain("<title>");
|
||||
});
|
||||
|
||||
// With the embedded gateway disabled, an inbound message must not produce any outbound reply.
it("can disable the embedded echo gateway for real-suite runs", async () => {
  const lab = await startQaLabServer({
    host: "127.0.0.1",
    port: 0,
    embeddedGateway: "disabled",
  });
  cleanups.push(async () => {
    await lab.stop();
  });

  const inboundBody = JSON.stringify({
    conversation: { id: "bob", kind: "direct" },
    senderId: "bob",
    senderName: "Bob",
    text: "hello from suite",
  });
  await fetch(`${lab.baseUrl}/api/inbound/message`, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: inboundBody,
  });

  // Leave time for a reply to show up if a gateway loop were (wrongly) running.
  await new Promise((resolve) => setTimeout(resolve, 800));
  const stateResponse = await fetch(`${lab.baseUrl}/api/state`);
  const snapshot = (await stateResponse.json()) as {
    messages: Array<{ direction: string }>;
  };
  const outbound = snapshot.messages.filter((message) => message.direction === "outbound");
  expect(outbound).toHaveLength(0);
});
|
||||
});
|
||||
|
||||
@@ -372,6 +372,7 @@ export async function startQaLabServer(params?: {
|
||||
stop: () => Promise<void>;
|
||||
}
|
||||
| undefined;
|
||||
const embeddedGatewayEnabled = params?.embeddedGateway !== "disabled";
|
||||
|
||||
let publicBaseUrl = "";
|
||||
const server = createServer(async (req, res) => {
|
||||
@@ -514,7 +515,9 @@ export async function startQaLabServer(params?: {
|
||||
advertiseHost: params?.advertiseHost,
|
||||
advertisePort: params?.advertisePort,
|
||||
});
|
||||
gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
|
||||
if (embeddedGatewayEnabled) {
|
||||
gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
|
||||
}
|
||||
if (params?.sendKickoffOnStart) {
|
||||
injectKickoffMessage({
|
||||
state,
|
||||
@@ -544,7 +547,7 @@ export async function startQaLabServer(params?: {
|
||||
async runSelfCheck() {
|
||||
const result = await runQaSelfCheckAgainstState({
|
||||
state,
|
||||
cfg: gateway!.cfg,
|
||||
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
|
||||
outputPath: params?.outputPath,
|
||||
});
|
||||
latestReport = {
|
||||
|
||||
738
extensions/qa-lab/src/suite.ts
Normal file
738
extensions/qa-lab/src/suite.ts
Normal file
@@ -0,0 +1,738 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
|
||||
import type { QaBusState } from "./bus-state.js";
|
||||
import { extractQaToolPayload } from "./extract-tool-payload.js";
|
||||
import { startQaGatewayChild } from "./gateway-child.js";
|
||||
import { startQaLabServer } from "./lab-server.js";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
|
||||
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
|
||||
/** One named step of a scenario; `run` may return a detail string for the report. */
type QaSuiteStep = {
  name: string;
  run: () => Promise<string | void>;
};

/** Outcome of one scenario: overall status, the per-step checks, and the failure text if any. */
type QaSuiteScenarioResult = {
  name: string;
  status: "pass" | "fail";
  steps: QaReportCheck[];
  details?: string;
};

/** Handles to the three processes/servers a suite run talks to, plus the config used for QA channel actions. */
type QaSuiteEnvironment = {
  lab: Awaited<ReturnType<typeof startQaLabServer>>;
  mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>>;
  gateway: Awaited<ReturnType<typeof startQaGatewayChild>>;
  cfg: OpenClawConfig;
};

/** What `runQaSuite` returns: artifact locations, the rendered report text, and every scenario result. */
export type QaSuiteResult = {
  outputDir: string;
  reportPath: string;
  summaryPath: string;
  report: string;
  scenarios: QaSuiteScenarioResult[];
};
|
||||
|
||||
/** Builds a config containing only an enabled `qa-channel` entry that points at `baseUrl`. */
function createQaActionConfig(baseUrl: string): OpenClawConfig {
  const qaChannel = {
    enabled: true,
    baseUrl,
    botUserId: "openclaw",
    botDisplayName: "OpenClaw QA",
    allowFrom: ["*"],
  };
  return { channels: { "qa-channel": qaChannel } };
}
|
||||
|
||||
/**
 * Polls `check` until it yields a value other than `null`/`undefined` and
 * returns that value (falsy values such as `0` or `""` count as a result).
 *
 * `check` is always evaluated at least once, even with a zero timeout, and once
 * more when the deadline is reached, so a condition that becomes true during
 * the last sleep is not missed. Sleeps never extend past the deadline.
 *
 * @param check      Sync or async probe; `null`/`undefined` means "not yet".
 * @param timeoutMs  Overall budget in milliseconds.
 * @param intervalMs Pause between probes in milliseconds.
 * @throws Error `timed out after <timeoutMs>ms` when the budget is exhausted.
 */
async function waitForCondition<T>(
  check: () => T | Promise<T | null | undefined> | null | undefined,
  timeoutMs = 15_000,
  intervalMs = 100,
): Promise<T> {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const value = await check();
    if (value !== null && value !== undefined) {
      return value;
    }
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    await sleep(Math.min(intervalMs, remainingMs));
  }
  throw new Error(`timed out after ${timeoutMs}ms`);
}
|
||||
|
||||
/**
 * Waits until the bus snapshot contains an outbound message accepted by
 * `predicate` and returns the first such message in snapshot order.
 */
async function waitForOutboundMessage(
  state: QaBusState,
  predicate: (message: QaBusMessage) => boolean,
  timeoutMs = 15_000,
) {
  const firstMatch = () => {
    const { messages } = state.getSnapshot();
    // `predicate` is only consulted for outbound messages.
    return messages.find((message) => message.direction === "outbound" && predicate(message));
  };
  return await waitForCondition(firstMatch, timeoutMs);
}
|
||||
|
||||
/**
 * Sleeps for `timeoutMs`, then fails if the bus snapshot holds any outbound message.
 *
 * @throws Error `expected no outbound messages, saw <count>`.
 */
async function waitForNoOutbound(state: QaBusState, timeoutMs = 1_200) {
  await sleep(timeoutMs);
  const { messages } = state.getSnapshot();
  let outboundCount = 0;
  for (const message of messages) {
    if (message.direction === "outbound") {
      outboundCount += 1;
    }
  }
  if (outboundCount > 0) {
    throw new Error(`expected no outbound messages, saw ${outboundCount}`);
  }
}
|
||||
|
||||
/**
 * Runs `steps` in order and records one check per executed step.
 *
 * The first step that throws marks the scenario as failed; its message becomes
 * both the step's and the scenario's `details`, and later steps are not run.
 * A passing step only carries `details` when its `run` returned a non-empty string.
 */
async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
  const recorded: QaReportCheck[] = [];
  for (const step of steps) {
    let passDetails: string | void = undefined;
    let failDetails: string | null = null;
    try {
      passDetails = await step.run();
    } catch (error) {
      failDetails = error instanceof Error ? error.message : String(error);
    }

    if (failDetails !== null) {
      recorded.push({ name: step.name, status: "fail", details: failDetails });
      return { name, status: "fail", steps: recorded, details: failDetails };
    }

    if (passDetails) {
      recorded.push({ name: step.name, status: "pass", details: passDetails });
    } else {
      recorded.push({ name: step.name, status: "pass" });
    }
  }
  return { name, status: "pass", steps: recorded };
}
|
||||
|
||||
/**
 * GETs `url` and returns the parsed JSON body, typed as `T` without validation.
 *
 * @throws Error `request failed <status>: <url>` on a non-2xx response.
 */
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  if (response.ok) {
    return (await response.json()) as T;
  }
  throw new Error(`request failed ${response.status}: ${url}`);
}
|
||||
|
||||
/**
 * Starts an agent run on the gateway for the `qa` agent and waits for it to finish.
 *
 * Issues `agent` (delivering over `qa-channel` to `params.to`, default
 * `dm:qa-operator`), then `agent.wait` for the returned run id. The CLI timeout
 * of the wait call is the run timeout plus 5 seconds.
 *
 * @returns The raw `agent` and `agent.wait` responses.
 * @throws Error when `agent` returns no `runId` or `agent.wait` reports a status other than `ok`.
 */
async function runAgentPrompt(
  env: QaSuiteEnvironment,
  params: {
    sessionKey: string;
    message: string;
    to?: string;
    threadId?: string;
    provider?: string;
    model?: string;
    timeoutMs?: number;
  },
) {
  const runTimeoutMs = params.timeoutMs ?? 30_000;
  const target = params.to ?? "dm:qa-operator";

  // Optional fields are appended last and only when set, so the payload's key order is stable.
  const agentRequest: Record<string, unknown> = {
    idempotencyKey: randomUUID(),
    agentId: "qa",
    sessionKey: params.sessionKey,
    message: params.message,
    deliver: true,
    channel: "qa-channel",
    to: target,
    replyChannel: "qa-channel",
    replyTo: target,
  };
  if (params.threadId) {
    agentRequest.threadId = params.threadId;
  }
  if (params.provider) {
    agentRequest.provider = params.provider;
  }
  if (params.model) {
    agentRequest.model = params.model;
  }

  const started = (await env.gateway.call("agent", agentRequest, {
    timeoutMs: runTimeoutMs,
  })) as { runId?: string; status?: string };
  if (!started.runId) {
    throw new Error(`agent call did not return a runId: ${JSON.stringify(started)}`);
  }

  const waited = (await env.gateway.call(
    "agent.wait",
    { runId: started.runId, timeoutMs: runTimeoutMs },
    { timeoutMs: runTimeoutMs + 5_000 },
  )) as { status?: string; error?: string };
  if (waited.status !== "ok") {
    throw new Error(
      `agent.wait returned ${String(waited.status ?? "unknown")}: ${waited.error ?? "no error"}`,
    );
  }

  return { started, waited };
}
|
||||
|
||||
/** Message actions the suite invokes through the QA channel plugin. */
type QaActionName = "delete" | "edit" | "react" | "thread-create";

/**
 * Invokes one QA channel action via the plugin's action handler (for account
 * `default`) and returns the payload extracted from the handler's result.
 *
 * If the plugin exposes no action handler, the extractor receives `undefined`.
 */
async function handleQaAction(params: {
  env: QaSuiteEnvironment;
  action: QaActionName;
  args: Record<string, unknown>;
}) {
  const { env, action, args } = params;
  const handlerResult = await qaChannelPlugin.actions?.handleAction?.({
    channel: "qa-channel",
    action,
    cfg: env.cfg,
    accountId: "default",
    params: args,
  });
  return extractQaToolPayload(handlerResult);
}
|
||||
|
||||
/**
 * Builds the table of executable scenarios, keyed by scenario catalog id.
 *
 * Every scenario resets the shared bus state in its first step and then drives
 * the gateway through the bus (`state.addInboundMessage`), through gateway RPC
 * (`env.gateway.call` / `runAgentPrompt`), or through QA channel actions
 * (`handleQaAction`). State shared between the steps of one scenario lives in
 * that scenario's closure, never on `globalThis`.
 *
 * @returns Map from scenario id to a function that runs it and reports the result.
 */
function buildScenarioMap(env: QaSuiteEnvironment) {
  const state = env.lab.state;

  /** Clears the bus state and pauses briefly before the scenario continues. */
  const reset = async () => {
    state.reset();
    await sleep(100);
  };

  /** Waits for the first outbound message addressed to the `qa-operator` conversation. */
  const waitForAnyOperatorReply = () =>
    waitForOutboundMessage(state, (candidate) => candidate.conversation.id === "qa-operator");

  /** Waits for the most recent outbound `qa-operator` message whose text satisfies `textMatches`. */
  const waitForLatestOperatorReply = (textMatches: (text: string) => boolean, timeoutMs: number) =>
    waitForCondition(
      () =>
        state
          .getSnapshot()
          .messages.filter(
            (candidate) =>
              candidate.direction === "outbound" &&
              candidate.conversation.id === "qa-operator" &&
              textMatches(candidate.text),
          )
          .at(-1),
      timeoutMs,
    );

  return new Map<string, () => Promise<QaSuiteScenarioResult>>([
    [
      "channel-chat-baseline",
      async () =>
        await runScenario("Channel baseline conversation", [
          {
            name: "ignores unmentioned channel chatter",
            run: async () => {
              await reset();
              state.addInboundMessage({
                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
                senderId: "alice",
                senderName: "Alice",
                text: "hello team, no bot ping here",
              });
              await waitForNoOutbound(state);
            },
          },
          {
            name: "replies when mentioned in channel",
            run: async () => {
              state.addInboundMessage({
                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
                senderId: "alice",
                senderName: "Alice",
                text: "@openclaw explain the QA lab",
              });
              const message = await waitForOutboundMessage(
                state,
                (candidate) => candidate.conversation.id === "qa-room" && !candidate.threadId,
              );
              return message.text;
            },
          },
        ]),
    ],
    [
      "cron-one-minute-ping",
      async () => {
        // Hand-off between the two steps; scoped to this run instead of a process-wide global.
        let cronJobId: string | undefined;
        return await runScenario("Cron one-minute ping", [
          {
            name: "stores a reminder roughly one minute ahead",
            run: async () => {
              await reset();
              const at = new Date(Date.now() + 60_000).toISOString();
              const response = (await env.gateway.call("cron.add", {
                name: `qa-suite-${randomUUID()}`,
                enabled: true,
                schedule: { kind: "at", at },
                sessionTarget: "isolated",
                wakeMode: "next-heartbeat",
                payload: {
                  kind: "agentTurn",
                  message:
                    "A QA cron just fired. Send a one-line ping back to the room so the operator can verify delivery.",
                },
                delivery: {
                  mode: "announce",
                  channel: "qa-channel",
                  to: "channel:qa-room",
                },
              })) as { id?: string; schedule?: { at?: string } };
              const scheduledAt = response.schedule?.at ?? at;
              const delta = new Date(scheduledAt).getTime() - Date.now();
              // Accept 45s..75s to allow for the RPC round trip.
              if (delta < 45_000 || delta > 75_000) {
                throw new Error(`expected ~1 minute schedule, got ${delta}ms`);
              }
              cronJobId = response.id;
              return scheduledAt;
            },
          },
          {
            name: "forces the reminder through QA channel delivery",
            run: async () => {
              if (!cronJobId) {
                throw new Error("missing cron job id");
              }
              // Force the job now rather than waiting a minute for the schedule.
              await env.gateway.call(
                "cron.run",
                { id: cronJobId, mode: "force" },
                { timeoutMs: 30_000 },
              );
              const outbound = await waitForOutboundMessage(
                state,
                (candidate) => candidate.conversation.id === "qa-room",
                30_000,
              );
              return outbound.text;
            },
          },
        ]);
      },
    ],
    [
      "dm-chat-baseline",
      async () =>
        await runScenario("DM baseline conversation", [
          {
            name: "replies coherently in DM",
            run: async () => {
              await reset();
              state.addInboundMessage({
                conversation: { id: "alice", kind: "direct" },
                senderId: "alice",
                senderName: "Alice",
                text: "Hello there, who are you?",
              });
              const outbound = await waitForOutboundMessage(
                state,
                (candidate) => candidate.conversation.id === "alice",
              );
              return outbound.text;
            },
          },
        ]),
    ],
    [
      "lobster-invaders-build",
      async () =>
        await runScenario("Build Lobster Invaders", [
          {
            name: "creates the artifact after reading context",
            run: async () => {
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:lobster-invaders",
                message:
                  "Read the QA kickoff context first, then build a tiny Lobster Invaders HTML game in this workspace and tell me where it is.",
              });
              await waitForAnyOperatorReply();
              const artifactPath = path.join(env.gateway.workspaceDir, "lobster-invaders.html");
              const artifact = await fs.readFile(artifactPath, "utf8");
              if (!artifact.includes("Lobster Invaders")) {
                throw new Error("missing Lobster Invaders artifact");
              }
              // The mock provider's request log must show tool output containing the seeded text.
              const requests = await fetchJson<Array<{ prompt?: string; toolOutput?: string }>>(
                `${env.mock.baseUrl}/debug/requests`,
              );
              if (!requests.some((request) => (request.toolOutput ?? "").includes("QA mission"))) {
                throw new Error("expected pre-write read evidence");
              }
              return "lobster-invaders.html";
            },
          },
        ]),
    ],
    [
      "memory-recall",
      async () =>
        await runScenario("Memory recall after context switch", [
          {
            name: "stores the canary fact",
            run: async () => {
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:memory",
                message: "Please remember this fact for later: the QA canary code is ALPHA-7.",
              });
              const outbound = await waitForAnyOperatorReply();
              return outbound.text;
            },
          },
          {
            name: "recalls the same fact later",
            run: async () => {
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:memory",
                message: "What was the QA canary code I asked you to remember earlier?",
              });
              const outbound = await waitForLatestOperatorReply(
                (text) => text.includes("ALPHA-7"),
                20_000,
              );
              return outbound.text;
            },
          },
        ]),
    ],
    [
      "model-switch-follow-up",
      async () =>
        await runScenario("Model switch follow-up", [
          {
            name: "runs on the default configured model",
            run: async () => {
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:model-switch",
                message: "Say hello from the default configured model.",
              });
              await waitForAnyOperatorReply();
              const request = await fetchJson<{ body?: { model?: string } }>(
                `${env.mock.baseUrl}/debug/last-request`,
              );
              return String(request.body?.model ?? "");
            },
          },
          {
            name: "switches to the alternate model and continues",
            run: async () => {
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:model-switch",
                message: "Continue the exchange after switching models and note the handoff.",
                provider: "mock-openai",
                model: "gpt-5.4-alt",
              });
              const outbound = await waitForLatestOperatorReply(
                (text) => text.toLowerCase().includes("switch"),
                20_000,
              );
              const request = await fetchJson<{ body?: { model?: string } }>(
                `${env.mock.baseUrl}/debug/last-request`,
              );
              if (request.body?.model !== "gpt-5.4-alt") {
                throw new Error(`expected gpt-5.4-alt, got ${String(request.body?.model ?? "")}`);
              }
              return outbound.text;
            },
          },
        ]),
    ],
    [
      "reaction-edit-delete",
      async () =>
        await runScenario("Reaction, edit, delete lifecycle", [
          {
            name: "records reaction, edit, and delete actions",
            run: async () => {
              await reset();
              const seed = state.addOutboundMessage({
                to: "channel:qa-room",
                text: "seed message",
              });
              await handleQaAction({
                env,
                action: "react",
                args: { messageId: seed.id, emoji: "white_check_mark" },
              });
              await handleQaAction({
                env,
                action: "edit",
                args: { messageId: seed.id, text: "seed message (edited)" },
              });
              await handleQaAction({
                env,
                action: "delete",
                args: { messageId: seed.id },
              });
              const message = state.readMessage({ messageId: seed.id });
              if (
                message.reactions.length === 0 ||
                !message.deleted ||
                !message.text.includes("(edited)")
              ) {
                throw new Error("message lifecycle did not persist");
              }
              return message.text;
            },
          },
        ]),
    ],
    [
      "source-docs-discovery-report",
      async () =>
        await runScenario("Source and docs discovery report", [
          {
            name: "reads seeded material and emits a protocol report",
            run: async () => {
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:discovery",
                message:
                  "Read the seeded docs and source plan, then report grouped into Worked, Failed, Blocked, and Follow-up.",
              });
              const outbound = await waitForLatestOperatorReply(
                (text) => text.includes("Worked:"),
                20_000,
              );
              return outbound.text;
            },
          },
        ]),
    ],
    [
      "subagent-handoff",
      async () =>
        await runScenario("Subagent handoff", [
          {
            name: "delegates a bounded task and reports the result",
            run: async () => {
              await reset();
              await runAgentPrompt(env, {
                sessionKey: "agent:qa:subagent",
                message:
                  "Delegate a bounded QA task to a subagent, then summarize the delegated result clearly.",
                timeoutMs: 45_000,
              });
              const outbound = await waitForLatestOperatorReply(
                (text) => text.toLowerCase().includes("delegated"),
                45_000,
              );
              // Resolves only once at least one spawned session is listed; otherwise it times out.
              await waitForCondition(
                async () => {
                  const listed = (await env.gateway.call("sessions.list", {
                    spawnedBy: "agent:qa:subagent",
                  })) as {
                    sessions?: Array<{
                      key?: string;
                      parentSessionKey?: string;
                      spawnedBy?: string;
                    }>;
                  };
                  return (listed.sessions ?? []).length > 0 ? listed : null;
                },
                20_000,
                250,
              );
              return outbound.text;
            },
          },
        ]),
    ],
    [
      "thread-follow-up",
      async () =>
        await runScenario("Threaded follow-up", [
          {
            name: "keeps follow-up inside the thread",
            run: async () => {
              await reset();
              const threadPayload = (await handleQaAction({
                env,
                action: "thread-create",
                args: {
                  channelId: "qa-room",
                  title: "QA deep dive",
                },
              })) as { thread?: { id?: string } } | undefined;
              const threadId = threadPayload?.thread?.id;
              if (!threadId) {
                throw new Error("missing thread id");
              }
              state.addInboundMessage({
                conversation: { id: "qa-room", kind: "channel", title: "QA Room" },
                senderId: "alice",
                senderName: "Alice",
                text: "@openclaw continue this work inside the thread",
                threadId,
                threadTitle: "QA deep dive",
              });
              const outbound = await waitForOutboundMessage(
                state,
                (candidate) =>
                  candidate.conversation.id === "qa-room" && candidate.threadId === threadId,
              );
              // Any outbound qa-room message without a thread id means the reply escaped the thread.
              const leaked = state
                .getSnapshot()
                .messages.some(
                  (candidate) =>
                    candidate.direction === "outbound" &&
                    candidate.conversation.id === "qa-room" &&
                    !candidate.threadId,
                );
              if (leaked) {
                throw new Error("thread reply leaked into root channel");
              }
              return outbound.text;
            },
          },
        ]),
    ],
  ]);
}
|
||||
|
||||
/**
 * Runs every scenario in the bootstrap scenario catalog against a freshly
 * started QA lab server (embedded gateway disabled), mock OpenAI server and
 * gateway child process, then writes a markdown report and a JSON summary.
 *
 * Catalog scenarios with no registered implementation are reported as failed.
 * Each started service is registered for shutdown as soon as it is up, so a
 * failure while starting a later service still stops the earlier ones.
 * Services are stopped in reverse start order, and one failing `stop()` does
 * not prevent the others from running.
 *
 * @param params.outputDir Artifact directory; defaults to
 *        `<cwd>/.artifacts/qa-e2e/suite-<timestamp>`.
 * @returns Artifact paths, the rendered report and the per-scenario results.
 * @throws The first startup/run error; or, if the run itself succeeded, the
 *         first shutdown error.
 */
export async function runQaSuite(params?: { outputDir?: string }) {
  const startedAt = new Date();
  const outputDir =
    params?.outputDir ??
    path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
  await fs.mkdir(outputDir, { recursive: true });

  const stoppers: Array<() => Promise<void>> = [];
  /** Stops everything started so far (newest first) and returns the errors raised while stopping. */
  const stopAll = async () => {
    const stopErrors: unknown[] = [];
    // splice() empties the list, so a second call is a no-op.
    for (const stop of stoppers.splice(0).reverse()) {
      try {
        await stop();
      } catch (error) {
        stopErrors.push(error);
      }
    }
    return stopErrors;
  };

  try {
    const lab = await startQaLabServer({
      host: "127.0.0.1",
      port: 0,
      embeddedGateway: "disabled",
    });
    stoppers.push(async () => {
      await lab.stop();
    });
    const mock = await startQaMockOpenAiServer({
      host: "127.0.0.1",
      port: 0,
    });
    stoppers.push(async () => {
      await mock.stop();
    });
    const gateway = await startQaGatewayChild({
      repoRoot: process.cwd(),
      providerBaseUrl: `${mock.baseUrl}/v1`,
      qaBusBaseUrl: lab.listenUrl,
    });
    stoppers.push(async () => {
      await gateway.stop();
    });
    const env: QaSuiteEnvironment = {
      lab,
      mock,
      gateway,
      cfg: createQaActionConfig(lab.listenUrl),
    };

    const catalog = readQaBootstrapScenarioCatalog();
    const scenarioMap = buildScenarioMap(env);
    const scenarios: QaSuiteScenarioResult[] = [];

    for (const scenario of catalog.scenarios) {
      const run = scenarioMap.get(scenario.id);
      if (!run) {
        scenarios.push({
          name: scenario.title,
          status: "fail",
          details: `no executable scenario registered for ${scenario.id}`,
          steps: [],
        });
        continue;
      }
      scenarios.push(await run());
    }

    const finishedAt = new Date();
    const report = renderQaMarkdownReport({
      title: "OpenClaw QA Scenario Suite",
      startedAt,
      finishedAt,
      checks: [],
      scenarios: scenarios.map((scenario) => ({
        name: scenario.name,
        status: scenario.status,
        details: scenario.details,
        steps: scenario.steps,
      })) satisfies QaReportScenario[],
      notes: [
        "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.",
        "Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
      ],
    });
    const reportPath = path.join(outputDir, "qa-suite-report.md");
    const summaryPath = path.join(outputDir, "qa-suite-summary.json");
    await fs.writeFile(reportPath, report, "utf8");
    await fs.writeFile(
      summaryPath,
      `${JSON.stringify(
        {
          scenarios,
          counts: {
            total: scenarios.length,
            passed: scenarios.filter((scenario) => scenario.status === "pass").length,
            failed: scenarios.filter((scenario) => scenario.status === "fail").length,
          },
        },
        null,
        2,
      )}\n`,
      "utf8",
    );

    const result = {
      outputDir,
      reportPath,
      summaryPath,
      report,
      scenarios,
    } satisfies QaSuiteResult;

    // Success path: a shutdown failure is still reported to the caller.
    const stopErrors = await stopAll();
    if (stopErrors.length > 0) {
      throw stopErrors[0];
    }
    return result;
  } catch (error) {
    // Failure path: stop whatever is still running; the primary error wins over shutdown errors.
    await stopAll();
    throw error;
  }
}
|
||||
Reference in New Issue
Block a user