feat(qa): add manual harness lane

This commit is contained in:
Vincent Koc
2026-04-07 10:28:56 +01:00
committed by Peter Steinberger
parent 63e6bb026c
commit f93b217834
4 changed files with 308 additions and 0 deletions

View File

@@ -2,6 +2,7 @@ import path from "node:path";
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
import { runQaDockerUp } from "./docker-up.runtime.js";
import { startQaLabServer } from "./lab-server.js";
import { runQaManualLane } from "./manual-lane.runtime.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
import { runQaSuite } from "./suite.js";
@@ -63,6 +64,28 @@ export async function runQaSuiteCommand(opts: {
process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
}
/**
 * CLI-facing entry point for the manual QA lane.
 *
 * Fills in defaults for the provider mode and model refs, runs one manual lane
 * turn from the current working directory, and prints the result to stdout as
 * pretty-printed JSON followed by a newline.
 *
 * @param opts - Raw command options; only `message` is required.
 */
export async function runQaManualLaneCommand(opts: {
  providerMode?: "mock-openai" | "live-frontier";
  primaryModel?: string;
  alternateModel?: string;
  fastMode?: boolean;
  message: string;
  timeoutMs?: number;
}) {
  // `||` rather than `??`: a blank or whitespace-only ref must fall back too.
  const primaryModel = opts.primaryModel?.trim() || "openai/gpt-5.4";
  // The alternate lane mirrors the primary model unless one is given explicitly.
  const alternateModel = opts.alternateModel?.trim() || primaryModel;
  const providerMode = opts.providerMode ?? "live-frontier";
  const { fastMode, message, timeoutMs } = opts;

  const outcome = await runQaManualLane({
    repoRoot: process.cwd(),
    providerMode,
    primaryModel,
    alternateModel,
    fastMode,
    message,
    timeoutMs,
  });

  const rendered = JSON.stringify(outcome, null, 2);
  process.stdout.write(rendered);
  process.stdout.write("\n");
}
export async function runQaLabUiCommand(opts: {
host?: string;
port?: number;

View File

@@ -26,6 +26,18 @@ async function runQaSuite(opts: {
await runtime.runQaSuiteCommand(opts);
}
/**
 * Loads the QA lab CLI runtime on demand and hands the manual-lane options
 * straight through to its `runQaManualLaneCommand`.
 *
 * @param opts - Manual lane options collected by the `qa manual` command.
 */
async function runQaManualLane(opts: {
  providerMode?: "mock-openai" | "live-frontier";
  primaryModel?: string;
  alternateModel?: string;
  fastMode?: boolean;
  message: string;
  timeoutMs?: number;
}) {
  // The runtime is resolved only when the command actually runs.
  const cliRuntime = await loadQaLabCliRuntime();
  await cliRuntime.runQaManualLaneCommand(opts);
}
function collectString(value: string, previous: string[]) {
const trimmed = value.trim();
return trimmed ? [...previous, trimmed] : previous;
@@ -128,6 +140,38 @@ export function registerQaLabCli(program: Command) {
},
);
// `qa manual`: send one prompt through the QA agent on the chosen provider/model lane.
qa.command("manual")
  .description("Run a one-off QA agent prompt against the selected provider/model lane")
  .requiredOption("--message <text>", "Prompt to send to the QA agent")
  .option(
    "--provider-mode <mode>",
    "Provider mode: mock-openai or live-frontier (legacy live-openai still works)",
    "live-frontier",
  )
  .option("--model <ref>", "Primary provider/model ref", "openai/gpt-5.4")
  .option("--alt-model <ref>", "Alternate provider/model ref")
  .option("--fast", "Enable provider fast mode where supported", false)
  // Parsed with Number(); a non-numeric value therefore arrives as NaN.
  .option("--timeout-ms <ms>", "Override agent.wait timeout", (raw: string) => Number(raw))
  .action(
    async ({
      message,
      providerMode,
      model,
      altModel,
      fast,
      timeoutMs,
    }: {
      message: string;
      providerMode?: "mock-openai" | "live-frontier";
      model?: string;
      altModel?: string;
      fast?: boolean;
      timeoutMs?: number;
    }) => {
      // Map the CLI flag names onto the manual lane's option names.
      await runQaManualLane({
        providerMode,
        primaryModel: model,
        alternateModel: altModel,
        fastMode: fast,
        message,
        timeoutMs,
      });
    },
  );
qa.command("ui")
.description("Start the private QA debugger UI and local QA bus")
.option("--host <host>", "Bind host", "127.0.0.1")

View File

@@ -0,0 +1,117 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// Create the mock functions inside vi.hoisted so they already exist when the
// vi.mock factories below run (vitest hoists vi.mock calls above the imports).
const { startQaLabServer, startQaGatewayChild, startQaMockOpenAiServer } = vi.hoisted(() => ({
startQaLabServer: vi.fn(),
startQaGatewayChild: vi.fn(),
startQaMockOpenAiServer: vi.fn(),
}));
// Replace the lab server module with the hoisted mock.
vi.mock("./lab-server.js", () => ({
startQaLabServer,
}));
// Replace the gateway child module with the hoisted mock.
vi.mock("./gateway-child.js", () => ({
startQaGatewayChild,
}));
// Replace the mock OpenAI server module with the hoisted mock.
vi.mock("./mock-openai-server.js", () => ({
startQaMockOpenAiServer,
}));
import { runQaManualLane } from "./manual-lane.runtime.js";
// Unit tests for runQaManualLane with the lab server, gateway child, and mock
// OpenAI server all replaced by vitest mocks.
describe("runQaManualLane", () => {
// Spies handed out as the `stop` method of each mocked resource.
const gatewayStop = vi.fn();
const mockStop = vi.fn();
const labStop = vi.fn();
beforeEach(() => {
// Reset every mock so call counts and queued results never leak between tests.
gatewayStop.mockReset();
mockStop.mockReset();
labStop.mockReset();
startQaLabServer.mockReset();
startQaGatewayChild.mockReset();
startQaMockOpenAiServer.mockReset();
// Lab server whose snapshot holds a single outbound message for "qa-operator".
startQaLabServer.mockResolvedValue({
listenUrl: "http://127.0.0.1:43124",
baseUrl: "http://127.0.0.1:58000",
state: {
getSnapshot: () => ({
messages: [
{
direction: "outbound",
conversation: { id: "qa-operator" },
text: "Protocol note: mock reply.",
},
],
}),
},
stop: labStop,
});
// Gateway whose `call` resolves in order: first a run id, then an "ok" status.
startQaGatewayChild.mockResolvedValue({
call: vi
.fn()
.mockResolvedValueOnce({ runId: "run-1" })
.mockResolvedValueOnce({ status: "ok" }),
stop: gatewayStop,
});
startQaMockOpenAiServer.mockResolvedValue({
baseUrl: "http://127.0.0.1:44080",
stop: mockStop,
});
});
afterEach(() => {
vi.clearAllMocks();
});
it("starts the mock provider and threads its base url into the gateway child", async () => {
const result = await runQaManualLane({
repoRoot: "/tmp/openclaw-repo",
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.4",
alternateModel: "mock-openai/gpt-5.4-alt",
message: "check the kickoff file",
timeoutMs: 5_000,
});
// The mock provider is started on loopback with port 0.
expect(startQaMockOpenAiServer).toHaveBeenCalledWith({
host: "127.0.0.1",
port: 0,
});
// The gateway receives the mock server's base URL with "/v1" appended.
expect(startQaGatewayChild).toHaveBeenCalledWith(
expect.objectContaining({
repoRoot: "/tmp/openclaw-repo",
providerMode: "mock-openai",
providerBaseUrl: "http://127.0.0.1:44080/v1",
}),
);
expect(result.reply).toBe("Protocol note: mock reply.");
// Every started resource is stopped exactly once.
expect(gatewayStop).toHaveBeenCalledTimes(1);
expect(mockStop).toHaveBeenCalledTimes(1);
expect(labStop).toHaveBeenCalledTimes(1);
});
it("skips the mock provider bootstrap for live frontier runs", async () => {
const result = await runQaManualLane({
repoRoot: "/tmp/openclaw-repo",
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
message: "check the kickoff file",
timeoutMs: 5_000,
});
// No mock server is started, so the gateway gets no provider base URL override.
expect(startQaMockOpenAiServer).not.toHaveBeenCalled();
expect(startQaGatewayChild).toHaveBeenCalledWith(
expect.objectContaining({
providerMode: "live-frontier",
providerBaseUrl: undefined,
}),
);
expect(result.reply).toBe("Protocol note: mock reply.");
});
});

View File

@@ -0,0 +1,124 @@
import { randomUUID } from "node:crypto";
import { setTimeout as sleep } from "node:timers/promises";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { startQaGatewayChild } from "./gateway-child.js";
import { startQaLabServer } from "./lab-server.js";
import { resolveQaLiveTurnTimeoutMs } from "./live-timeout.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
/** Inputs for one manual QA lane run, consumed by `runQaManualLane`. */
type QaManualLaneParams = {
/** Repository root forwarded to the gateway child. */
repoRoot: string;
/** "mock-openai" starts a local mock OpenAI server first; the mode is also forwarded to the gateway child. */
providerMode: "mock-openai" | "live-frontier";
/** Primary provider/model ref; also used to derive the session key and echoed back in the result. */
primaryModel: string;
/** Alternate provider/model ref forwarded to the gateway child and the timeout resolver. */
alternateModel: string;
/** Forwarded unchanged to the gateway child. */
fastMode?: boolean;
/** Prompt sent to the QA agent. */
message: string;
/** Agent wait timeout override in ms; used only when it is a positive finite number. */
timeoutMs?: number;
};
/**
 * Picks the wait timeout (in ms) for a manual lane turn.
 *
 * A caller-supplied `timeoutMs` is used as-is when it is a positive, finite
 * number. Anything else (missing, NaN, infinite, zero, negative) defers to
 * `resolveQaLiveTurnTimeoutMs`, called with 120_000 and the primary model.
 *
 * @param params - Lane identity plus the optional explicit timeout.
 * @returns The timeout to use for the agent wait call.
 */
function resolveManualLaneTimeoutMs(params: {
  providerMode: "mock-openai" | "live-frontier";
  primaryModel: string;
  alternateModel: string;
  timeoutMs?: number;
}) {
  const { providerMode, primaryModel, alternateModel, timeoutMs: requestedMs } = params;

  // Reject unusable overrides first and let the shared resolver decide.
  if (typeof requestedMs !== "number" || !Number.isFinite(requestedMs) || requestedMs <= 0) {
    const lane = { providerMode, primaryModel, alternateModel };
    return resolveQaLiveTurnTimeoutMs(lane, 120_000, primaryModel);
  }

  return requestedMs;
}
/**
 * Runs each stopper in order and keeps going when one of them fails, so a
 * single failing shutdown cannot leave later resources running.
 *
 * @param stoppers - Shutdown callbacks, in the order they should run.
 * @throws The first shutdown failure, once every stopper has been attempted.
 */
async function stopQaManualLaneResources(stoppers: ReadonlyArray<() => unknown>) {
  const failures: unknown[] = [];
  for (const stop of stoppers) {
    try {
      await stop();
    } catch (error) {
      failures.push(error);
    }
  }
  if (failures.length > 0) {
    throw failures[0];
  }
}

/**
 * Runs a single manual QA turn: starts the lab server, optionally a mock
 * OpenAI server, and a gateway child; sends `params.message` to the "qa"
 * agent; waits for the run; then reports the last outbound message addressed
 * to the "qa-operator" conversation.
 *
 * Every resource that was started is stopped before this function settles,
 * including when a later startup step or the agent turn fails.
 *
 * @param params - Lane configuration and the prompt to send.
 * @returns The primary model, the raw `agent.wait` result, the reply text
 *   (or `null` when no matching outbound message exists), and the lab base
 *   URL as `watchUrl`.
 * @throws Startup failures are rethrown as-is. Failures during the agent turn
 *   are rethrown as an `Error` carrying the formatted message and the original
 *   error as `cause`. A shutdown failure is thrown after all stops were tried.
 */
export async function runQaManualLane(params: QaManualLaneParams) {
  // Collapse anything outside [a-z0-9._-] so the model ref is safe in a session key.
  const sessionSuffix = params.primaryModel.replace(/[^a-z0-9._-]+/gi, "-");
  const lab = await startQaLabServer({ embeddedGateway: "disabled" });
  let mock: Awaited<ReturnType<typeof startQaMockOpenAiServer>> | null = null;
  let gateway: Awaited<ReturnType<typeof startQaGatewayChild>> | null = null;
  try {
    // Startup happens inside the try so a failure here still tears down
    // whatever was already started (previously the lab/mock servers leaked).
    if (params.providerMode === "mock-openai") {
      mock = await startQaMockOpenAiServer({
        host: "127.0.0.1",
        port: 0,
      });
    }
    gateway = await startQaGatewayChild({
      repoRoot: params.repoRoot,
      providerBaseUrl: mock ? `${mock.baseUrl}/v1` : undefined,
      qaBusBaseUrl: lab.listenUrl,
      providerMode: params.providerMode,
      primaryModel: params.primaryModel,
      alternateModel: params.alternateModel,
      fastMode: params.fastMode,
      controlUiEnabled: false,
    });
    const timeoutMs = resolveManualLaneTimeoutMs({
      providerMode: params.providerMode,
      primaryModel: params.primaryModel,
      alternateModel: params.alternateModel,
      timeoutMs: params.timeoutMs,
    });
    try {
      const started = (await gateway.call(
        "agent",
        {
          idempotencyKey: randomUUID(),
          agentId: "qa",
          sessionKey: `agent:qa:manual:${sessionSuffix}`,
          message: params.message,
          deliver: true,
          channel: "qa-channel",
          to: "dm:qa-operator",
          replyChannel: "qa-channel",
          replyTo: "dm:qa-operator",
        },
        { timeoutMs: 30_000 },
      )) as { runId?: string };
      if (!started.runId) {
        throw new Error(`agent call did not return a runId: ${JSON.stringify(started)}`);
      }
      const waited = (await gateway.call(
        "agent.wait",
        {
          runId: started.runId,
          timeoutMs,
        },
        // Give the transport slightly longer than the wait it is carrying.
        { timeoutMs: timeoutMs + 5_000 },
      )) as { status?: string; error?: string };
      // NOTE(review): presumably a short grace period so the outbound reply
      // reaches the lab state before the snapshot is read; confirm.
      await sleep(500);
      const reply =
        lab.state
          .getSnapshot()
          .messages.filter(
            (candidate) =>
              candidate.direction === "outbound" && candidate.conversation.id === "qa-operator",
          )
          .at(-1)?.text ?? null;
      return {
        model: params.primaryModel,
        waited,
        reply,
        watchUrl: lab.baseUrl,
      };
    } catch (error) {
      throw new Error(formatErrorMessage(error), { cause: error });
    }
  } finally {
    // Same stop order as before (gateway, mock, lab), but every stop is attempted.
    await stopQaManualLaneResources([
      () => gateway?.stop(),
      () => mock?.stop(),
      () => lab.stop(),
    ]);
  }
}