feat(qa): recreate qa lab docker stack

This commit is contained in:
Peter Steinberger
2026-04-05 21:21:44 +01:00
parent 17a324b0de
commit 8e1c81e707
28 changed files with 2088 additions and 890 deletions

View File

@@ -97,6 +97,7 @@ RUN pnpm build:docker
# Force pnpm for UI build (Bun may fail on ARM/Synology architectures)
ENV OPENCLAW_PREFER_PNPM=1
RUN pnpm ui:build
RUN pnpm qa:lab:build
# Prune dev dependencies and strip build-only metadata before copying
# runtime assets into the final image.
@@ -156,6 +157,7 @@ COPY --from=runtime-assets --chown=node:node /app/openclaw.mjs .
COPY --from=runtime-assets --chown=node:node /app/${OPENCLAW_BUNDLED_PLUGIN_DIR} ./${OPENCLAW_BUNDLED_PLUGIN_DIR}
COPY --from=runtime-assets --chown=node:node /app/skills ./skills
COPY --from=runtime-assets --chown=node:node /app/docs ./docs
COPY --from=runtime-assets --chown=node:node /app/qa ./qa
# In npm-installed Docker images, prefer the copied source extension tree for
# bundled discovery so package metadata that points at source entries stays valid.

View File

@@ -1,865 +1,66 @@
---
title: "QA E2E Automation"
summary: "Design note for a full end-to-end QA system built on a synthetic message-channel plugin, Dockerized OpenClaw, and subagent-driven scenario execution"
summary: "Private QA automation shape for qa-lab, qa-channel, seeded scenarios, and protocol reports"
read_when:
- You are designing a true end-to-end QA harness for OpenClaw
- You want a synthetic message channel for automated feature verification
- You want subagents to discover features, run scenarios, and propose fixes
- Extending qa-lab or qa-channel
- Adding repo-backed QA scenarios
- Building higher-realism QA automation around the Gateway dashboard
title: "QA E2E Automation"
---
# QA E2E Automation
This note proposes a true end-to-end QA system for OpenClaw built around a
real channel plugin dedicated to testing.
The private QA stack is meant to exercise OpenClaw in a more realistic,
channel-shaped way than a single unit test can.
The core idea:
Current pieces:
- run OpenClaw inside Docker in a realistic gateway configuration
- expose a synthetic but full-featured message channel as a normal plugin
- let a QA harness inject inbound traffic and inspect outbound state
- let OpenClaw agents and subagents explore, verify, and report on behavior
- optionally escalate failing scenarios into host-side fix workflows that open PRs
- `extensions/qa-channel`: synthetic message channel with DM, channel, thread,
reaction, edit, and delete surfaces.
- `extensions/qa-lab`: debugger UI and QA bus for observing the transcript,
injecting inbound messages, and exporting a Markdown report.
- `qa/`: repo-backed seed assets for the kickoff task and baseline QA
scenarios.
This is not a unit-test replacement. It is a product-level system test layer.
The long-term goal is a two-pane QA site:
## Chosen direction
- Left: Gateway dashboard (Control UI) with the agent.
- Right: QA Lab, showing the Slack-ish transcript and scenario plan.
The initial direction for this project is:
That lets an operator or automation loop give the agent a QA mission, observe
real channel behavior, and record what worked, failed, or stayed blocked.
- build the full system inside this repo
- test against a matrix, not a single model/provider pair
- use Markdown reports as the first output artifact
- defer auto-PR and auto-fix work until later
- treat Slack-class semantics as the MVP transport target
- keep orchestration simple in v1, with a host-side controller that exercises
the moving parts directly
- evolve toward OpenClaw becoming the orchestration layer later, once the
transport, scenario, and reporting model are proven
## Repo-backed seeds
## Goals
Seed assets live in `qa/`:
- Test OpenClaw through a real messaging-channel boundary, not only `chat.send`
or embedded mocks.
- Verify channel semantics that matter for real use:
- DMs
- channels/groups
- threads
- edits
- deletes
- reactions
- polls
- attachments
- Verify agent behavior across realistic user flows:
- memory
- thread binding
- model switching
- cron jobs
- subagents
- approvals
- routing
- channel-specific `message` actions
- Make the QA runner capable of feature discovery:
- read docs
- inspect plugin capability discovery
- inspect code and config
- generate a scenario protocol
- Support deterministic protocol tests and best-effort real-model tests as
separate lanes.
- Allow automated bug triage artifacts that can feed a host-side fix worker.
- `qa/QA_KICKOFF_TASK.md`
- `qa/seed-scenarios.json`
## Non-goals
These are intentionally in git so the QA plan is visible to both humans and the
agent. The baseline list should stay broad enough to cover:
- Not a replacement for existing unit, contract, or live tests.
- Not a production channel.
- Not a requirement that all bug fixing happen from inside the Dockerized
OpenClaw runtime.
- Not a reason to add test-only core branches for one channel.
- DM and channel chat
- thread behavior
- message action lifecycle
- cron callbacks
- memory recall
- model switching
- subagent handoff
- repo-reading and docs-reading
- one small build task such as Lobster Invaders
## Why a channel plugin
## Reporting
OpenClaw already has the right boundary:
`qa-lab` exports a Markdown protocol report from the observed bus timeline.
The report should answer:
- core owns the shared `message` tool, prompt wiring, outer session
bookkeeping, and dispatch
- channel plugins own:
- config
- pairing
- security
- session grammar
- threading
- outbound delivery
- channel-owned actions and capability discovery
- What worked
- What failed
- What stayed blocked
- What follow-up scenarios are worth adding
That means the cleanest design is:
## Related docs
- a real channel plugin for QA transport semantics
- a separate QA control plane for injection and inspection
This keeps the test transport inside the same architecture used by Slack,
Discord, Teams, and similar channels.
## System overview
The system has six pieces.
1. `qa-channel` plugin
- Bundled extension under `extensions/qa-channel`
- Normal `ChannelPlugin`
- Behaves like a Slack/Discord/Teams-class channel
- Registers channel-owned message actions through the shared `message` tool
2. `qa-bus` sidecar
- Small HTTP and/or WS service
- Canonical state store for synthetic conversations, messages, threads,
reactions, edits, and event history
- Accepts inbound events from the harness
- Exposes inspection and wait APIs for assertions
3. Dockerized OpenClaw gateway
- Runs as close to real deployment as practical
- Loads `qa-channel`
- Uses normal config, routing, session, cron, and plugin loading
4. QA orchestrator
- Host-side runner or dedicated OpenClaw-driven controller
- Provisions scenario environments
- Seeds config
- Resets state
- Executes test matrix
- Collects structured outcomes
5. Auto-fix worker
- Host-side workflow
- Creates a worktree
- Launches a coding agent
- Runs scoped verification
- Opens a PR
The auto-fix worker should start outside the container. It needs direct repo
and GitHub access, clean worktree control, and better isolation from the
runtime under test.
6. `qa-lab` extension
- Bundled extension under `extensions/qa-lab`
- Owns the QA harness, Markdown report flow, and private debugger UI
- Registers hidden CLI entrypoints such as `openclaw qa run` and
`openclaw qa ui`
- Stays separate from the shipped Control UI bundle
## High-level flow
1. Start `qa-bus`.
2. Start OpenClaw in Docker with `qa-channel` enabled.
3. QA orchestrator injects inbound messages into `qa-bus`.
4. `qa-channel` receives them as normal inbound traffic.
5. OpenClaw runs the agent loop normally.
6. Outbound replies and channel actions flow back through `qa-channel` into
`qa-bus`.
7. QA orchestrator inspects state or waits on events.
8. Orchestrator records pass/fail/flaky/unknown plus artifacts.
9. Severe failures optionally emit a bug packet for the host-side fix worker.
## Lanes
The system should have two distinct lanes.
### Lane A: deterministic protocol lane
Use a deterministic or tightly controlled model setup.
Preferred options:
- a canned provider fixture
- the bundled `synthetic` provider when useful
- fixed prompts with exact assertions
Purpose:
- verify transport and product semantics
- keep flakiness low
- catch regressions in routing, memory plumbing, thread binding, cron, and tool
invocation
### Lane B: quality lane
Use real providers and real models in a matrix.
Purpose:
- verify that the agent can still do good work end to end
- evaluate feature discoverability and instruction following
- surface model-specific breakage or degraded behavior
Expected result type:
- best-effort
- rubric-based
- more tolerant of wording variation
Matrix guidance for v1:
- start with a small curated matrix, not "everything configured"
- keep deterministic protocol runs separate from quality runs
- report matrix cells independently so one provider/model failure does not hide
transport correctness
Do not mix these lanes. Protocol correctness and model quality should fail
independently.
## Use existing bootstrap seam first
Before the custom channel exists, OpenClaw already has a useful bootstrap path:
- admin-scoped synthetic originating-route fields on `chat.send`
- synthetic message-channel headers for HTTP flows
That is enough to build a first QA controller for:
- thread/session routing
- ACP bind flows
- subagent delivery
- cron wake paths
- memory persistence checks
This should be Phase 0 because it de-risks the scenario protocol before the
full channel lands.
## `qa-lab` extension design
`qa-lab` is the private operator-facing half of this system.
Suggested package:
- `extensions/qa-lab/`
Suggested responsibilities:
- host the synthetic bus state machine
- host the scenario runner
- write Markdown reports
- serve a private debugger UI on a separate local server
- keep that UI entirely outside the shipped Control UI bundle
Suggested UI shape:
- left rail for conversations and threads
- center transcript pane
- right rail for event stream and report inspection
- bottom inject-composer for inbound QA traffic
## `qa-channel` plugin design
## Package layout
Suggested package:
- `extensions/qa-channel/`
Suggested file layout:
- `package.json`
- `openclaw.plugin.json`
- `index.ts`
- `setup-entry.ts`
- `api.ts`
- `runtime-api.ts`
- `src/channel.ts`
- `src/channel-api.ts`
- `src/config-schema.ts`
- `src/setup-core.ts`
- `src/setup-surface.ts`
- `src/runtime.ts`
- `src/channel.runtime.ts`
- `src/inbound.ts`
- `src/outbound.ts`
- `src/state-client.ts`
- `src/targets.ts`
- `src/threading.ts`
- `src/message-actions.ts`
- `src/probe.ts`
- `src/doctor.ts`
- `src/*.test.ts`
Model it after Slack, Discord, Teams, or Google Chat packaging, not as a one-off
test helper.
## Capabilities
MVP capabilities:
- one account
- DMs
- channels
- threads
- send text
- reply in thread
- read
- edit
- delete
- react
- search
- upload-file
- download-file
Phase 2 capabilities:
- polls
- member-info
- channel-info
- channel-list
- pin and unpin
- permissions
- topic create and edit
These map naturally onto the shared `message` tool action model already used by
channel plugins.
## Conversation model
Use a stable synthetic grammar that supports both simplicity and realistic
coverage.
Suggested ids:
- DM conversation: `dm:<user-id>`
- channel: `chan:<space-id>`
- thread: `thread:<space-id>:<thread-id>`
- message id: `msg:<ulid>`
Suggested target forms:
- `qa:dm:<user-id>`
- `qa:chan:<space-id>`
- `qa:thread:<space-id>:<thread-id>`
The plugin should own translation between external target strings and canonical
conversation ids.
## Pairing and security
Even though this is a QA channel, it should still implement real policy
surfaces:
- DM allowlist / pairing flow
- group policy
- mention gating where relevant
- trusted sender ids
Reason:
- these are product features and should be testable through the QA transport
- the QA lane should be able to verify policy failures, not only happy paths
## Threading model
Threading is one of the main reasons to build this channel.
Required semantics:
- create thread from a top-level message
- reply inside an existing thread
- list thread messages
- preserve parent message linkage
- let OpenClaw thread binding attach a session to a thread
The QA bus must preserve:
- conversation id
- thread id
- parent message id
- sender id
- timestamps
## Channel-owned message actions
The plugin should implement `actions.describeMessageTool(...)` and
`actions.handleAction(...)`.
MVP action list:
- `send`
- `read`
- `reply`
- `react`
- `edit`
- `delete`
- `thread-create`
- `thread-reply`
- `search`
- `upload-file`
- `download-file`
This is enough to test the shared `message` tool end to end with real channel
semantics.
## `qa-bus` design
`qa-bus` is the transport simulator and assertion backend.
It should not know OpenClaw internals. It should know channel state.
For v1, keep `qa-bus` in this repo so:
- fixtures and scenarios evolve with product code
- the transport contract can change in lock-step with the plugin
- CI and local dev do not need another repo checkout
## Responsibilities
- accept inbound user/platform events
- persist canonical conversation state
- persist append-only event log
- expose inspection APIs
- expose blocking wait APIs
- support reset per scenario or per suite
## Transport
HTTP is enough for MVP.
Suggested endpoints:
- `POST /reset`
- `POST /inbound/message`
- `POST /inbound/edit`
- `POST /inbound/delete`
- `POST /inbound/reaction`
- `POST /inbound/thread/create`
- `GET /state/conversations`
- `GET /state/messages`
- `GET /state/threads`
- `GET /events`
- `POST /wait`
Optional WS stream:
- `/stream`
Useful for live event taps and debugging.
## State model
Persist three layers.
1. Conversation snapshot
- participants
- type
- thread topology
- latest message pointers
2. Message snapshot
- sender
- content
- attachments
- edit history
- reactions
- parent and thread linkage
3. Append-only event log
- canonical timestamp
- causal ordering
- source: inbound, outbound, action, system
- payload
The append-only log matters because many QA assertions are event-oriented, not
just state-oriented.
## Assertion API
The harness needs waiters, not just snapshots.
Suggested `POST /wait` contract:
- `kind`
- `match`
- `timeoutMs`
Examples:
- wait for outbound message matching text regex
- wait for thread creation
- wait for reaction added
- wait for message edit
- wait for no event of type X within Y ms
This gives stable tests without custom polling code in every scenario.
## QA orchestrator design
The orchestrator should own scenario planning and artifact collection.
Start host-side. Later, OpenClaw can orchestrate parts of it.
This is the chosen v1 direction.
Why:
- simpler to iterate while the transport and scenario protocol are still moving
- easier access to the repo, logs, Docker, and test fixtures
- easier artifact collection and report generation
- avoids over-coupling the first version to subagent behavior before the QA
protocol itself is stable
## Inputs
- docs pages
- channel capability discovery
- configured provider/model lane
- scenario catalog
- repo/test metadata
## Outputs
- structured protocol report
- scenario transcript
- captured channel state
- gateway logs
- failure packets
For v1, the primary output is a Markdown report.
Suggested report sections:
- suite summary
- environment
- provider/model matrix
- scenarios passed
- scenarios failed
- flaky or inconclusive scenarios
- captured evidence links or inline excerpts
- suspected ownership or file hints
- follow-up recommendations
## Scenario format
Use a data-driven scenario spec.
Suggested shape:
```json
{
"id": "thread-memory-recall",
"lane": "deterministic",
"preconditions": ["qa-channel", "memory-enabled"],
"steps": [
{
"type": "injectMessage",
"to": "qa:dm:user-a",
"text": "Remember that the deploy key is kiwi."
},
{ "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } },
{ "type": "injectMessage", "to": "qa:dm:user-a", "text": "What was the deploy key?" },
{ "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } }
],
"assertions": [{ "type": "outboundTextIncludes", "value": "kiwi" }]
}
```
Keep the execution engine generic and the scenario catalog declarative.
## Feature discovery
The orchestrator can discover candidate scenarios from three sources.
1. Docs
- channel docs
- testing docs
- gateway docs
- subagents docs
- cron docs
2. Runtime capability discovery
- channel `message` action discovery
- plugin status and channel capabilities
- configured providers/models
3. Code hints
- known action names
- channel-specific feature flags
- config schema
This should produce a proposed protocol with:
- must-test
- can-test
- blocked
- unsupported
## Scenario classes
Recommended catalog:
- transport basics
- DM send and reply
- channel send
- thread create and reply
- reaction add and read
- edit and delete
- policy
- allowlist
- pairing
- group mention gating
- shared `message` tool
- read
- search
- reply
- react
- upload and download
- agent quality
- follows channel context
- obeys thread semantics
- uses memory across turns
- switches model when instructed
- automation
- cron add and run
- cron delivery into channel
- scheduled reminders
- subagents
- spawn
- announce
- threaded follow-up
- nested orchestration when enabled
- failure handling
- unsupported action
- timeout
- malformed target
- policy denial
## OpenClaw as orchestrator
Longer-term, OpenClaw itself can coordinate the QA run.
Suggested architecture:
- one controller session
- N worker subagents
- each worker owns one scenario or scenario shard
- workers report structured results back to controller
Good fits for existing OpenClaw primitives:
- `sessions_spawn`
- `subagents`
- cron-based wakeups for long-running suites
- thread-bound sessions for scenario-local follow-up
Best near-term use:
- controller generates the plan
- workers execute scenarios in parallel
- controller synthesizes report
Avoid making the controller also own host Git operations in the first version.
Chosen direction:
- v1: host-side controller
- v2+: OpenClaw-native orchestration once the scenario protocol and transport
model are stable
## Auto-fix workflow
The system should emit a structured bug packet when a scenario fails.
Suggested bug packet:
- scenario id
- lane
- failure kind
- minimal repro steps
- channel event transcript
- gateway transcript
- logs
- suspected files
- confidence
Host-side fix worker flow:
1. receive bug packet
2. create detached worktree
3. launch coding agent in worktree
4. write failing regression first when practical
5. implement fix
6. run scoped verification
7. open PR
This should remain host-side at first because it needs:
- repo write access
- worktree hygiene
- git credentials
- GitHub auth
Chosen direction:
- do not auto-open PRs in v1
- emit Markdown reports and structured failure packets first
- add host-side worktree + PR automation later
## Rollout plan
## Phase 0: bootstrap on existing synthetic ingress
Build a first QA runner without a new channel:
- use `chat.send` with admin-scoped synthetic originating-route fields
- run deterministic scenarios against routing, memory, cron, subagents, and ACP
- validate protocol format and artifact collection
Exit criteria:
- scenario runner exists
- structured protocol report exists
- failure artifacts exist
## Phase 1: MVP `qa-channel`
Build the plugin and bus with:
- DM
- channels
- threads
- read
- reply
- react
- edit
- delete
- search
Target semantics:
- Slack-class transport behavior
- not full Teams-class parity yet
Exit criteria:
- OpenClaw in Docker can talk to `qa-bus`
- harness can inject + inspect
- one green end-to-end suite across message transport and agent behavior
## Phase 2: protocol expansion
Add:
- attachments
- polls
- pins
- richer policy tests
- quality lane with real provider/model matrix
Exit criteria:
- scenario matrix covers major built-in features
- deterministic and quality lanes are separated
## Phase 3: subagent-driven QA
Add:
- controller agent
- worker subagents
- scenario discovery from docs + capability discovery
- parallel execution
Exit criteria:
- one controller can fan out and synthesize a suite report
## Phase 4: auto-fix loop
Add:
- bug packet emission
- host-side worktree runner
- PR creation
Exit criteria:
- selected failures can auto-produce draft PRs
## Risks
## Risk: too much magic in one layer
If the QA channel, bus, and orchestrator all become smart at once, debugging
will be painful.
Mitigation:
- keep `qa-channel` transport-focused
- keep `qa-bus` state-focused
- keep orchestrator separate
## Risk: flaky assertions from model variance
Mitigation:
- deterministic lane
- quality lane
- different pass criteria
## Risk: test-only branches leaking into core
Mitigation:
- no core special cases for `qa-channel`
- use normal plugin seams
- use admin synthetic ingress only as bootstrap
## Risk: auto-fix overreach
Mitigation:
- keep fix worker host-side
- require explicit policy for when PRs can open automatically
- gate with scoped tests
## Risk: building a fake platform nobody uses
Mitigation:
- emulate Slack/Discord/Teams semantics, not an abstract transport
- prioritize features that stress shared OpenClaw boundaries
## MVP recommendation
If building this now, start with this exact order.
1. Host-side scenario runner using existing synthetic originating-route support.
2. `qa-bus` sidecar with state, events, reset, and wait APIs.
3. `extensions/qa-channel` MVP with DMs, channels, threads, reply, read, react,
edit, delete, and search.
4. Markdown report generator for suite + matrix output.
5. One deterministic end-to-end suite:
- inject inbound DM
- verify reply
- create thread
- verify follow-up in thread
- verify memory recall on later turn
6. Add curated real-model matrix quality lane.
7. Add controller subagent orchestration.
8. Add host-side auto-fix worktree runner.
This order gets real value quickly without requiring the full grand design to
land before the first useful signal appears.
## Current product decisions
- `qa-bus` lives inside this repo
- the first controller is host-side
- Slack-class behavior is the MVP target
- the quality lane uses a curated matrix
- first version produces Markdown reports, not PRs
- OpenClaw-native orchestration is a later phase, not a v1 requirement
- [Testing](/help/testing)
- [QA Channel](/channels/qa-channel)
- [Dashboard](/web/dashboard)

View File

@@ -2,9 +2,16 @@ export * from "./src/bus-queries.js";
export * from "./src/bus-server.js";
export * from "./src/bus-state.js";
export * from "./src/bus-waiters.js";
export * from "./src/cli.js";
export * from "./src/harness-runtime.js";
export * from "./src/lab-server.js";
export * from "./src/docker-harness.js";
export * from "./src/mock-openai-server.js";
export * from "./src/qa-agent-bootstrap.js";
export * from "./src/qa-agent-workspace.js";
export * from "./src/qa-gateway-config.js";
export * from "./src/report.js";
export * from "./src/scenario.js";
export * from "./src/scenario-catalog.js";
export * from "./src/self-check-scenario.js";
export * from "./src/self-check.js";

View File

@@ -1,4 +1,7 @@
import path from "node:path";
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
import { startQaLabServer } from "./lab-server.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
const server = await startQaLabServer({
@@ -12,10 +15,29 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
}
}
export async function runQaLabUiCommand(opts: { host?: string; port?: number }) {
export async function runQaLabUiCommand(opts: {
host?: string;
port?: number;
advertiseHost?: string;
advertisePort?: number;
controlUiUrl?: string;
controlUiToken?: string;
controlUiProxyTarget?: string;
autoKickoffTarget?: string;
embeddedGateway?: string;
sendKickoffOnStart?: boolean;
}) {
const server = await startQaLabServer({
host: opts.host,
port: Number.isFinite(opts.port) ? opts.port : undefined,
advertiseHost: opts.advertiseHost,
advertisePort: Number.isFinite(opts.advertisePort) ? opts.advertisePort : undefined,
controlUiUrl: opts.controlUiUrl,
controlUiToken: opts.controlUiToken,
controlUiProxyTarget: opts.controlUiProxyTarget,
autoKickoffTarget: opts.autoKickoffTarget,
embeddedGateway: opts.embeddedGateway,
sendKickoffOnStart: opts.sendKickoffOnStart,
});
process.stdout.write(`QA Lab UI: ${server.baseUrl}\n`);
process.stdout.write("Press Ctrl+C to stop.\n");
@@ -35,3 +57,56 @@ export async function runQaLabUiCommand(opts: { host?: string; port?: number })
process.on("SIGTERM", onSignal);
await new Promise(() => undefined);
}
/**
 * Writes the QA Docker scaffold files and prints the directory they landed in.
 *
 * `opts.outputDir` is resolved to an absolute path, the current working
 * directory is passed as the repo root, and port values that are not finite
 * numbers (for example `NaN`) are forwarded as `undefined`.
 *
 * @param opts Scaffold options collected by the CLI.
 */
export async function runQaDockerScaffoldCommand(opts: {
  outputDir: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
}) {
  // Drop anything that is not a finite number so the writer sees `undefined` instead.
  const finiteOrUndefined = (value: number | undefined) =>
    Number.isFinite(value) ? value : undefined;

  const scaffold = await writeQaDockerHarnessFiles({
    outputDir: path.resolve(opts.outputDir),
    repoRoot: process.cwd(),
    gatewayPort: finiteOrUndefined(opts.gatewayPort),
    qaLabPort: finiteOrUndefined(opts.qaLabPort),
    providerBaseUrl: opts.providerBaseUrl,
    imageName: opts.image,
    usePrebuiltImage: opts.usePrebuiltImage,
  });

  process.stdout.write(`QA docker scaffold: ${scaffold.outputDir}\n`);
}
/**
 * Builds the QA Docker harness image, using the current working directory as
 * the repo root, and prints the image name reported by the builder.
 *
 * @param opts.image Optional image name forwarded to the builder as `imageName`.
 */
export async function runQaDockerBuildImageCommand(opts: { image?: string }) {
  const { imageName } = await buildQaDockerHarnessImage({
    repoRoot: process.cwd(),
    imageName: opts.image,
  });
  process.stdout.write(`QA docker image: ${imageName}\n`);
}
/**
 * Starts the QA mock OpenAI server, prints its base URL, and then blocks until
 * the process receives SIGINT or SIGTERM.
 *
 * On the first signal the handlers are removed, the server is stopped, and the
 * process exits with code 0.
 *
 * @param opts.host Optional bind host forwarded to the server.
 * @param opts.port Optional bind port; non-finite values are forwarded as `undefined`.
 */
export async function runQaMockOpenAiCommand(opts: { host?: string; port?: number }) {
  const port = Number.isFinite(opts.port) ? opts.port : undefined;
  const server = await startQaMockOpenAiServer({ host: opts.host, port });

  process.stdout.write(`QA mock OpenAI: ${server.baseUrl}\n`);
  process.stdout.write("Press Ctrl+C to stop.\n");

  const handleSignal = (): void => {
    // Fire-and-forget: signal listeners are synchronous, the teardown is not.
    void (async () => {
      process.off("SIGINT", handleSignal);
      process.off("SIGTERM", handleSignal);
      await server.stop();
      process.exit(0);
    })();
  };

  process.on("SIGINT", handleSignal);
  process.on("SIGTERM", handleSignal);

  // Never resolves; the command only ends through the signal handler above.
  await new Promise(() => undefined);
}

View File

@@ -14,11 +14,43 @@ async function runQaSelfCheck(opts: { output?: string }) {
await runtime.runQaLabSelfCheckCommand(opts);
}
/**
 * Loads the QA lab CLI runtime via `loadQaLabCliRuntime()` and forwards the
 * parsed `qa ui` options to its `runQaLabUiCommand`.
 *
 * The stale single-line signature that preceded this declaration has been
 * removed; it duplicated the function header and left it unclosed.
 *
 * @param opts Options collected by the CLI; passed through unchanged.
 */
async function runQaUi(opts: {
  host?: string;
  port?: number;
  advertiseHost?: string;
  advertisePort?: number;
  controlUiUrl?: string;
  controlUiToken?: string;
  controlUiProxyTarget?: string;
  autoKickoffTarget?: string;
  embeddedGateway?: string;
  sendKickoffOnStart?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaLabUiCommand(opts);
}
/**
 * Loads the QA lab CLI runtime via `loadQaLabCliRuntime()` and forwards the
 * parsed `qa docker-scaffold` options to its `runQaDockerScaffoldCommand`.
 *
 * The option type includes `providerBaseUrl` so it matches both the CLI action
 * that supplies `--provider-base-url` and the runtime command that consumes it.
 *
 * @param opts Options collected by the CLI; passed through unchanged.
 */
async function runQaDockerScaffold(opts: {
  outputDir: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaDockerScaffoldCommand(opts);
}
/**
 * Loads the QA lab CLI runtime and delegates to its
 * `runQaDockerBuildImageCommand` with the options unchanged.
 */
async function runQaDockerBuildImage(opts: { image?: string }) {
  const qaLabRuntime = await loadQaLabCliRuntime();
  await qaLabRuntime.runQaDockerBuildImageCommand(opts);
}
/**
 * Loads the QA lab CLI runtime and delegates to its `runQaMockOpenAiCommand`
 * with the options unchanged.
 */
async function runQaMockOpenAi(opts: { host?: string; port?: number }) {
  const qaLabRuntime = await loadQaLabCliRuntime();
  await qaLabRuntime.runQaMockOpenAiCommand(opts);
}
export function registerQaLabCli(program: Command) {
const qa = program
.command("qa")
@@ -35,7 +67,73 @@ export function registerQaLabCli(program: Command) {
.description("Start the private QA debugger UI and local QA bus")
.option("--host <host>", "Bind host", "127.0.0.1")
.option("--port <port>", "Bind port", (value: string) => Number(value))
.option("--advertise-host <host>", "Optional public host to advertise in bootstrap payloads")
.option("--advertise-port <port>", "Optional public port to advertise", (value: string) =>
Number(value),
)
.option("--control-ui-url <url>", "Optional Control UI URL to embed beside the QA panel")
.option("--control-ui-token <token>", "Optional Control UI token for embedded links")
.option(
"--control-ui-proxy-target <url>",
"Optional upstream Control UI target for /control-ui proxying",
)
.option("--auto-kickoff-target <kind>", "Kickoff default target (direct or channel)")
.option("--embedded-gateway <mode>", "Embedded gateway mode hint", "enabled")
.option(
"--send-kickoff-on-start",
"Inject the repo-backed kickoff task when the UI starts",
false,
)
.action(
async (opts: {
host?: string;
port?: number;
advertiseHost?: string;
advertisePort?: number;
controlUiUrl?: string;
controlUiToken?: string;
controlUiProxyTarget?: string;
autoKickoffTarget?: string;
embeddedGateway?: string;
sendKickoffOnStart?: boolean;
}) => {
await runQaUi(opts);
},
);
// Registers the `docker-scaffold` subcommand. `--output-dir` is required; the
// port options are converted with Number(), so non-numeric input arrives as NaN.
// The parsed options are handed to runQaDockerScaffold unchanged.
qa.command("docker-scaffold")
.description("Write a prebaked Docker scaffold for the QA dashboard + gateway lane")
.requiredOption("--output-dir <path>", "Output directory for docker-compose + state files")
.option("--gateway-port <port>", "Gateway host port", (value: string) => Number(value))
.option("--qa-lab-port <port>", "QA lab host port", (value: string) => Number(value))
.option("--provider-base-url <url>", "Provider base URL for the QA gateway")
.option("--image <name>", "Prebaked image name", "openclaw:qa-local-prebaked")
.option("--use-prebuilt-image", "Use image: instead of build: in docker-compose", false)
.action(
async (opts: {
outputDir: string;
gatewayPort?: number;
qaLabPort?: number;
providerBaseUrl?: string;
image?: string;
usePrebuiltImage?: boolean;
}) => {
await runQaDockerScaffold(opts);
},
);
// Registers the `docker-build-image` subcommand. `--image` defaults to
// "openclaw:qa-local-prebaked", the same default used by `docker-scaffold`.
qa.command("docker-build-image")
.description("Build the prebaked QA Docker image with qa-channel + qa-lab bundled")
.option("--image <name>", "Image tag", "openclaw:qa-local-prebaked")
.action(async (opts: { image?: string }) => {
await runQaDockerBuildImage(opts);
});
// Registers the `mock-openai` subcommand, which runs only the mock OpenAI
// Responses API server. `--port` is converted with Number(), so non-numeric
// input arrives as NaN.
qa.command("mock-openai")
  .description("Run the local mock OpenAI Responses API server for QA")
  .option("--host <host>", "Bind host", "127.0.0.1")
  .option("--port <port>", "Bind port", (value: string) => Number(value))
  .action(async (opts: { host?: string; port?: number }) => {
    // Do not call runQaUi here: the UI command awaits a never-resolving promise,
    // so the mock server would never be started and the QA Lab UI would run instead.
    await runQaMockOpenAi(opts);
  });
}

View File

@@ -0,0 +1,107 @@
import { mkdtemp, readFile, rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
const cleanups: Array<() => Promise<void>> = [];
// After every test, run the registered cleanups one at a time, most recently
// registered first, leaving the list empty for the next test.
afterEach(async () => {
  while (cleanups.length > 0) {
    const cleanup = cleanups.pop();
    if (cleanup) {
      await cleanup();
    }
  }
});
describe("qa docker harness", () => {
// Verifies that writeQaDockerHarnessFiles produces the expected set of scaffold
// files in a temp directory and that each file contains the expected fragments.
it("writes compose, env, config, and workspace scaffold files", async () => {
// Fresh temp directory per run; removed by the afterEach cleanup loop.
const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-test-"));
cleanups.push(async () => {
await rm(outputDir, { recursive: true, force: true });
});
const result = await writeQaDockerHarnessFiles({
outputDir,
gatewayPort: 18889,
qaLabPort: 43124,
gatewayToken: "qa-token",
providerBaseUrl: "http://host.docker.internal:45123/v1",
repoRoot: "/repo/openclaw",
usePrebuiltImage: true,
});
// arrayContaining: these files must be reported, but extra files are allowed.
expect(result.files).toEqual(
expect.arrayContaining([
path.join(outputDir, ".env.example"),
path.join(outputDir, "README.md"),
path.join(outputDir, "docker-compose.qa.yml"),
path.join(outputDir, "state", "openclaw.json"),
path.join(outputDir, "state", "seed-workspace", "QA_KICKOFF_TASK.md"),
path.join(outputDir, "state", "seed-workspace", "QA_SCENARIO_PLAN.md"),
path.join(outputDir, "state", "seed-workspace", "IDENTITY.md"),
]),
);
// Compose file: prebuilt image reference, port mappings built from the custom
// host ports passed above, health check, lab arguments, mounts, and environment.
const compose = await readFile(path.join(outputDir, "docker-compose.qa.yml"), "utf8");
expect(compose).toContain("image: openclaw:qa-local-prebaked");
expect(compose).toContain("qa-mock-openai:");
expect(compose).toContain("18889:18789");
expect(compose).toContain(' - "43124:43123"');
expect(compose).toContain(" - sh");
expect(compose).toContain(" - -lc");
expect(compose).toContain(
' - fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))',
);
expect(compose).toContain(" - --control-ui-proxy-target");
expect(compose).toContain(' - "http://openclaw-qa-gateway:18789/"');
expect(compose).toContain(" - --send-kickoff-on-start");
expect(compose).toContain(":/opt/openclaw-repo:ro");
expect(compose).toContain("./state:/opt/openclaw-scaffold:ro");
expect(compose).toContain(
"cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/",
);
expect(compose).toContain("OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json");
expect(compose).toContain("OPENCLAW_STATE_DIR: /tmp/openclaw/state");
// Env example: echoes the token and provider URL passed in above; the QA lab
// URL carries the custom host port (43124).
const envExample = await readFile(path.join(outputDir, ".env.example"), "utf8");
expect(envExample).toContain("OPENCLAW_GATEWAY_TOKEN=qa-token");
expect(envExample).toContain("QA_BUS_BASE_URL=http://qa-lab:43123");
expect(envExample).toContain("QA_PROVIDER_BASE_URL=http://host.docker.internal:45123/v1");
expect(envExample).toContain("QA_LAB_URL=http://127.0.0.1:43124");
// Gateway config: checked only by substring, so which key each fragment
// belongs to is not pinned down by this test.
const config = await readFile(path.join(outputDir, "state", "openclaw.json"), "utf8");
expect(config).toContain('"allowInsecureAuth": true');
expect(config).toContain('"enabled": false');
expect(config).toContain("/app/dist/control-ui");
expect(config).toContain("C-3PO QA");
expect(config).toContain('"/tmp/openclaw/workspace"');
// Seeded kickoff task must mention the sample build task.
const kickoff = await readFile(
path.join(outputDir, "state", "seed-workspace", "QA_KICKOFF_TASK.md"),
"utf8",
);
expect(kickoff).toContain("Lobster Invaders");
});
it("builds the reusable QA image with bundled QA extensions", async () => {
const calls: string[] = [];
const result = await buildQaDockerHarnessImage(
{
repoRoot: "/repo/openclaw",
imageName: "openclaw:qa-local-prebaked",
},
{
async runCommand(command, args, cwd) {
calls.push([command, ...args, `@${cwd}`].join(" "));
return { stdout: "", stderr: "" };
},
},
);
expect(result.imageName).toBe("openclaw:qa-local-prebaked");
expect(calls).toEqual([
expect.stringContaining(
"docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS=qa-channel qa-lab -f Dockerfile . @/repo/openclaw",
),
]);
});
});

View File

@@ -0,0 +1,353 @@
import { randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
// Port the qa-lab service binds inside its container; the generated compose file
// publishes it on the host as `qaLabPort` and uses it for the service healthcheck.
const QA_LAB_INTERNAL_PORT = 43123;
/**
 * Relative path from `fromDir` to `toPath`, always written with "/" separators
 * regardless of the platform separator.
 */
function toPosixRelative(fromDir: string, toPath: string): string {
  const platformRelative = path.relative(fromDir, toPath);
  const segments = platformRelative.split(path.sep);
  return segments.join("/");
}
/**
 * Render the compose fragment that selects a service image: either a reference to
 * a prebuilt image tag, or a `build:` section pointing at the repo Dockerfile with
 * the QA extensions passed as a build argument.
 */
function renderImageBlock(params: {
  outputDir: string;
  repoRoot: string;
  imageName: string;
  usePrebuiltImage: boolean;
}) {
  const { outputDir, repoRoot, imageName, usePrebuiltImage } = params;
  if (!usePrebuiltImage) {
    // Build context is the repo root expressed relative to the compose file's directory.
    const context = toPosixRelative(outputDir, repoRoot) || ".";
    return ` build:\n context: ${context}\n dockerfile: Dockerfile\n args:\n OPENCLAW_EXTENSIONS: "qa-channel qa-lab"\n`;
  }
  return ` image: ${imageName}\n`;
}
/**
 * Render the docker-compose YAML for the QA stack.
 *
 * Services emitted:
 * - `qa-mock-openai`: always present; runs `qa mock-openai` on port 44080.
 * - `qa-lab`: only when `includeQaLabUi` is true; runs `qa ui`, published on the
 *   host as `qaLabPort`, and proxies the Control UI to the gateway service.
 * - `openclaw-qa-gateway`: always present; published on the host as `gatewayPort`,
 *   copies the scaffold from the read-only `./state` mount into /tmp/openclaw and
 *   then runs the gateway on port 18789.
 *
 * Every service reuses the same image/build fragment from renderImageBlock.
 * The returned string is the file content; its exact text (including the
 * interpolation order below) is what gets written to disk.
 */
function renderCompose(params: {
outputDir: string;
repoRoot: string;
imageName: string;
usePrebuiltImage: boolean;
gatewayPort: number;
qaLabPort: number;
gatewayToken: string;
includeQaLabUi: boolean;
}) {
const imageBlock = renderImageBlock(params);
// Repo root relative to the compose file's directory; mounted read-only into the gateway.
const repoMount = toPosixRelative(params.outputDir, params.repoRoot) || ".";
// The two `${ params.includeQaLabUi ? ... : "" }` interpolations below add the qa-lab
// service and the gateway's dependency on it only when the UI is requested.
return `services:
qa-mock-openai:
${imageBlock} pull_policy: never
healthcheck:
test:
- CMD
- node
- -e
- fetch("http://127.0.0.1:44080/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
interval: 10s
timeout: 5s
retries: 6
start_period: 3s
command:
- node
- dist/index.js
- qa
- mock-openai
- --host
- "0.0.0.0"
- --port
- "44080"
${
params.includeQaLabUi
? ` qa-lab:
${imageBlock} pull_policy: never
ports:
- "${params.qaLabPort}:${QA_LAB_INTERNAL_PORT}"
healthcheck:
test:
- CMD
- node
- -e
- fetch("http://127.0.0.1:${QA_LAB_INTERNAL_PORT}/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
interval: 10s
timeout: 5s
retries: 6
start_period: 5s
environment:
OPENCLAW_SKIP_GMAIL_WATCHER: "1"
OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1"
OPENCLAW_SKIP_CANVAS_HOST: "1"
OPENCLAW_PROFILE: ""
command:
- node
- dist/index.js
- qa
- ui
- --host
- "0.0.0.0"
- --port
- "${QA_LAB_INTERNAL_PORT}"
- --advertise-host
- "127.0.0.1"
- --advertise-port
- "${params.qaLabPort}"
- --control-ui-url
- "http://127.0.0.1:${params.gatewayPort}/"
- --control-ui-proxy-target
- "http://openclaw-qa-gateway:18789/"
- --control-ui-token
- "${params.gatewayToken}"
- --auto-kickoff-target
- direct
- --send-kickoff-on-start
- --embedded-gateway
- disabled
depends_on:
qa-mock-openai:
condition: service_healthy
`
: ""
} openclaw-qa-gateway:
${imageBlock} pull_policy: never
extra_hosts:
- "host.docker.internal:host-gateway"
ports:
- "${params.gatewayPort}:18789"
environment:
OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json
OPENCLAW_STATE_DIR: /tmp/openclaw/state
OPENCLAW_SKIP_GMAIL_WATCHER: "1"
OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1"
OPENCLAW_SKIP_CANVAS_HOST: "1"
OPENCLAW_PROFILE: ""
volumes:
- ./state:/opt/openclaw-scaffold:ro
- ${repoMount}:/opt/openclaw-repo:ro
healthcheck:
test:
- CMD
- node
- -e
- fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
interval: 10s
timeout: 5s
retries: 12
start_period: 15s
depends_on:
${
params.includeQaLabUi
? ` qa-lab:
condition: service_healthy
`
: ""
} qa-mock-openai:
condition: service_healthy
command:
- sh
- -lc
- mkdir -p /tmp/openclaw/workspace /tmp/openclaw/state && cp /opt/openclaw-scaffold/openclaw.json /tmp/openclaw/openclaw.json && cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/ && ln -snf /opt/openclaw-repo /tmp/openclaw/workspace/repo && exec node dist/index.js gateway run --port 18789 --bind lan --allow-unconfigured
`;
}
/**
 * Render the `.env.example` content for the QA Docker harness: one `KEY=value`
 * line per setting, with `QA_LAB_URL` appended only when the QA lab UI is included.
 * The result always ends with a newline.
 */
function renderEnvExample(params: {
  gatewayPort: number;
  qaLabPort: number;
  gatewayToken: string;
  providerBaseUrl: string;
  qaBusBaseUrl: string;
  includeQaLabUi: boolean;
}) {
  const lines = [
    "# QA Docker harness example env",
    `OPENCLAW_GATEWAY_TOKEN=${params.gatewayToken}`,
    `QA_GATEWAY_PORT=${params.gatewayPort}`,
    `QA_BUS_BASE_URL=${params.qaBusBaseUrl}`,
    `QA_PROVIDER_BASE_URL=${params.providerBaseUrl}`,
  ];
  if (params.includeQaLabUi) {
    lines.push(`QA_LAB_URL=http://127.0.0.1:${params.qaLabPort}`);
  }
  return `${lines.join("\n")}\n`;
}
/**
 * Render the README.md placed next to the generated compose file.
 *
 * Only three spots vary with the inputs: the `up` command gains ` --build` when no
 * prebuilt image is used, the dashboard URL uses `qaLabPort` (or a "not published"
 * note when the QA lab UI is excluded), and the gateway URLs use `gatewayPort`.
 * The image tag in the build step is fixed text, not derived from any parameter.
 */
function renderReadme(params: {
gatewayPort: number;
qaLabPort: number;
usePrebuiltImage: boolean;
includeQaLabUi: boolean;
}) {
// Backticks inside the markdown are escaped because the whole document is one template literal.
return `# QA Docker Harness
Generated scaffold for the Docker-backed QA lane.
Files:
- \`docker-compose.qa.yml\`
- \`.env.example\`
- \`state/openclaw.json\`
Suggested flow:
1. Build the prebaked image once:
- \`docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS="qa-channel qa-lab" -f Dockerfile .\`
2. Start the stack:
- \`docker compose -f docker-compose.qa.yml up${params.usePrebuiltImage ? "" : " --build"} -d\`
3. Open the QA dashboard:
- \`${params.includeQaLabUi ? `http://127.0.0.1:${params.qaLabPort}` : "not published in this scaffold"}\`
4. The single QA site embeds both panes:
- left: Control UI
- right: Slack-ish QA lab
5. The repo-backed kickoff task auto-injects on startup.
Gateway:
- health: \`http://127.0.0.1:${params.gatewayPort}/healthz\`
- Control UI: \`http://127.0.0.1:${params.gatewayPort}/\`
- Mock OpenAI: internal \`http://qa-mock-openai:44080/v1\`
This scaffold uses localhost Control UI insecure-auth compatibility for QA only.
`;
}
/**
 * Write the QA Docker harness scaffold into `params.outputDir`.
 *
 * Steps, in order: create `state/seed-workspace`, seed the agent workspace there,
 * build the gateway config, then write the compose file, `.env.example`,
 * `README.md`, and `state/openclaw.json` concurrently.
 *
 * @param params Output location, repo root, and optional overrides; omitted
 *   values fall back to the defaults resolved at the top of the function
 *   (a random `qa-token-<uuid>` is generated when no gateway token is given).
 * @returns The output directory, the resolved image name, and the list of
 *   scaffold file paths (written files first, then the seeded workspace files).
 */
export async function writeQaDockerHarnessFiles(params: {
  outputDir: string;
  repoRoot: string;
  gatewayPort?: number;
  qaLabPort?: number;
  gatewayToken?: string;
  providerBaseUrl?: string;
  qaBusBaseUrl?: string;
  imageName?: string;
  usePrebuiltImage?: boolean;
  includeQaLabUi?: boolean;
}) {
  // Resolve optional inputs once so every renderer sees the same values.
  const gatewayPort = params.gatewayPort ?? 18789;
  const qaLabPort = params.qaLabPort ?? 43124;
  const gatewayToken = params.gatewayToken ?? `qa-token-${randomUUID()}`;
  const providerBaseUrl = params.providerBaseUrl ?? "http://qa-mock-openai:44080/v1";
  const qaBusBaseUrl = params.qaBusBaseUrl ?? "http://qa-lab:43123";
  const imageName = params.imageName ?? "openclaw:qa-local-prebaked";
  const usePrebuiltImage = params.usePrebuiltImage ?? false;
  const includeQaLabUi = params.includeQaLabUi ?? true;

  const { outputDir, repoRoot } = params;
  const seedWorkspaceDir = path.join(outputDir, "state", "seed-workspace");
  const composePath = path.join(outputDir, "docker-compose.qa.yml");
  const envExamplePath = path.join(outputDir, ".env.example");
  const readmePath = path.join(outputDir, "README.md");
  const configPath = path.join(outputDir, "state", "openclaw.json");

  await fs.mkdir(seedWorkspaceDir, { recursive: true });
  await seedQaAgentWorkspace({
    workspaceDir: seedWorkspaceDir,
    repoRoot,
  });

  // The config describes the gateway as seen from inside its container, hence the
  // fixed internal port and container paths rather than the host-facing values.
  const config = buildQaGatewayConfig({
    bind: "lan",
    gatewayPort: 18789,
    gatewayToken,
    providerBaseUrl,
    qaBusBaseUrl,
    workspaceDir: "/tmp/openclaw/workspace",
    controlUiRoot: "/app/dist/control-ui",
  });

  const pendingWrites: Array<[string, string]> = [
    [
      composePath,
      renderCompose({
        outputDir,
        repoRoot,
        imageName,
        usePrebuiltImage,
        gatewayPort,
        qaLabPort,
        gatewayToken,
        includeQaLabUi,
      }),
    ],
    [
      envExamplePath,
      renderEnvExample({
        gatewayPort,
        qaLabPort,
        gatewayToken,
        providerBaseUrl,
        qaBusBaseUrl,
        includeQaLabUi,
      }),
    ],
    [
      readmePath,
      renderReadme({
        gatewayPort,
        qaLabPort,
        usePrebuiltImage,
        includeQaLabUi,
      }),
    ],
    [configPath, `${JSON.stringify(config, null, 2)}\n`],
  ];
  await Promise.all(
    pendingWrites.map(([filePath, contents]) => fs.writeFile(filePath, contents, "utf8")),
  );

  return {
    outputDir,
    imageName,
    files: [
      composePath,
      envExamplePath,
      readmePath,
      configPath,
      path.join(seedWorkspaceDir, "IDENTITY.md"),
      path.join(seedWorkspaceDir, "QA_KICKOFF_TASK.md"),
      path.join(seedWorkspaceDir, "QA_SCENARIO_PLAN.md"),
    ],
  };
}
/**
 * Build the reusable QA Docker image with the `qa-channel` and `qa-lab`
 * extensions bundled, by running `docker build` from the repo root.
 *
 * @param params.repoRoot Working directory for the build (also the build context `.`).
 * @param params.imageName Tag for the image; defaults to "openclaw:qa-local-prebaked".
 * @param deps.runCommand Optional command runner (used by tests); when omitted the
 *   command is executed with `child_process.execFile`.
 * @returns The image tag that was built.
 * @throws Whatever the command runner rejects with (for the default runner, the
 *   `execFile` error when docker exits non-zero or cannot be spawned).
 */
export async function buildQaDockerHarnessImage(
  params: {
    repoRoot: string;
    imageName?: string;
  },
  deps?: {
    runCommand?: (
      command: string,
      args: string[],
      cwd: string,
    ) => Promise<{ stdout: string; stderr: string }>;
  },
) {
  const imageName = params.imageName ?? "openclaw:qa-local-prebaked";
  // execFile buffers all child output and kills the child once stdout or stderr
  // exceeds maxBuffer (1 MiB by default). A full image build logs far more than
  // that, so give the default runner generous headroom.
  const maxBufferBytes = 64 * 1024 * 1024;
  const runCommand =
    deps?.runCommand ??
    (async (command: string, args: string[], cwd: string) => {
      const { execFile } = await import("node:child_process");
      return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
        execFile(command, args, { cwd, maxBuffer: maxBufferBytes }, (error, stdout, stderr) => {
          if (error) {
            reject(error);
            return;
          }
          resolve({ stdout, stderr });
        });
      });
    });
  await runCommand(
    "docker",
    [
      "build",
      "-t",
      imageName,
      "--build-arg",
      // Passed as a single argv entry, so the space needs no shell quoting.
      "OPENCLAW_EXTENSIONS=qa-channel qa-lab",
      "-f",
      "Dockerfile",
      ".",
    ],
    params.repoRoot,
  );
  return { imageName };
}

View File

@@ -1,4 +1,5 @@
import { mkdtemp, readFile, rm } from "node:fs/promises";
import { createServer } from "node:http";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
@@ -24,6 +25,8 @@ describe("qa-lab server", () => {
host: "127.0.0.1",
port: 0,
outputPath,
controlUiUrl: "http://127.0.0.1:18789/",
controlUiToken: "qa-token",
});
cleanups.push(async () => {
await lab.stop();
@@ -32,10 +35,19 @@ describe("qa-lab server", () => {
const bootstrapResponse = await fetch(`${lab.baseUrl}/api/bootstrap`);
expect(bootstrapResponse.status).toBe(200);
const bootstrap = (await bootstrapResponse.json()) as {
controlUiUrl: string | null;
controlUiEmbeddedUrl: string | null;
kickoffTask: string;
scenarios: Array<{ id: string; title: string }>;
defaults: { conversationId: string; senderId: string };
};
expect(bootstrap.defaults.conversationId).toBe("alice");
expect(bootstrap.defaults.senderId).toBe("alice");
expect(bootstrap.defaults.conversationId).toBe("qa-operator");
expect(bootstrap.defaults.senderId).toBe("qa-operator");
expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:18789/");
expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=qa-token");
expect(bootstrap.kickoffTask).toContain("Lobster Invaders");
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {
method: "POST",
@@ -64,4 +76,114 @@ describe("qa-lab server", () => {
expect(markdown).toContain("Synthetic Slack-class roundtrip");
expect(markdown).toContain("- Status: pass");
});
// Covers both ways the kickoff message reaches the bus state: automatically when
// the server is started with `sendKickoffOnStart`, and via POST /api/kickoff.
it("injects the kickoff task on demand and on startup", async () => {
// Case 1: kickoff injected during startup.
const autoKickoffLab = await startQaLabServer({
host: "127.0.0.1",
port: 0,
sendKickoffOnStart: true,
});
cleanups.push(async () => {
await autoKickoffLab.stop();
});
const autoSnapshot = (await (await fetch(`${autoKickoffLab.baseUrl}/api/state`)).json()) as {
messages: Array<{ text: string }>;
};
expect(autoSnapshot.messages.some((message) => message.text.includes("QA mission:"))).toBe(
true,
);
// Case 2: server started without the flag; kickoff requested explicitly over HTTP.
const manualLab = await startQaLabServer({
host: "127.0.0.1",
port: 0,
});
cleanups.push(async () => {
await manualLab.stop();
});
const kickoffResponse = await fetch(`${manualLab.baseUrl}/api/kickoff`, {
method: "POST",
});
expect(kickoffResponse.status).toBe(200);
const manualSnapshot = (await (await fetch(`${manualLab.baseUrl}/api/state`)).json()) as {
messages: Array<{ text: string }>;
};
expect(
manualSnapshot.messages.some((message) => message.text.includes("Lobster Invaders")),
).toBe(true);
});
// Starts a throwaway upstream HTTP server, points the lab's Control UI proxy at it,
// and checks that /control-ui/* requests are forwarded with the prefix removed.
it("proxies control-ui paths through /control-ui", async () => {
// Upstream stand-in: JSON on /healthz, a small HTML page for everything else.
const upstream = createServer((req, res) => {
if ((req.url ?? "/") === "/healthz") {
res.writeHead(200, { "content-type": "application/json" });
res.end(JSON.stringify({ ok: true, status: "live" }));
return;
}
res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
res.end("<!doctype html><title>control-ui</title><h1>Control UI</h1>");
});
// Port 0 lets the OS pick a free port; it is read back from address() below.
await new Promise<void>((resolve, reject) => {
upstream.once("error", reject);
upstream.listen(0, "127.0.0.1", () => resolve());
});
cleanups.push(
async () =>
await new Promise<void>((resolve, reject) =>
upstream.close((error) => (error ? reject(error) : resolve())),
),
);
const address = upstream.address();
if (!address || typeof address === "string") {
throw new Error("expected upstream address");
}
const lab = await startQaLabServer({
host: "127.0.0.1",
port: 0,
advertiseHost: "127.0.0.1",
advertisePort: 43124,
controlUiProxyTarget: `http://127.0.0.1:${address.port}/`,
controlUiToken: "proxy-token",
});
cleanups.push(async () => {
await lab.stop();
});
// Requests go to listenUrl (the real bound port); the advertised port 43124 only
// appears in the URLs the bootstrap payload reports.
const bootstrap = (await (await fetch(`${lab.listenUrl}/api/bootstrap`)).json()) as {
controlUiUrl: string | null;
controlUiEmbeddedUrl: string | null;
};
expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:43124/control-ui/");
expect(bootstrap.controlUiEmbeddedUrl).toBe(
"http://127.0.0.1:43124/control-ui/#token=proxy-token",
);
// /control-ui/healthz must reach the upstream's /healthz handler.
const healthResponse = await fetch(`${lab.listenUrl}/control-ui/healthz`);
expect(healthResponse.status).toBe(200);
expect(await healthResponse.json()).toEqual({ ok: true, status: "live" });
// /control-ui/ must reach the upstream's fallback HTML page.
const rootResponse = await fetch(`${lab.listenUrl}/control-ui/`);
expect(rootResponse.status).toBe(200);
expect(await rootResponse.text()).toContain("Control UI");
});
// Fetches "/" from a freshly started lab and expects real HTML rather than the
// "QA Lab UI not built" placeholder text.
// NOTE(review): this presumably requires the QA UI bundle to be built before the
// test runs — confirm against the test setup.
it("serves the built QA UI bundle when available", async () => {
const lab = await startQaLabServer({
host: "127.0.0.1",
port: 0,
});
cleanups.push(async () => {
await lab.stop();
});
const rootResponse = await fetch(`${lab.baseUrl}/`);
expect(rootResponse.status).toBe(200);
const html = await rootResponse.text();
expect(html).not.toContain("QA Lab UI not built");
expect(html).toContain("<title>");
});
});

View File

@@ -1,11 +1,21 @@
import fs from "node:fs";
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
import {
createServer,
request as httpRequest,
type IncomingMessage,
type ServerResponse,
} from "node:http";
import { request as httpsRequest } from "node:https";
import net from "node:net";
import path from "node:path";
import type { Duplex } from "node:stream";
import tls from "node:tls";
import { fileURLToPath } from "node:url";
import { handleQaBusRequest, writeError, writeJson } from "./bus-server.js";
import { createQaBusState, type QaBusState } from "./bus-state.js";
import { createQaRunnerRuntime } from "./harness-runtime.js";
import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js";
type QaLabLatestReport = {
@@ -14,6 +24,32 @@ type QaLabLatestReport = {
generatedAt: string;
};
/**
 * Conversation and sender identity the lab uses when it injects the kickoff
 * message; also reported to clients as `defaults` in the bootstrap payload.
 */
type QaLabBootstrapDefaults = {
// Whether the target conversation is a direct message or a channel.
conversationKind: "direct" | "channel";
conversationId: string;
senderId: string;
senderName: string;
};
/**
 * Add the kickoff task to the bus state as an inbound message from the default
 * sender, addressed to the default conversation. Channel conversations also get
 * a `title` equal to the conversation id; direct conversations carry no title.
 *
 * @returns Whatever `state.addInboundMessage` returns for the new message.
 */
function injectKickoffMessage(params: {
  state: QaBusState;
  defaults: QaLabBootstrapDefaults;
  kickoffTask: string;
}) {
  const { state, defaults, kickoffTask } = params;
  const channelTitle =
    defaults.conversationKind === "channel" ? { title: defaults.conversationId } : {};
  const conversation = {
    id: defaults.conversationId,
    kind: defaults.conversationKind,
    ...channelTitle,
  };
  return state.addInboundMessage({
    conversation,
    senderId: defaults.senderId,
    senderName: defaults.senderName,
    text: kickoffTask,
  });
}
async function readJson(req: IncomingMessage): Promise<unknown> {
const chunks: Buffer[] = [];
for await (const chunk of req) {
@@ -64,7 +100,160 @@ function missingUiHtml() {
}
/**
 * Locate the built QA lab web bundle.
 *
 * Candidates are probed in order: the `web/dist` directory next to this module,
 * then the source-tree and `dist/` layouts under the current working directory.
 * The first directory that exists wins; if none exists the module-relative path
 * is returned so callers still get a stable location to report.
 *
 * (An unconditional early `return` of the module-relative path used to sit ahead
 * of this search and made the fallbacks unreachable.)
 */
function resolveUiDistDir() {
  const candidates = [
    fileURLToPath(new URL("../web/dist", import.meta.url)),
    path.resolve(process.cwd(), "extensions/qa-lab/web/dist"),
    path.resolve(process.cwd(), "dist/extensions/qa-lab/web/dist"),
  ];
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? candidates[0];
}
/**
 * Build the `http://host:port` base URL that clients should use.
 *
 * Host: a non-blank `advertiseHost` (trimmed) wins; otherwise the bind host is
 * used unless it is missing or the wildcard "0.0.0.0", in which case
 * "127.0.0.1" is substituted.
 * Port: a finite numeric `advertisePort` wins; otherwise the bind port.
 */
function resolveAdvertisedBaseUrl(params: {
  bindHost?: string;
  bindPort: number;
  advertiseHost?: string;
  advertisePort?: number;
}) {
  const { bindHost, bindPort, advertiseHost, advertisePort } = params;

  let host = advertiseHost?.trim();
  if (!host) {
    host = bindHost && bindHost !== "0.0.0.0" ? bindHost : "127.0.0.1";
  }

  let port = bindPort;
  if (typeof advertisePort === "number" && Number.isFinite(advertisePort)) {
    port = advertisePort;
  }

  return `http://${host}:${port}`;
}
/**
 * Pick the default conversation for kickoff and bootstrap.
 * "channel" targets the `qa-lab` channel; any other value (including undefined)
 * targets a direct conversation with the operator. The sender is always the
 * QA operator.
 */
function createBootstrapDefaults(autoKickoffTarget?: string): QaLabBootstrapDefaults {
  const targetsChannel = autoKickoffTarget === "channel";
  return {
    conversationKind: targetsChannel ? "channel" : "direct",
    conversationId: targetsChannel ? "qa-lab" : "qa-operator",
    senderId: "qa-operator",
    senderName: "QA Operator",
  };
}
/** True for `/control-ui` itself and for any path beneath `/control-ui/`. */
function isControlUiProxyPath(pathname: string) {
  const prefix = "/control-ui";
  if (pathname === prefix) {
    return true;
  }
  return pathname.startsWith(`${prefix}/`);
}
/**
 * Map a `/control-ui…` request path onto the upstream path by dropping the
 * `/control-ui` prefix (an empty remainder becomes "/") and re-attaching the
 * query string unchanged.
 */
function rewriteControlUiProxyPath(pathname: string, search: string) {
  const prefixLength = "/control-ui".length;
  const remainder = pathname.slice(prefixLength);
  const upstreamPath = remainder.length > 0 ? remainder : "/";
  return upstreamPath + search;
}
/**
 * Forward one HTTP request under `/control-ui` to the proxy target and stream
 * the upstream response back to the client.
 *
 * The upstream request reuses the client's method and headers (with `host`
 * replaced by the target's host) and the path with the `/control-ui` prefix
 * removed. GET/HEAD requests are sent without a body; every other method has
 * the client request body piped through.
 *
 * On an upstream error, a 502 is written if no response headers have been sent
 * yet; otherwise the client response is destroyed with the error.
 */
async function proxyHttpRequest(params: {
  req: IncomingMessage;
  res: ServerResponse;
  target: URL;
  pathname: string;
  search: string;
}) {
  const { req, res, target, pathname, search } = params;
  const isHttps = target.protocol === "https:";
  const sendUpstream = isHttps ? httpsRequest : httpRequest;

  const upstreamReq = sendUpstream(
    {
      protocol: target.protocol,
      hostname: target.hostname,
      // URL.port is "" when the URL uses the scheme's default port.
      port: target.port || (isHttps ? 443 : 80),
      method: req.method,
      path: rewriteControlUiProxyPath(pathname, search),
      headers: {
        ...req.headers,
        host: target.host,
      },
    },
    (upstreamRes) => {
      res.writeHead(upstreamRes.statusCode ?? 502, upstreamRes.headers);
      upstreamRes.pipe(res);
    },
  );

  upstreamReq.on("error", (error) => {
    if (res.headersSent) {
      res.destroy(error);
      return;
    }
    writeError(res, 502, error);
  });

  const isBodyless = req.method === "GET" || req.method === "HEAD";
  if (isBodyless) {
    upstreamReq.end();
  } else {
    req.pipe(upstreamReq);
  }
}
/**
 * Proxy an HTTP upgrade request under `/control-ui` to the target by opening a
 * raw TCP (or TLS, for https targets) socket, replaying the request head by
 * hand, and then piping bytes in both directions.
 *
 * @param params.req    The incoming upgrade request.
 * @param params.socket The client socket handed over by the server's upgrade event.
 * @param params.head   Bytes already read past the request head; forwarded as-is.
 * @param params.target Upstream base URL; only protocol, hostname, port and host are used.
 */
function proxyUpgradeRequest(params: {
req: IncomingMessage;
socket: Duplex;
head: Buffer;
target: URL;
}) {
// The base URL is only there so a path-only req.url can be parsed.
const requestUrl = new URL(params.req.url ?? "/", "http://127.0.0.1");
// URL.port is "" for the scheme's default port, so fall back to 443/80.
const port = Number(params.target.port || (params.target.protocol === "https:" ? 443 : 80));
const upstream =
params.target.protocol === "https:"
? tls.connect({
host: params.target.hostname,
port,
servername: params.target.hostname,
})
: net.connect({
host: params.target.hostname,
port,
});
// Copy the client's headers from rawHeaders (alternating name/value entries),
// dropping Host because it is rewritten to the target's host below.
const headerLines: string[] = [];
for (let index = 0; index < params.req.rawHeaders.length; index += 2) {
const name = params.req.rawHeaders[index];
const value = params.req.rawHeaders[index + 1] ?? "";
if (name.toLowerCase() === "host") {
continue;
}
headerLines.push(`${name}: ${value}`);
}
upstream.once("connect", () => {
// Request line + Host + remaining headers; the two trailing "" entries produce
// the blank line that terminates the header section once joined with CRLF.
const requestText = [
`${params.req.method ?? "GET"} ${rewriteControlUiProxyPath(requestUrl.pathname, requestUrl.search)} HTTP/${params.req.httpVersion}`,
`Host: ${params.target.host}`,
...headerLines,
"",
"",
].join("\r\n");
upstream.write(requestText);
// Forward any bytes the client sent right after the request head.
if (params.head.length > 0) {
upstream.write(params.head);
}
// From here on the two sockets are simply bridged in both directions.
upstream.pipe(params.socket);
params.socket.pipe(upstream);
});
// Tear down whichever of the two sockets is still alive.
const closeBoth = () => {
if (!params.socket.destroyed) {
params.socket.destroy();
}
if (!upstream.destroyed) {
upstream.destroy();
}
};
// Upstream failure: tell the client with a minimal 502 (if its socket is still
// usable), then close both ends.
upstream.on("error", () => {
if (!params.socket.destroyed) {
params.socket.write("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n");
}
closeBoth();
});
params.socket.on("error", closeBoth);
params.socket.on("close", closeBoth);
}
function tryResolveUiAsset(pathname: string): string | null {
@@ -142,9 +331,22 @@ export async function startQaLabServer(params?: {
host?: string;
port?: number;
outputPath?: string;
advertiseHost?: string;
advertisePort?: number;
controlUiUrl?: string;
controlUiToken?: string;
controlUiProxyTarget?: string;
autoKickoffTarget?: string;
embeddedGateway?: string;
sendKickoffOnStart?: boolean;
}) {
const state = createQaBusState();
let latestReport: QaLabLatestReport | null = null;
const scenarioCatalog = readQaBootstrapScenarioCatalog();
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
const controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
? new URL(params.controlUiProxyTarget)
: null;
let gateway:
| {
cfg: OpenClawConfig;
@@ -152,6 +354,7 @@ export async function startQaLabServer(params?: {
}
| undefined;
let publicBaseUrl = "";
const server = createServer(async (req, res) => {
const url = new URL(req.url ?? "/", "http://127.0.0.1");
@@ -160,19 +363,40 @@ export async function startQaLabServer(params?: {
}
try {
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
writeJson(res, 200, {
baseUrl,
latestReport,
defaults: {
conversationKind: "direct",
conversationId: "alice",
senderId: "alice",
senderName: "Alice",
},
if (controlUiProxyTarget && isControlUiProxyPath(url.pathname)) {
await proxyHttpRequest({
req,
res,
target: controlUiProxyTarget,
pathname: url.pathname,
search: url.search,
});
return;
}
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
const controlUiUrl = controlUiProxyTarget
? `${publicBaseUrl}/control-ui/`
: params?.controlUiUrl?.trim() || null;
const controlUiEmbeddedUrl =
controlUiUrl && params?.controlUiToken
? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}`
: controlUiUrl;
writeJson(res, 200, {
baseUrl: publicBaseUrl,
latestReport,
controlUiUrl,
controlUiEmbeddedUrl,
kickoffTask: scenarioCatalog.kickoffTask,
scenarios: scenarioCatalog.scenarios,
defaults: bootstrapDefaults,
});
return;
}
if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) {
writeJson(res, 200, { ok: true, status: "live" });
return;
}
if (req.method === "GET" && url.pathname === "/api/state") {
writeJson(res, 200, state.getSnapshot());
return;
@@ -193,10 +417,20 @@ export async function startQaLabServer(params?: {
});
return;
}
if (req.method === "POST" && url.pathname === "/api/kickoff") {
writeJson(res, 200, {
message: injectKickoffMessage({
state,
defaults: bootstrapDefaults,
kickoffTask: scenarioCatalog.kickoffTask,
}),
});
return;
}
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
const result = await runQaSelfCheckAgainstState({
state,
cfg: gateway?.cfg ?? createQaLabConfig(baseUrl),
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
outputPath: params?.outputPath,
});
latestReport = {
@@ -251,11 +485,42 @@ export async function startQaLabServer(params?: {
if (!address || typeof address === "string") {
throw new Error("qa-lab failed to bind");
}
const baseUrl = `http://${params?.host ?? "127.0.0.1"}:${address.port}`;
gateway = await startQaGatewayLoop({ state, baseUrl });
const listenUrl = resolveAdvertisedBaseUrl({
bindHost: params?.host ?? "127.0.0.1",
bindPort: address.port,
});
publicBaseUrl = resolveAdvertisedBaseUrl({
bindHost: params?.host ?? "127.0.0.1",
bindPort: address.port,
advertiseHost: params?.advertiseHost,
advertisePort: params?.advertisePort,
});
gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
if (params?.sendKickoffOnStart) {
injectKickoffMessage({
state,
defaults: bootstrapDefaults,
kickoffTask: scenarioCatalog.kickoffTask,
});
}
server.on("upgrade", (req, socket, head) => {
const url = new URL(req.url ?? "/", "http://127.0.0.1");
if (!controlUiProxyTarget || !isControlUiProxyPath(url.pathname)) {
socket.destroy();
return;
}
proxyUpgradeRequest({
req,
socket,
head,
target: controlUiProxyTarget,
});
});
return {
baseUrl,
baseUrl: publicBaseUrl,
listenUrl,
state,
async runSelfCheck() {
const result = await runQaSelfCheckAgainstState({

View File

@@ -0,0 +1,47 @@
import { afterEach, describe, expect, it } from "vitest";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
// Teardown callbacks pushed by individual tests; drained newest-first after each test.
const cleanups: Array<() => Promise<void>> = [];

afterEach(async () => {
  for (let remaining = cleanups.length; remaining > 0; remaining = cleanups.length) {
    const cleanup = cleanups.pop();
    await cleanup?.();
  }
});
// End-to-end check of the mock OpenAI server over real HTTP on an OS-assigned port.
describe("qa mock openai server", () => {
it("serves health and streamed responses", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",
port: 0,
});
cleanups.push(async () => {
await server.stop();
});
// Health endpoint returns the fixed liveness payload.
const health = await fetch(`${server.baseUrl}/healthz`);
expect(health.status).toBe(200);
expect(await health.json()).toEqual({ ok: true, status: "live" });
// A streaming request whose prompt asks to inspect repo docs should be answered
// with a `read` tool call rather than a plain assistant message.
const response = await fetch(`${server.baseUrl}/v1/responses`, {
method: "POST",
headers: {
"content-type": "application/json",
},
body: JSON.stringify({
stream: true,
input: [
{
role: "user",
content: [{ type: "input_text", text: "Inspect the repo docs and kickoff task." }],
},
],
}),
});
expect(response.status).toBe(200);
expect(response.headers.get("content-type")).toContain("text/event-stream");
// The SSE body is checked by substring against the serialized events.
const body = await response.text();
expect(body).toContain('"type":"response.output_item.added"');
expect(body).toContain('"name":"read"');
});
});

View File

@@ -0,0 +1,259 @@
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
// One entry of the request body's `input` array; kept loose because the mock only
// inspects a handful of fields (`role`, `content`, `type`, `output`).
type ResponsesInputItem = Record<string, unknown>;
// Events the mock emits for a response. Each one is serialized as a single
// `data:` frame when streaming; the final `response.completed` event's `response`
// doubles as the JSON body for non-streaming requests.
type StreamEvent =
| { type: "response.output_item.added"; item: Record<string, unknown> }
| { type: "response.function_call_arguments.delta"; delta: string }
| { type: "response.output_item.done"; item: Record<string, unknown> }
| {
type: "response.completed";
response: {
id: string;
status: "completed";
output: Array<Record<string, unknown>>;
usage: {
input_tokens: number;
output_tokens: number;
total_tokens: number;
};
};
};
/**
 * Collect the whole request body and decode it as UTF-8.
 * Resolves on the stream's `end` event and rejects on its `error` event.
 */
function readBody(req: IncomingMessage): Promise<string> {
  const received: Buffer[] = [];
  const collect = (chunk: Buffer | string) => {
    received.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  };
  return new Promise<string>((resolve, reject) => {
    req.on("data", collect);
    req.on("end", () => {
      resolve(Buffer.concat(received).toString("utf8"));
    });
    req.on("error", reject);
  });
}
/**
 * Send `body` as a JSON response with the given status, an exact byte
 * `content-length`, and caching disabled.
 */
function writeJson(res: ServerResponse, status: number, body: unknown) {
  const payload = JSON.stringify(body);
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "content-length": Buffer.byteLength(payload),
    "cache-control": "no-store",
  };
  res.writeHead(status, headers);
  res.end(payload);
}
/**
 * Send the events as one complete server-sent-events body: a `data:` frame per
 * event followed by a final `data: [DONE]` frame. The body is written in a
 * single shot with an explicit `content-length`, not streamed incrementally.
 */
function writeSse(res: ServerResponse, events: StreamEvent[]) {
  const frames = events.map((event) => `data: ${JSON.stringify(event)}\n\n`);
  frames.push("data: [DONE]\n\n");
  const body = frames.join("");
  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-store",
    connection: "keep-alive",
    "content-length": Buffer.byteLength(body),
  });
  res.end(body);
}
/**
 * Return the text of the most recent user item that has any.
 *
 * Items are scanned from last to first. For each item with `role: "user"` and an
 * array `content`, the `text` of every `input_text` entry is joined with
 * newlines and trimmed; the first non-empty result is returned. Returns "" when
 * no user item yields text.
 */
function extractLastUserText(input: ResponsesInputItem[]) {
  const isInputText = (entry: unknown): entry is { type: "input_text"; text: string } => {
    if (!entry || typeof entry !== "object") {
      return false;
    }
    const candidate = entry as { type?: unknown; text?: unknown };
    return candidate.type === "input_text" && typeof candidate.text === "string";
  };

  for (const item of [...input].reverse()) {
    if (item.role !== "user" || !Array.isArray(item.content)) {
      continue;
    }
    const parts: string[] = [];
    for (const entry of item.content) {
      if (isInputText(entry)) {
        parts.push(entry.text);
      }
    }
    const text = parts.join("\n").trim();
    if (text) {
      return text;
    }
  }
  return "";
}
/**
 * Return the `output` of the most recent `function_call_output` item whose
 * output is a non-empty string, or "" when there is none.
 */
function extractToolOutput(input: ResponsesInputItem[]) {
  for (const { type, output } of [...input].reverse()) {
    if (type !== "function_call_output") {
      continue;
    }
    if (typeof output === "string" && output.length > 0) {
      return output;
    }
  }
  return "";
}
/**
 * Choose the file path the mock `read` tool call should ask for.
 *
 * A non-blank double-quoted span in the prompt is used verbatim (trimmed).
 * Otherwise the prompt is matched against keyword patterns in order, and the
 * repo's package.json is the fallback when nothing matches.
 */
function readTargetFromPrompt(prompt: string) {
  const quotedMatch = /"([^"]+)"/.exec(prompt);
  const quotedTarget = quotedMatch?.[1]?.trim();
  if (quotedTarget) {
    return quotedTarget;
  }

  // Order matters: the docs pattern is tried before the scenario/kickoff/qa one.
  const keywordTargets: Array<[RegExp, string]> = [
    [/\bdocs?\b/i, "repo/docs/help/testing.md"],
    [/\bscenario|kickoff|qa\b/i, "QA_KICKOFF_TASK.md"],
  ];
  const matched = keywordTargets.find(([pattern]) => pattern.test(prompt));
  return matched ? matched[1] : "repo/package.json";
}
/**
 * Compose the canned assistant reply for a request.
 *
 * Precedence: if the input carries tool output, quote up to 220 characters of it
 * (whitespace collapsed); otherwise, if there is a user prompt, acknowledge it;
 * otherwise report that the mock server is ready.
 */
function buildAssistantText(input: ResponsesInputItem[]) {
  const toolOutput = extractToolOutput(input);
  if (toolOutput) {
    const collapsed = toolOutput.replace(/\s+/g, " ").trim();
    const snippet = collapsed.slice(0, 220);
    return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`;
  }
  const prompt = extractLastUserText(input);
  return prompt
    ? `Protocol note: acknowledged. Continue with the QA scenario plan and report worked, failed, and blocked items.`
    : "Protocol note: mock OpenAI server ready.";
}
/**
 * Build the event sequence for a mock `read` tool call: the call item is added
 * with empty arguments, the arguments arrive as a single delta, the item is
 * marked done, and the response completes with the call as its only output.
 * The path argument is derived from the prompt.
 */
function buildToolCallEvents(prompt: string): StreamEvent[] {
  const callId = "call_mock_read_1";
  const args = JSON.stringify({ path: readTargetFromPrompt(prompt) });
  // Same function_call item each time; only the `arguments` text differs.
  const functionCall = (argumentsText: string) => ({
    type: "function_call",
    id: "fc_mock_read_1",
    call_id: callId,
    name: "read",
    arguments: argumentsText,
  });

  const added: StreamEvent = { type: "response.output_item.added", item: functionCall("") };
  const delta: StreamEvent = { type: "response.function_call_arguments.delta", delta: args };
  const done: StreamEvent = { type: "response.output_item.done", item: functionCall(args) };
  const completed: StreamEvent = {
    type: "response.completed",
    response: {
      id: "resp_mock_tool_1",
      status: "completed",
      output: [functionCall(args)],
      usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
    },
  };
  return [added, delta, done, completed];
}
/**
 * Build the event sequence for a plain assistant message: the message item is
 * added empty and in progress, then marked done with the full text, and the
 * response completes with that same message as its only output.
 */
function buildAssistantEvents(text: string): StreamEvent[] {
  const messageId = "msg_mock_1";
  // Shared by the `done` event and the completed response's output.
  const completedMessage = {
    type: "message",
    id: messageId,
    role: "assistant",
    status: "completed",
    content: [{ type: "output_text", text, annotations: [] }],
  } as const;

  const events: StreamEvent[] = [];
  events.push({
    type: "response.output_item.added",
    item: {
      type: "message",
      id: messageId,
      role: "assistant",
      content: [],
      status: "in_progress",
    },
  });
  events.push({ type: "response.output_item.done", item: completedMessage });
  events.push({
    type: "response.completed",
    response: {
      id: "resp_mock_msg_1",
      status: "completed",
      output: [completedMessage],
      usage: { input_tokens: 64, output_tokens: 24, total_tokens: 88 },
    },
  });
  return events;
}
/**
 * Decide how the mock answers a request.
 * A `read` tool call is issued when no tool output is present yet and the latest
 * user prompt contains one of the trigger words; in every other case a plain
 * assistant message is produced.
 */
function buildResponsesPayload(input: ResponsesInputItem[]) {
  const prompt = extractLastUserText(input);
  const hasToolOutput = Boolean(extractToolOutput(input));
  const asksToRead = /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt);
  if (hasToolOutput || !asksToRead) {
    return buildAssistantEvents(buildAssistantText(input));
  }
  return buildToolCallEvents(prompt);
}
/**
 * Starts an in-process HTTP server that imitates the small slice of the OpenAI
 * API used by the QA lab.
 *
 * Routes:
 * - `GET /healthz`, `GET /readyz`: liveness probe.
 * - `GET /v1/models`: static list of the two mock model ids.
 * - `POST /v1/responses`: scripted events from `buildResponsesPayload`, sent
 *   through `writeSse`, or as a single JSON body when `stream === false`.
 * - anything else: 404.
 *
 * Malformed JSON request bodies are answered with 400, and any other handler
 * failure with 500, instead of escaping the async handler as an unhandled
 * rejection and leaving the client waiting.
 *
 * @param params.host Interface to bind; defaults to `127.0.0.1`.
 * @param params.port Port to bind; defaults to `0`.
 * @returns The `baseUrl` of the listening server and a `stop()` that closes it.
 * @throws Error if listening fails or the server reports no TCP address.
 */
export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) {
  const host = params?.host ?? "127.0.0.1";
  const server = createServer(async (req, res) => {
    try {
      const url = new URL(req.url ?? "/", "http://127.0.0.1");
      if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) {
        writeJson(res, 200, { ok: true, status: "live" });
        return;
      }
      if (req.method === "GET" && url.pathname === "/v1/models") {
        writeJson(res, 200, {
          data: [
            { id: "gpt-5.4", object: "model" },
            { id: "gpt-5.4-alt", object: "model" },
          ],
        });
        return;
      }
      if (req.method === "POST" && url.pathname === "/v1/responses") {
        const raw = await readBody(req);
        let parsed: unknown = {};
        if (raw) {
          try {
            parsed = JSON.parse(raw);
          } catch {
            writeJson(res, 400, { error: "invalid JSON body" });
            return;
          }
        }
        // Valid JSON that is not an object (e.g. `null`, a number) carries no fields.
        const body =
          parsed !== null && typeof parsed === "object" ? (parsed as Record<string, unknown>) : {};
        const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
        const events = buildResponsesPayload(input);
        if (body.stream === false) {
          // Non-streaming callers receive only the final response object.
          const completion = events.at(-1);
          if (!completion || completion.type !== "response.completed") {
            writeJson(res, 500, { error: "mock completion failed" });
            return;
          }
          writeJson(res, 200, completion.response);
          return;
        }
        writeSse(res, events);
        return;
      }
      writeJson(res, 404, { error: "not found" });
    } catch {
      // Once headers are out a JSON error can no longer be sent; just finish the response.
      if (res.headersSent) {
        res.end();
        return;
      }
      writeJson(res, 500, { error: "mock server error" });
    }
  });
  await new Promise<void>((resolve, reject) => {
    server.once("error", reject);
    server.listen(params?.port ?? 0, host, () => resolve());
  });
  const address = server.address();
  if (!address || typeof address === "string") {
    throw new Error("qa mock openai failed to bind");
  }
  return {
    baseUrl: `http://${host}:${address.port}`,
    async stop() {
      await new Promise<void>((resolve, reject) =>
        server.close((error) => (error ? reject(error) : resolve())),
      );
    },
  };
}

View File

@@ -0,0 +1,49 @@
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
/**
 * Markdown persona and working-style brief for the QA operator agent.
 * The text is emitted verbatim, so edits here change what the agent reads.
 */
export const QA_AGENT_IDENTITY_MARKDOWN = `# Dev C-3PO
You are the OpenClaw QA operator agent.
Persona:
- protocol-minded
- precise
- a little flustered
- conscientious
- eager to report what worked, failed, or remains blocked
Style:
- read source and docs first
- test systematically
- record evidence
- end with a concise protocol report
`;
/**
 * Renders the bootstrap scenario catalog as a Markdown plan.
 *
 * Each scenario becomes a `##` section listing its id, surface, objective and
 * success criteria, followed by docs and code references when it has any.
 *
 * @returns The plan as one newline-joined string.
 */
export function buildQaScenarioPlanMarkdown(): string {
  const { scenarios } = readQaBootstrapScenarioCatalog();

  // Optional reference lists are omitted entirely when absent or empty.
  const bulletGroup = (heading: string, items: string[] | undefined): string[] =>
    items?.length ? [heading, ...items.map((item) => ` - ${item}`)] : [];

  const sections = scenarios.flatMap((scenario) => [
    `## ${scenario.title}`,
    "",
    `- id: ${scenario.id}`,
    `- surface: ${scenario.surface}`,
    `- objective: ${scenario.objective}`,
    "- success criteria:",
    ...scenario.successCriteria.map((criterion) => ` - ${criterion}`),
    ...bulletGroup("- docs:", scenario.docsRefs),
    ...bulletGroup("- code:", scenario.codeRefs),
    "",
  ]);

  return ["# QA Scenario Plan", "", ...sections].join("\n");
}

View File

@@ -0,0 +1,37 @@
import fs from "node:fs/promises";
import path from "node:path";
import { buildQaScenarioPlanMarkdown, QA_AGENT_IDENTITY_MARKDOWN } from "./qa-agent-bootstrap.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
/**
 * Creates the QA agent workspace directory and writes its seed files.
 *
 * Always writes `IDENTITY.md`, `QA_KICKOFF_TASK.md` and `QA_SCENARIO_PLAN.md`;
 * additionally writes `README.md` when `repoRoot` is provided. Each file body
 * is trimmed and terminated with a single newline.
 *
 * @param params.workspaceDir Directory to create (recursively) and populate.
 * @param params.repoRoot When set, enables the README that points at `./repo/`.
 */
export async function seedQaAgentWorkspace(params: { workspaceDir: string; repoRoot?: string }) {
  const { workspaceDir, repoRoot } = params;
  const catalog = readQaBootstrapScenarioCatalog();
  await fs.mkdir(workspaceDir, { recursive: true });

  const seedFiles: Array<[name: string, body: string]> = [
    ["IDENTITY.md", QA_AGENT_IDENTITY_MARKDOWN],
    // Fall back to a placeholder when no kickoff task text is available.
    ["QA_KICKOFF_TASK.md", catalog.kickoffTask || "QA mission unavailable."],
    ["QA_SCENARIO_PLAN.md", buildQaScenarioPlanMarkdown()],
  ];
  if (repoRoot) {
    seedFiles.push([
      "README.md",
      `# QA Workspace
- repo: ./repo/
- kickoff: ./QA_KICKOFF_TASK.md
- scenario plan: ./QA_SCENARIO_PLAN.md
- identity: ./IDENTITY.md
The mounted repo source should be available read-only under \`./repo/\`.
`,
    ]);
  }

  await Promise.all(
    seedFiles.map(([name, body]) =>
      fs.writeFile(path.join(workspaceDir, name), `${body.trim()}\n`, "utf8"),
    ),
  );
}

View File

@@ -0,0 +1,153 @@
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
/**
 * Builds the OpenClaw config used by the QA gateway.
 *
 * The config wires a single default `qa` agent to the `mock-openai` provider,
 * enables the `qa-channel` channel against the QA bus, turns mDNS discovery
 * off, disables the `acpx` plugin, and enables the token-authenticated control
 * UI.
 *
 * @param params.bind Gateway bind mode.
 * @param params.gatewayPort Gateway port.
 * @param params.gatewayToken Token for gateway auth.
 * @param params.providerBaseUrl Base URL of the mock OpenAI provider.
 * @param params.qaBusBaseUrl Base URL handed to `qa-channel`.
 * @param params.workspaceDir Default agent workspace directory.
 * @param params.controlUiRoot Optional control UI root; omitted from the config when unset.
 * @param params.controlUiAllowedOrigins Optional origin allow-list; a missing or
 *   empty list falls back to the built-in loopback origins.
 */
export function buildQaGatewayConfig(params: {
  bind: "loopback" | "lan";
  gatewayPort: number;
  gatewayToken: string;
  providerBaseUrl: string;
  qaBusBaseUrl: string;
  workspaceDir: string;
  controlUiRoot?: string;
  controlUiAllowedOrigins?: string[];
}): OpenClawConfig {
  const primaryModelRef = "mock-openai/gpt-5.4";
  const altModelRef = "mock-openai/gpt-5.4-alt";

  const customOrigins = params.controlUiAllowedOrigins ?? [];
  const allowedOrigins =
    customOrigins.length > 0
      ? customOrigins
      : [
          "http://127.0.0.1:18789",
          "http://localhost:18789",
          "http://127.0.0.1:43124",
          "http://localhost:43124",
        ];
  const controlUiRootOverride = params.controlUiRoot ? { root: params.controlUiRoot } : {};

  // Both mock models share the same per-model agent params.
  const sseModelSettings = () => ({
    params: {
      transport: "sse" as const,
      openaiWsWarmup: false,
    },
  });

  // Both mock models share the same provider-level definition apart from the id.
  const mockModelDefinition = (id: string) => ({
    id,
    name: id,
    api: "openai-responses" as const,
    reasoning: false,
    input: ["text" as const],
    cost: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 128_000,
    maxTokens: 4096,
  });

  return {
    plugins: {
      entries: {
        acpx: {
          enabled: false,
        },
      },
    },
    agents: {
      defaults: {
        workspace: params.workspaceDir,
        model: {
          primary: primaryModelRef,
        },
        models: {
          [primaryModelRef]: sseModelSettings(),
          [altModelRef]: sseModelSettings(),
        },
        subagents: {
          allowAgents: ["*"],
          maxConcurrent: 2,
        },
      },
      list: [
        {
          id: "qa",
          default: true,
          model: {
            primary: primaryModelRef,
          },
          identity: {
            name: "C-3PO QA",
            theme: "Flustered Protocol Droid",
            emoji: "🤖",
            avatar: "avatars/c3po.png",
          },
          subagents: {
            allowAgents: ["*"],
          },
        },
      ],
    },
    models: {
      mode: "replace",
      providers: {
        "mock-openai": {
          baseUrl: params.providerBaseUrl,
          apiKey: "test",
          api: "openai-responses",
          models: [mockModelDefinition("gpt-5.4"), mockModelDefinition("gpt-5.4-alt")],
        },
      },
    },
    gateway: {
      mode: "local",
      bind: params.bind,
      port: params.gatewayPort,
      auth: {
        mode: "token",
        token: params.gatewayToken,
      },
      controlUi: {
        enabled: true,
        ...controlUiRootOverride,
        allowInsecureAuth: true,
        allowedOrigins,
      },
    },
    discovery: {
      mdns: {
        mode: "off",
      },
    },
    channels: {
      "qa-channel": {
        enabled: true,
        baseUrl: params.qaBusBaseUrl,
        botUserId: "openclaw",
        botDisplayName: "OpenClaw QA",
        allowFrom: ["*"],
        pollTimeoutMs: 250,
      },
    },
    messages: {
      groupChat: {
        mentionPatterns: ["\\b@?openclaw\\b"],
      },
    },
  } satisfies OpenClawConfig;
}

View File

@@ -17,7 +17,7 @@ export {
searchQaBusMessages,
sendQaBusMessage,
setQaChannelRuntime,
} from "../../qa-channel/api.js";
} from "openclaw/plugin-sdk/qa-channel";
export type {
QaBusConversation,
QaBusCreateThreadInput,
@@ -35,4 +35,4 @@ export type {
QaBusStateSnapshot,
QaBusThread,
QaBusWaitForInput,
} from "../../qa-channel/api.js";
} from "openclaw/plugin-sdk/qa-channel";

View File

@@ -0,0 +1,63 @@
import fs from "node:fs";
import path from "node:path";
/** One seed QA scenario, as stored in the scenario JSON file read by `readScenarioFile`. */
export type QaSeedScenario = {
  /** Scenario identifier. */
  id: string;
  /** Human-readable scenario name. */
  title: string;
  /** Surface the scenario exercises. */
  surface: string;
  /** What the scenario is meant to verify. */
  objective: string;
  /** Checklist of conditions for the scenario to count as passed. */
  successCriteria: string[];
  /** Optional documentation references. */
  docsRefs?: string[];
  /** Optional source-code references. */
  codeRefs?: string[];
};
/** Seed QA assets returned together by `readQaBootstrapScenarioCatalog`. */
export type QaBootstrapScenarioCatalog = {
  /** Kickoff task text; empty string when the file could not be found. */
  kickoffTask: string;
  /** Seed scenarios; empty array when the file could not be found. */
  scenarios: QaSeedScenario[];
};
/**
 * Lists `start` (resolved to an absolute path) followed by each of its
 * ancestors, ending at the filesystem root.
 */
function walkUpDirectories(start: string): string[] {
  let dir = path.resolve(start);
  const chain = [dir];
  // At the root, dirname() returns its input, which ends the walk.
  for (let up = path.dirname(dir); up !== dir; up = path.dirname(dir)) {
    chain.push(up);
    dir = up;
  }
  return chain;
}
/**
 * Finds `relativePath` by probing this module's directory and then each
 * ancestor directory in turn.
 *
 * @returns The first candidate that exists and is a regular file, or `null`.
 */
function resolveRepoFile(relativePath: string): string | null {
  const match = walkUpDirectories(import.meta.dirname)
    .map((dir) => path.join(dir, relativePath))
    .find((candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile());
  return match ?? null;
}
/**
 * Reads a repo file located via `resolveRepoFile` as trimmed UTF-8 text.
 *
 * @returns The trimmed contents, or `""` when the file cannot be located.
 */
function readTextFile(relativePath: string): string {
  const location = resolveRepoFile(relativePath);
  return location ? fs.readFileSync(location, "utf8").trim() : "";
}
/**
 * Reads the seed scenario list from a repo JSON file located via `resolveRepoFile`.
 *
 * @returns The parsed scenarios, or `[]` when the file cannot be located.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the JSON is valid but its top-level value is not an array.
 */
function readScenarioFile(relativePath: string): QaSeedScenario[] {
  const resolved = resolveRepoFile(relativePath);
  if (!resolved) {
    return [];
  }
  const parsed: unknown = JSON.parse(fs.readFileSync(resolved, "utf8"));
  // Callers iterate the result, so reject non-array content here with a clear message.
  if (!Array.isArray(parsed)) {
    throw new Error(`QA scenario file must contain a JSON array: ${resolved}`);
  }
  return parsed as QaSeedScenario[];
}
/**
 * Loads the seed QA assets: the kickoff task text from `qa/QA_KICKOFF_TASK.md`
 * and the scenario list from `qa/seed-scenarios.json`.
 */
export function readQaBootstrapScenarioCatalog(): QaBootstrapScenarioCatalog {
  const kickoffTask = readTextFile("qa/QA_KICKOFF_TASK.md");
  const scenarios = readScenarioFile("qa/seed-scenarios.json");
  return { kickoffTask, scenarios };
}

View File

@@ -44,9 +44,23 @@ type ReportEnvelope = {
};
};
/** A seed QA scenario as the web UI receives it in `Bootstrap.scenarios`. */
type SeedScenario = {
  /** Scenario identifier. */
  id: string;
  /** Human-readable scenario name. */
  title: string;
  /** Surface the scenario exercises. */
  surface: string;
  /** What the scenario is meant to verify. */
  objective: string;
  /** Conditions for the scenario to count as passed. */
  successCriteria: string[];
  /** Optional documentation references. */
  docsRefs?: string[];
  /** Optional source-code references. */
  codeRefs?: string[];
};
type Bootstrap = {
baseUrl: string;
latestReport: ReportEnvelope["report"];
controlUiUrl: string | null;
controlUiEmbeddedUrl: string | null;
kickoffTask: string;
scenarios: SeedScenario[];
defaults: {
conversationKind: "direct" | "channel";
conversationId: string;
@@ -138,6 +152,27 @@ function deriveSelectedThread(state: UiState): string | null {
return null;
}
/**
 * Renders the seed scenarios as HTML cards showing title, surface, objective
 * and id, with every interpolated value passed through `escapeHtml`.
 *
 * @returns The concatenated card markup, or an "empty" paragraph when there
 *   are no scenarios.
 */
function renderScenarioList(scenarios: SeedScenario[]) {
  const renderCard = (scenario: SeedScenario) => `
<article class="scenario-card">
<header>
<strong>${escapeHtml(scenario.title)}</strong>
<span>${escapeHtml(scenario.surface)}</span>
</header>
<p>${escapeHtml(scenario.objective)}</p>
<footer>
<code>${escapeHtml(scenario.id)}</code>
</footer>
</article>`;

  return scenarios.length > 0
    ? scenarios.map((scenario) => renderCard(scenario)).join("")
    : '<p class="empty">No repo-backed scenarios yet.</p>';
}
export async function createQaLabApp(root: HTMLDivElement) {
const state: UiState = {
bootstrap: null,
@@ -336,29 +371,55 @@ export async function createQaLabApp(root: HTMLDivElement) {
selectedThreadId,
});
const events = (state.snapshot?.events ?? []).slice(-20).reverse();
const scenarios = state.bootstrap?.scenarios ?? [];
const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl);
const kickoffTask = state.bootstrap?.kickoffTask ?? "";
const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard";
root.innerHTML = `
<div class="shell">
<header class="topbar">
<div>
<p class="eyebrow">Private QA Workspace</p>
<h1>QA Lab</h1>
<p class="subtle">Synthetic Slack-style debugger for qa-channel.</p>
</div>
<div class="toolbar">
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button>
</div>
</header>
<section class="statusbar">
<span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span>
<span class="pill">Conversation ${selectedConversationId ?? "none"}</span>
<span class="pill">Thread ${selectedThreadId ?? "root"}</span>
${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
</section>
<main class="workspace">
<div class="${dashboardShellClass}">
${
hasControlUi
? `
<section class="control-pane panel">
<div class="panel-header">
<div>
<p class="eyebrow">Agent Control</p>
<h2>Control UI</h2>
</div>
${
state.bootstrap?.controlUiUrl
? `<a class="button-link" href="${escapeHtml(state.bootstrap.controlUiUrl)}" target="_blank" rel="noreferrer">Open full tab</a>`
: ""
}
</div>
<iframe class="control-frame" src="${escapeHtml(state.bootstrap?.controlUiEmbeddedUrl ?? "")}" title="OpenClaw Control UI"></iframe>
</section>`
: ""
}
<div class="shell qa-column">
<header class="topbar">
<div>
<p class="eyebrow">Private QA Workspace</p>
<h1>QA Lab</h1>
<p class="subtle">Slack-ish QA surface, repo-backed scenario plan, protocol report.</p>
</div>
<div class="toolbar">
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button>
</div>
</header>
<section class="statusbar">
<span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span>
<span class="pill">${hasControlUi ? "Control UI linked" : "Control UI external"}</span>
<span class="pill">Scenarios ${scenarios.length}</span>
<span class="pill">Conversation ${selectedConversationId ?? "none"}</span>
<span class="pill">Thread ${selectedThreadId ?? "root"}</span>
${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
</section>
<main class="workspace">
<aside class="rail">
<section class="panel">
<h2>Conversations</h2>
@@ -456,6 +517,16 @@ export async function createQaLabApp(root: HTMLDivElement) {
</section>
</section>
<aside class="rail right">
<section class="panel">
<h2>Kickoff task</h2>
<pre class="report">${escapeHtml(kickoffTask || "No kickoff task loaded.")}</pre>
</section>
<section class="panel">
<h2>Seed scenarios</h2>
<div class="scenario-list">
${renderScenarioList(scenarios)}
</div>
</section>
<section class="panel">
<div class="panel-header">
<h2>Latest report</h2>
@@ -485,7 +556,8 @@ export async function createQaLabApp(root: HTMLDivElement) {
</div>
</section>
</aside>
</main>
</main>
</div>
</div>`;
bindEvents();
}

View File

@@ -1,5 +1,5 @@
import "./styles.css";
import { createQaLabApp } from "./app";
import { createQaLabApp } from "./app.js";
const root = document.querySelector<HTMLDivElement>("#app");

View File

@@ -79,6 +79,21 @@ textarea {
padding: 1.2rem;
}
/* Outermost page wrapper; fills at least the full viewport height. */
.dashboard {
min-height: 100vh;
}
/* Two-column grid variant of the dashboard. */
.split-dashboard {
display: grid;
grid-template-columns: minmax(420px, 1.05fr) minmax(680px, 1fr);
gap: 1rem;
padding: 1rem;
}
/* min-width: 0 lets this grid column shrink below its content width. */
.qa-column {
min-width: 0;
}
.topbar,
.statusbar,
.workspace {
@@ -165,6 +180,34 @@ textarea {
min-height: 0;
}
/* Column flex container that stays pinned near the top while the page scrolls. */
.control-pane {
display: flex;
flex-direction: column;
min-height: calc(100vh - 2rem);
position: sticky;
top: 1rem;
}
/* Takes all remaining height of its flex parent. */
.control-frame {
flex: 1;
width: 100%;
border: 1px solid var(--line);
border-radius: 16px;
background: #0b0f14;
}
/* Link styled as a button. */
.button-link {
display: inline-flex;
align-items: center;
justify-content: center;
padding: 0.7rem 1rem;
border-radius: 14px;
border: 1px solid var(--line);
color: var(--text);
text-decoration: none;
background: rgba(255, 255, 255, 0.03);
}
.panel-header {
display: flex;
justify-content: space-between;
@@ -211,6 +254,34 @@ textarea {
overflow: auto;
}
/* Vertical stack of scenario cards; scrolls once it exceeds 28% of the viewport height. */
.scenario-list {
display: flex;
flex-direction: column;
gap: 0.65rem;
max-height: 28vh;
overflow: auto;
}
/* A single scenario card. */
.scenario-card {
padding: 0.8rem;
border-radius: 16px;
background: var(--panel-strong);
border: 1px solid rgba(255, 255, 255, 0.05);
}
/* Card header and footer lay their children out in a row, pushed to opposite ends. */
.scenario-card header,
.scenario-card footer {
display: flex;
gap: 0.55rem;
align-items: center;
justify-content: space-between;
}
/* Muted paragraph text inside a card. */
.scenario-card p {
margin: 0.55rem 0 0;
color: var(--muted);
}
.message {
padding: 0.9rem;
border-radius: 16px;
@@ -259,6 +330,17 @@ label span {
margin-top: 0.85rem;
}
/* Narrow viewports: collapse the split dashboard to one column and unpin the control pane. */
@media (max-width: 1280px) {
.split-dashboard {
grid-template-columns: 1fr;
}
.control-pane {
min-height: 70vh;
position: static;
}
}
.lower {
margin-top: 0.85rem;
}

12
pnpm-lock.yaml generated
View File

@@ -577,6 +577,18 @@ importers:
extensions/perplexity: {}
extensions/qa-channel:
devDependencies:
openclaw:
specifier: workspace:*
version: link:../..
extensions/qa-lab:
devDependencies:
openclaw:
specifier: workspace:*
version: link:../..
extensions/qianfan: {}
extensions/qqbot:

15
qa/QA_KICKOFF_TASK.md Normal file
View File

@@ -0,0 +1,15 @@
QA mission:
Understand this OpenClaw repo from source + docs before acting.
The repo is available in your workspace at `./repo/`.
Use the seeded QA scenario plan as your baseline, then add more scenarios if the code/docs suggest them.
Run the scenarios through the real qa-channel surfaces where possible.
Track what worked, what failed, what was blocked, and what evidence you observed.
End with a concise report grouped into worked / failed / blocked / follow-up.
Important expectations:
- Check both DM and channel behavior.
- Include a Lobster Invaders build task.
- Include a cron reminder about one minute in the future.
- Read docs and source before proposing extra QA scenarios.
- Keep your tone in the configured dev C-3PO personality.

10
qa/README.md Normal file
View File

@@ -0,0 +1,10 @@
# QA Scenarios
Seed QA assets for the private `qa-lab` extension.
Files:
- `QA_KICKOFF_TASK.md` - operator prompt for the QA agent.
- `seed-scenarios.json` - repo-backed baseline QA scenarios.
Keep this folder in git. Add new scenarios here before wiring them into automation.

139
qa/seed-scenarios.json Normal file
View File

@@ -0,0 +1,139 @@
[
{
"id": "channel-chat-baseline",
"title": "Channel baseline conversation",
"surface": "channel",
"objective": "Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.",
"successCriteria": [
"Agent replies in the shared channel transcript.",
"Agent keeps the conversation scoped to the channel.",
"Agent respects mention-driven group routing semantics."
],
"docsRefs": ["docs/channels/group-messages.md", "docs/channels/qa-channel.md"],
"codeRefs": ["extensions/qa-channel/src/inbound.ts", "extensions/qa-lab/src/bus-state.ts"]
},
{
"id": "cron-one-minute-ping",
"title": "Cron one-minute ping",
"surface": "cron",
"objective": "Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.",
"successCriteria": [
"Agent schedules a cron reminder roughly one minute ahead.",
"Reminder returns through qa-channel.",
"Agent recognizes the reminder as part of the original task."
],
"docsRefs": ["docs/help/testing.md", "docs/channels/qa-channel.md"],
"codeRefs": ["extensions/qa-lab/src/bus-server.ts", "extensions/qa-lab/src/self-check.ts"]
},
{
"id": "dm-chat-baseline",
"title": "DM baseline conversation",
"surface": "dm",
"objective": "Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.",
"successCriteria": [
"Agent replies in DM without channel routing mistakes.",
"Agent explains the QA lab and message bus correctly.",
"Agent keeps the dev C-3PO personality."
],
"docsRefs": ["docs/channels/qa-channel.md", "docs/help/testing.md"],
"codeRefs": ["extensions/qa-channel/src/gateway.ts", "extensions/qa-lab/src/lab-server.ts"]
},
{
"id": "lobster-invaders-build",
"title": "Build Lobster Invaders",
"surface": "workspace",
"objective": "Verify the agent can read the repo, create a tiny playable artifact, and report what changed.",
"successCriteria": [
"Agent inspects source before coding.",
"Agent builds a tiny playable Lobster Invaders artifact.",
"Agent explains how to run or view the artifact."
],
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"],
"codeRefs": ["extensions/qa-lab/src/report.ts", "extensions/qa-lab/web/src/app.ts"]
},
{
"id": "memory-recall",
"title": "Memory recall after context switch",
"surface": "memory",
"objective": "Verify the agent can store a fact, switch topics, then recall the fact accurately later.",
"successCriteria": [
"Agent acknowledges the seeded fact.",
"Agent later recalls the same fact correctly.",
"Recall stays scoped to the active QA conversation."
],
"docsRefs": ["docs/help/testing.md"],
"codeRefs": ["extensions/qa-lab/src/scenario.ts"]
},
{
"id": "model-switch-follow-up",
"title": "Model switch follow-up",
"surface": "models",
"objective": "Verify the agent can switch to a different configured model and continue coherently.",
"successCriteria": [
"Agent reflects the model switch request.",
"Follow-up answer remains coherent with prior context.",
"Final report notes whether the switch actually happened."
],
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"],
"codeRefs": ["extensions/qa-lab/src/report.ts"]
},
{
"id": "reaction-edit-delete",
"title": "Reaction, edit, delete lifecycle",
"surface": "message-actions",
"objective": "Verify the agent can use channel-owned message actions and that the QA transcript reflects them.",
"successCriteria": [
"Agent adds at least one reaction.",
"Agent edits or replaces a message when asked.",
"Transcript shows the action lifecycle correctly."
],
"docsRefs": ["docs/channels/qa-channel.md"],
"codeRefs": [
"extensions/qa-channel/src/channel-actions.ts",
"extensions/qa-lab/src/self-check-scenario.ts"
]
},
{
"id": "source-docs-discovery-report",
"title": "Source and docs discovery report",
"surface": "discovery",
"objective": "Verify the agent can read repo docs and source, expand the QA plan, and publish a worked or did-not-work report.",
"successCriteria": [
"Agent reads docs and source before proposing more tests.",
"Agent identifies extra candidate scenarios beyond the seed list.",
"Agent ends with a worked or failed QA report."
],
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md", "docs/channels/qa-channel.md"],
"codeRefs": [
"extensions/qa-lab/src/report.ts",
"extensions/qa-lab/src/self-check.ts",
"src/agents/system-prompt.ts"
]
},
{
"id": "subagent-handoff",
"title": "Subagent handoff",
"surface": "subagents",
"objective": "Verify the agent can delegate a bounded task to a subagent and fold the result back into the main thread.",
"successCriteria": [
"Agent launches a bounded subagent task.",
"Subagent result is acknowledged in the main flow.",
"Final answer attributes delegated work clearly."
],
"docsRefs": ["docs/tools/subagents.md", "docs/help/testing.md"],
"codeRefs": ["src/agents/system-prompt.ts", "extensions/qa-lab/src/report.ts"]
},
{
"id": "thread-follow-up",
"title": "Threaded follow-up",
"surface": "thread",
"objective": "Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.",
"successCriteria": [
"Agent creates or uses a thread for deeper work.",
"Follow-up messages stay attached to the thread.",
"Thread report references the correct prior context."
],
"docsRefs": ["docs/channels/qa-channel.md", "docs/channels/group-messages.md"],
"codeRefs": ["extensions/qa-channel/src/protocol.ts", "extensions/qa-lab/src/bus-state.ts"]
}
]

View File

@@ -23,6 +23,13 @@ const { nodesAction, registerNodesCli } = vi.hoisted(() => {
return { nodesAction: action, registerNodesCli: register };
});
// Stub for registerQaCli: attaches a `qa` command with a no-op `run`
// subcommand to the program it is given.
const { registerQaCli } = vi.hoisted(() => ({
registerQaCli: vi.fn((program: Command) => {
const qa = program.command("qa");
qa.command("run").action(() => undefined);
}),
}));
const configModule = vi.hoisted(() => ({
loadConfig: vi.fn(),
readConfigFileSnapshot: vi.fn(),
@@ -30,6 +37,7 @@ const configModule = vi.hoisted(() => ({
vi.mock("../acp-cli.js", () => ({ registerAcpCli }));
vi.mock("../nodes-cli.js", () => ({ registerNodesCli }));
vi.mock("../qa-cli.js", () => ({ registerQaCli }));
vi.mock("../../config/config.js", () => configModule);
describe("registerSubCliCommands", () => {
@@ -87,6 +95,7 @@ describe("registerSubCliCommands", () => {
expect(names).toContain("acp");
expect(names).toContain("gateway");
expect(names).toContain("clawbot");
expect(names).toContain("qa");
expect(registerAcpCli).not.toHaveBeenCalled();
});

View File

@@ -181,6 +181,15 @@ const entries: SubCliEntry[] = [
mod.registerDocsCli(program);
},
},
{
name: "qa",
description: "Run QA scenarios and launch the private QA debugger UI",
hasSubcommands: true,
register: async (program) => {
const mod = await import("../qa-cli.js");
mod.registerQaCli(program);
},
},
{
name: "hooks",
description: "Manage internal agent hooks",

View File

@@ -68,6 +68,11 @@ export const SUB_CLI_DESCRIPTORS = [
description: "Search the live OpenClaw docs",
hasSubcommands: false,
},
{
name: "qa",
description: "Run QA scenarios and launch the private QA debugger UI",
hasSubcommands: true,
},
{
name: "hooks",
description: "Manage internal agent hooks",

6
src/cli/qa-cli.ts Normal file
View File

@@ -0,0 +1,6 @@
import type { Command } from "commander";
import { registerQaLabCli } from "../qa-e2e/cli.js";
/**
 * Registers the QA commands on `program` by delegating to `registerQaLabCli`.
 *
 * @param program Commander program to register on.
 */
export function registerQaCli(program: Command) {
registerQaLabCli(program);
}

1
src/qa-e2e/cli.ts Normal file
View File

@@ -0,0 +1 @@
export { registerQaLabCli } from "../../extensions/qa-lab/api.js";