diff --git a/Dockerfile b/Dockerfile index fd9a8c1831d..cb254cdc45c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -97,6 +97,7 @@ RUN pnpm build:docker # Force pnpm for UI build (Bun may fail on ARM/Synology architectures) ENV OPENCLAW_PREFER_PNPM=1 RUN pnpm ui:build +RUN pnpm qa:lab:build # Prune dev dependencies and strip build-only metadata before copying # runtime assets into the final image. @@ -156,6 +157,7 @@ COPY --from=runtime-assets --chown=node:node /app/openclaw.mjs . COPY --from=runtime-assets --chown=node:node /app/${OPENCLAW_BUNDLED_PLUGIN_DIR} ./${OPENCLAW_BUNDLED_PLUGIN_DIR} COPY --from=runtime-assets --chown=node:node /app/skills ./skills COPY --from=runtime-assets --chown=node:node /app/docs ./docs +COPY --from=runtime-assets --chown=node:node /app/qa ./qa # In npm-installed Docker images, prefer the copied source extension tree for # bundled discovery so package metadata that points at source entries stays valid. diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index 5de0290b996..f60aba5a27e 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -1,865 +1,66 @@ --- -title: "QA E2E Automation" -summary: "Design note for a full end-to-end QA system built on a synthetic message-channel plugin, Dockerized OpenClaw, and subagent-driven scenario execution" +summary: "Private QA automation shape for qa-lab, qa-channel, seeded scenarios, and protocol reports" read_when: - - You are designing a true end-to-end QA harness for OpenClaw - - You want a synthetic message channel for automated feature verification - - You want subagents to discover features, run scenarios, and propose fixes + - Extending qa-lab or qa-channel + - Adding repo-backed QA scenarios + - Building higher-realism QA automation around the Gateway dashboard +title: "QA E2E Automation" --- # QA E2E Automation -This note proposes a true end-to-end QA system for OpenClaw built around a -real channel plugin dedicated to testing. +The private QA stack is meant to exercise OpenClaw in a more realistic, +channel-shaped way than a single unit test can. -The core idea: +Current pieces: -- run OpenClaw inside Docker in a realistic gateway configuration -- expose a synthetic but full-featured message channel as a normal plugin -- let a QA harness inject inbound traffic and inspect outbound state -- let OpenClaw agents and subagents explore, verify, and report on behavior -- optionally escalate failing scenarios into host-side fix workflows that open PRs +- `extensions/qa-channel`: synthetic message channel with DM, channel, thread, + reaction, edit, and delete surfaces. +- `extensions/qa-lab`: debugger UI and QA bus for observing the transcript, + injecting inbound messages, and exporting a Markdown report. +- `qa/`: repo-backed seed assets for the kickoff task and baseline QA + scenarios. -This is not a unit-test replacement. It is a product-level system test layer. +The long-term goal is a two-pane QA site: -## Chosen direction +- Left: Gateway dashboard (Control UI) with the agent. +- Right: QA Lab, showing the Slack-ish transcript and scenario plan. -The initial direction for this project is: +That lets an operator or automation loop give the agent a QA mission, observe +real channel behavior, and record what worked, failed, or stayed blocked. -- build the full system inside this repo -- test against a matrix, not a single model/provider pair -- use Markdown reports as the first output artifact -- defer auto-PR and auto-fix work until later -- treat Slack-class semantics as the MVP transport target -- keep orchestration simple in v1, with a host-side controller that exercises - the moving parts directly -- evolve toward OpenClaw becoming the orchestration layer later, once the - transport, scenario, and reporting model are proven +## Repo-backed seeds -## Goals +Seed assets live in `qa/`: -- Test OpenClaw through a real messaging-channel boundary, not only `chat.send` - or embedded mocks. -- Verify channel semantics that matter for real use: - - DMs - - channels/groups - - threads - - edits - - deletes - - reactions - - polls - - attachments -- Verify agent behavior across realistic user flows: - - memory - - thread binding - - model switching - - cron jobs - - subagents - - approvals - - routing - - channel-specific `message` actions -- Make the QA runner capable of feature discovery: - - read docs - - inspect plugin capability discovery - - inspect code and config - - generate a scenario protocol -- Support deterministic protocol tests and best-effort real-model tests as - separate lanes. -- Allow automated bug triage artifacts that can feed a host-side fix worker. +- `qa/QA_KICKOFF_TASK.md` +- `qa/seed-scenarios.json` -## Non-goals +These are intentionally in git so the QA plan is visible to both humans and the +agent. The baseline list should stay broad enough to cover: -- Not a replacement for existing unit, contract, or live tests. -- Not a production channel. -- Not a requirement that all bug fixing happen from inside the Dockerized - OpenClaw runtime. -- Not a reason to add test-only core branches for one channel. +- DM and channel chat +- thread behavior +- message action lifecycle +- cron callbacks +- memory recall +- model switching +- subagent handoff +- repo-reading and docs-reading +- one small build task such as Lobster Invaders -## Why a channel plugin +## Reporting -OpenClaw already has the right boundary: +`qa-lab` exports a Markdown protocol report from the observed bus timeline. +The report should answer: -- core owns the shared `message` tool, prompt wiring, outer session - bookkeeping, and dispatch -- channel plugins own: - - config - - pairing - - security - - session grammar - - threading - - outbound delivery - - channel-owned actions and capability discovery +- What worked +- What failed +- What stayed blocked +- What follow-up scenarios are worth adding -That means the cleanest design is: +## Related docs -- a real channel plugin for QA transport semantics -- a separate QA control plane for injection and inspection - -This keeps the test transport inside the same architecture used by Slack, -Discord, Teams, and similar channels. - -## System overview - -The system has six pieces. - -1. `qa-channel` plugin - -- Bundled extension under `extensions/qa-channel` -- Normal `ChannelPlugin` -- Behaves like a Slack/Discord/Teams-class channel -- Registers channel-owned message actions through the shared `message` tool - -2. `qa-bus` sidecar - -- Small HTTP and/or WS service -- Canonical state store for synthetic conversations, messages, threads, - reactions, edits, and event history -- Accepts inbound events from the harness -- Exposes inspection and wait APIs for assertions - -3. Dockerized OpenClaw gateway - -- Runs as close to real deployment as practical -- Loads `qa-channel` -- Uses normal config, routing, session, cron, and plugin loading - -4. QA orchestrator - -- Host-side runner or dedicated OpenClaw-driven controller -- Provisions scenario environments -- Seeds config -- Resets state -- Executes test matrix -- Collects structured outcomes - -5. Auto-fix worker - -- Host-side workflow -- Creates a worktree -- launches a coding agent -- runs scoped verification -- opens a PR - -The auto-fix worker should start outside the container. It needs direct repo -and GitHub access, clean worktree control, and better isolation from the -runtime under test. - -6. `qa-lab` extension - -- Bundled extension under `extensions/qa-lab` -- Owns the QA harness, Markdown report flow, and private debugger UI -- Registers hidden CLI entrypoints such as `openclaw qa run` and - `openclaw qa ui` -- Stays separate from the shipped Control UI bundle - -## High-level flow - -1. Start `qa-bus`. -2. Start OpenClaw in Docker with `qa-channel` enabled. -3. QA orchestrator injects inbound messages into `qa-bus`. -4. `qa-channel` receives them as normal inbound traffic. -5. OpenClaw runs the agent loop normally. -6. Outbound replies and channel actions flow back through `qa-channel` into - `qa-bus`. -7. QA orchestrator inspects state or waits on events. -8. Orchestrator records pass/fail/flaky/unknown plus artifacts. -9. Severe failures optionally emit a bug packet for the host-side fix worker. - -## Lanes - -The system should have two distinct lanes. - -### Lane A: deterministic protocol lane - -Use a deterministic or tightly controlled model setup. - -Preferred options: - -- a canned provider fixture -- the bundled `synthetic` provider when useful -- fixed prompts with exact assertions - -Purpose: - -- verify transport and product semantics -- keep flakiness low -- catch regressions in routing, memory plumbing, thread binding, cron, and tool - invocation - -### Lane B: quality lane - -Use real providers and real models in a matrix. - -Purpose: - -- verify that the agent can still do good work end to end -- evaluate feature discoverability and instruction following -- surface model-specific breakage or degraded behavior - -Expected result type: - -- best-effort -- rubric-based -- more tolerant of wording variation - -Matrix guidance for v1: - -- start with a small curated matrix, not "everything configured" -- keep deterministic protocol runs separate from quality runs -- report matrix cells independently so one provider/model failure does not hide - transport correctness - -Do not mix these lanes. Protocol correctness and model quality should fail -independently. - -## Use existing bootstrap seam first - -Before the custom channel exists, OpenClaw already has a useful bootstrap path: - -- admin-scoped synthetic originating-route fields on `chat.send` -- synthetic message-channel headers for HTTP flows - -That is enough to build a first QA controller for: - -- thread/session routing -- ACP bind flows -- subagent delivery -- cron wake paths -- memory persistence checks - -This should be Phase 0 because it de-risks the scenario protocol before the -full channel lands. - -## `qa-lab` extension design - -`qa-lab` is the private operator-facing half of this system. - -Suggested package: - -- `extensions/qa-lab/` - -Suggested responsibilities: - -- host the synthetic bus state machine -- host the scenario runner -- write Markdown reports -- serve a private debugger UI on a separate local server -- keep that UI entirely outside the shipped Control UI bundle - -Suggested UI shape: - -- left rail for conversations and threads -- center transcript pane -- right rail for event stream and report inspection -- bottom inject-composer for inbound QA traffic - -## `qa-channel` plugin design - -## Package layout - -Suggested package: - -- `extensions/qa-channel/` - -Suggested file layout: - -- `package.json` -- `openclaw.plugin.json` -- `index.ts` -- `setup-entry.ts` -- `api.ts` -- `runtime-api.ts` -- `src/channel.ts` -- `src/channel-api.ts` -- `src/config-schema.ts` -- `src/setup-core.ts` -- `src/setup-surface.ts` -- `src/runtime.ts` -- `src/channel.runtime.ts` -- `src/inbound.ts` -- `src/outbound.ts` -- `src/state-client.ts` -- `src/targets.ts` -- `src/threading.ts` -- `src/message-actions.ts` -- `src/probe.ts` -- `src/doctor.ts` -- `src/*.test.ts` - -Model it after Slack, Discord, Teams, or Google Chat packaging, not as a one-off -test helper. - -## Capabilities - -MVP capabilities: - -- one account -- DMs -- channels -- threads -- send text -- reply in thread -- read -- edit -- delete -- react -- search -- upload-file -- download-file - -Phase 2 capabilities: - -- polls -- member-info -- channel-info -- channel-list -- pin and unpin -- permissions -- topic create and edit - -These map naturally onto the shared `message` tool action model already used by -channel plugins. - -## Conversation model - -Use a stable synthetic grammar that supports both simplicity and realistic -coverage. - -Suggested ids: - -- DM conversation: `dm:` -- channel: `chan:` -- thread: `thread::` -- message id: `msg:` - -Suggested target forms: - -- `qa:dm:` -- `qa:chan:` -- `qa:thread::` - -The plugin should own translation between external target strings and canonical -conversation ids. - -## Pairing and security - -Even though this is a QA channel, it should still implement real policy -surfaces: - -- DM allowlist / pairing flow -- group policy -- mention gating where relevant -- trusted sender ids - -Reason: - -- these are product features and should be testable through the QA transport -- the QA lane should be able to verify policy failures, not only happy paths - -## Threading model - -Threading is one of the main reasons to build this channel. - -Required semantics: - -- create thread from a top-level message -- reply inside an existing thread -- list thread messages -- preserve parent message linkage -- let OpenClaw thread binding attach a session to a thread - -The QA bus must preserve: - -- conversation id -- thread id -- parent message id -- sender id -- timestamps - -## Channel-owned message actions - -The plugin should implement `actions.describeMessageTool(...)` and -`actions.handleAction(...)`. - -MVP action list: - -- `send` -- `read` -- `reply` -- `react` -- `edit` -- `delete` -- `thread-create` -- `thread-reply` -- `search` -- `upload-file` -- `download-file` - -This is enough to test the shared `message` tool end to end with real channel -semantics. - -## `qa-bus` design - -`qa-bus` is the transport simulator and assertion backend. - -It should not know OpenClaw internals. It should know channel state. - -For v1, keep `qa-bus` in this repo so: - -- fixtures and scenarios evolve with product code -- the transport contract can change in lock-step with the plugin -- CI and local dev do not need another repo checkout - -## Responsibilities - -- accept inbound user/platform events -- persist canonical conversation state -- persist append-only event log -- expose inspection APIs -- expose blocking wait APIs -- support reset per scenario or per suite - -## Transport - -HTTP is enough for MVP. - -Suggested endpoints: - -- `POST /reset` -- `POST /inbound/message` -- `POST /inbound/edit` -- `POST /inbound/delete` -- `POST /inbound/reaction` -- `POST /inbound/thread/create` -- `GET /state/conversations` -- `GET /state/messages` -- `GET /state/threads` -- `GET /events` -- `POST /wait` - -Optional WS stream: - -- `/stream` - -Useful for live event taps and debugging. - -## State model - -Persist three layers. - -1. Conversation snapshot - -- participants -- type -- thread topology -- latest message pointers - -2. Message snapshot - -- sender -- content -- attachments -- edit history -- reactions -- parent and thread linkage - -3. Append-only event log - -- canonical timestamp -- causal ordering -- source: inbound, outbound, action, system -- payload - -The append-only log matters because many QA assertions are event-oriented, not -just state-oriented. - -## Assertion API - -The harness needs waiters, not just snapshots. - -Suggested `POST /wait` contract: - -- `kind` -- `match` -- `timeoutMs` - -Examples: - -- wait for outbound message matching text regex -- wait for thread creation -- wait for reaction added -- wait for message edit -- wait for no event of type X within Y ms - -This gives stable tests without custom polling code in every scenario. - -## QA orchestrator design - -The orchestrator should own scenario planning and artifact collection. - -Start host-side. Later, OpenClaw can orchestrate parts of it. - -This is the chosen v1 direction. - -Why: - -- simpler to iterate while the transport and scenario protocol are still moving -- easier access to the repo, logs, Docker, and test fixtures -- easier artifact collection and report generation -- avoids over-coupling the first version to subagent behavior before the QA - protocol itself is stable - -## Inputs - -- docs pages -- channel capability discovery -- configured provider/model lane -- scenario catalog -- repo/test metadata - -## Outputs - -- structured protocol report -- scenario transcript -- captured channel state -- gateway logs -- failure packets - -For v1, the primary output is a Markdown report. - -Suggested report sections: - -- suite summary -- environment -- provider/model matrix -- scenarios passed -- scenarios failed -- flaky or inconclusive scenarios -- captured evidence links or inline excerpts -- suspected ownership or file hints -- follow-up recommendations - -## Scenario format - -Use a data-driven scenario spec. - -Suggested shape: - -```json -{ - "id": "thread-memory-recall", - "lane": "deterministic", - "preconditions": ["qa-channel", "memory-enabled"], - "steps": [ - { - "type": "injectMessage", - "to": "qa:dm:user-a", - "text": "Remember that the deploy key is kiwi." - }, - { "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } }, - { "type": "injectMessage", "to": "qa:dm:user-a", "text": "What was the deploy key?" }, - { "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } } - ], - "assertions": [{ "type": "outboundTextIncludes", "value": "kiwi" }] -} -``` - -Keep the execution engine generic and the scenario catalog declarative. - -## Feature discovery - -The orchestrator can discover candidate scenarios from three sources. - -1. Docs - -- channel docs -- testing docs -- gateway docs -- subagents docs -- cron docs - -2. Runtime capability discovery - -- channel `message` action discovery -- plugin status and channel capabilities -- configured providers/models - -3. Code hints - -- known action names -- channel-specific feature flags -- config schema - -This should produce a proposed protocol with: - -- must-test -- can-test -- blocked -- unsupported - -## Scenario classes - -Recommended catalog: - -- transport basics - - DM send and reply - - channel send - - thread create and reply - - reaction add and read - - edit and delete -- policy - - allowlist - - pairing - - group mention gating -- shared `message` tool - - read - - search - - reply - - react - - upload and download -- agent quality - - follows channel context - - obeys thread semantics - - uses memory across turns - - switches model when instructed -- automation - - cron add and run - - cron delivery into channel - - scheduled reminders -- subagents - - spawn - - announce - - threaded follow-up - - nested orchestration when enabled -- failure handling - - unsupported action - - timeout - - malformed target - - policy denial - -## OpenClaw as orchestrator - -Longer-term, OpenClaw itself can coordinate the QA run. - -Suggested architecture: - -- one controller session -- N worker subagents -- each worker owns one scenario or scenario shard -- workers report structured results back to controller - -Good fits for existing OpenClaw primitives: - -- `sessions_spawn` -- `subagents` -- cron-based wakeups for long-running suites -- thread-bound sessions for scenario-local follow-up - -Best near-term use: - -- controller generates the plan -- workers execute scenarios in parallel -- controller synthesizes report - -Avoid making the controller also own host Git operations in the first version. - -Chosen direction: - -- v1: host-side controller -- v2+: OpenClaw-native orchestration once the scenario protocol and transport - model are stable - -## Auto-fix workflow - -The system should emit a structured bug packet when a scenario fails. - -Suggested bug packet: - -- scenario id -- lane -- failure kind -- minimal repro steps -- channel event transcript -- gateway transcript -- logs -- suspected files -- confidence - -Host-side fix worker flow: - -1. receive bug packet -2. create detached worktree -3. launch coding agent in worktree -4. write failing regression first when practical -5. implement fix -6. run scoped verification -7. open PR - -This should remain host-side at first because it needs: - -- repo write access -- worktree hygiene -- git credentials -- GitHub auth - -Chosen direction: - -- do not auto-open PRs in v1 -- emit Markdown reports and structured failure packets first -- add host-side worktree + PR automation later - -## Rollout plan - -## Phase 0: bootstrap on existing synthetic ingress - -Build a first QA runner without a new channel: - -- use `chat.send` with admin-scoped synthetic originating-route fields -- run deterministic scenarios against routing, memory, cron, subagents, and ACP -- validate protocol format and artifact collection - -Exit criteria: - -- scenario runner exists -- structured protocol report exists -- failure artifacts exist - -## Phase 1: MVP `qa-channel` - -Build the plugin and bus with: - -- DM -- channels -- threads -- read -- reply -- react -- edit -- delete -- search - -Target semantics: - -- Slack-class transport behavior -- not full Teams-class parity yet - -Exit criteria: - -- OpenClaw in Docker can talk to `qa-bus` -- harness can inject + inspect -- one green end-to-end suite across message transport and agent behavior - -## Phase 2: protocol expansion - -Add: - -- attachments -- polls -- pins -- richer policy tests -- quality lane with real provider/model matrix - -Exit criteria: - -- scenario matrix covers major built-in features -- deterministic and quality lanes are separated - -## Phase 3: subagent-driven QA - -Add: - -- controller agent -- worker subagents -- scenario discovery from docs + capability discovery -- parallel execution - -Exit criteria: - -- one controller can fan out and synthesize a suite report - -## Phase 4: auto-fix loop - -Add: - -- bug packet emission -- host-side worktree runner -- PR creation - -Exit criteria: - -- selected failures can auto-produce draft PRs - -## Risks - -## Risk: too much magic in one layer - -If the QA channel, bus, and orchestrator all become smart at once, debugging -will be painful. - -Mitigation: - -- keep `qa-channel` transport-focused -- keep `qa-bus` state-focused -- keep orchestrator separate - -## Risk: flaky assertions from model variance - -Mitigation: - -- deterministic lane -- quality lane -- different pass criteria - -## Risk: test-only branches leaking into core - -Mitigation: - -- no core special cases for `qa-channel` -- use normal plugin seams -- use admin synthetic ingress only as bootstrap - -## Risk: auto-fix overreach - -Mitigation: - -- keep fix worker host-side -- require explicit policy for when PRs can open automatically -- gate with scoped tests - -## Risk: building a fake platform nobody uses - -Mitigation: - -- emulate Slack/Discord/Teams semantics, not an abstract transport -- prioritize features that stress shared OpenClaw boundaries - -## MVP recommendation - -If building this now, start with this exact order. - -1. Host-side scenario runner using existing synthetic originating-route support. -2. `qa-bus` sidecar with state, events, reset, and wait APIs. -3. `extensions/qa-channel` MVP with DMs, channels, threads, reply, read, react, - edit, delete, and search. -4. Markdown report generator for suite + matrix output. -5. One deterministic end-to-end suite: - - inject inbound DM - - verify reply - - create thread - - verify follow-up in thread - - verify memory recall on later turn -6. Add curated real-model matrix quality lane. -7. Add controller subagent orchestration. -8. Add host-side auto-fix worktree runner. - -This order gets real value quickly without requiring the full grand design to -land before the first useful signal appears. - -## Current product decisions - -- `qa-bus` lives inside this repo -- the first controller is host-side -- Slack-class behavior is the MVP target -- the quality lane uses a curated matrix -- first version produces Markdown reports, not PRs -- OpenClaw-native orchestration is a later phase, not a v1 requirement +- [Testing](/help/testing) +- [QA Channel](/channels/qa-channel) +- [Dashboard](/web/dashboard) diff --git a/extensions/qa-lab/api.ts b/extensions/qa-lab/api.ts index 5f2fd7473e6..04b17e74e65 100644 --- a/extensions/qa-lab/api.ts +++ b/extensions/qa-lab/api.ts @@ -2,9 +2,16 @@ export * from "./src/bus-queries.js"; export * from "./src/bus-server.js"; export * from "./src/bus-state.js"; export * from "./src/bus-waiters.js"; +export * from "./src/cli.js"; export * from "./src/harness-runtime.js"; export * from "./src/lab-server.js"; +export * from "./src/docker-harness.js"; +export * from "./src/mock-openai-server.js"; +export * from "./src/qa-agent-bootstrap.js"; +export * from "./src/qa-agent-workspace.js"; +export * from "./src/qa-gateway-config.js"; export * from "./src/report.js"; export * from "./src/scenario.js"; +export * from "./src/scenario-catalog.js"; export * from "./src/self-check-scenario.js"; export * from "./src/self-check.js"; diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts index fef5043ff44..95a1b44fe4a 100644 --- a/extensions/qa-lab/src/cli.runtime.ts +++ b/extensions/qa-lab/src/cli.runtime.ts @@ -1,4 +1,7 @@ +import path from "node:path"; +import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js"; import { startQaLabServer } from "./lab-server.js"; +import { startQaMockOpenAiServer } from "./mock-openai-server.js"; export async function runQaLabSelfCheckCommand(opts: { output?: string }) { const server = await startQaLabServer({ @@ -12,10 +15,29 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) { } } -export async function runQaLabUiCommand(opts: { host?: string; port?: number }) { +export async function runQaLabUiCommand(opts: { + host?: string; + port?: number; + advertiseHost?: string; + advertisePort?: number; + controlUiUrl?: string; + controlUiToken?: string; + controlUiProxyTarget?: string; + autoKickoffTarget?: string; + embeddedGateway?: string; + sendKickoffOnStart?: boolean; +}) { const server = await startQaLabServer({ host: opts.host, port: Number.isFinite(opts.port) ? opts.port : undefined, + advertiseHost: opts.advertiseHost, + advertisePort: Number.isFinite(opts.advertisePort) ? opts.advertisePort : undefined, + controlUiUrl: opts.controlUiUrl, + controlUiToken: opts.controlUiToken, + controlUiProxyTarget: opts.controlUiProxyTarget, + autoKickoffTarget: opts.autoKickoffTarget, + embeddedGateway: opts.embeddedGateway, + sendKickoffOnStart: opts.sendKickoffOnStart, }); process.stdout.write(`QA Lab UI: ${server.baseUrl}\n`); process.stdout.write("Press Ctrl+C to stop.\n"); @@ -35,3 +57,56 @@ export async function runQaLabUiCommand(opts: { host?: string; port?: number }) process.on("SIGTERM", onSignal); await new Promise(() => undefined); } + +export async function runQaDockerScaffoldCommand(opts: { + outputDir: string; + gatewayPort?: number; + qaLabPort?: number; + providerBaseUrl?: string; + image?: string; + usePrebuiltImage?: boolean; +}) { + const outputDir = path.resolve(opts.outputDir); + const result = await writeQaDockerHarnessFiles({ + outputDir, + repoRoot: process.cwd(), + gatewayPort: Number.isFinite(opts.gatewayPort) ? opts.gatewayPort : undefined, + qaLabPort: Number.isFinite(opts.qaLabPort) ? opts.qaLabPort : undefined, + providerBaseUrl: opts.providerBaseUrl, + imageName: opts.image, + usePrebuiltImage: opts.usePrebuiltImage, + }); + process.stdout.write(`QA docker scaffold: ${result.outputDir}\n`); +} + +export async function runQaDockerBuildImageCommand(opts: { image?: string }) { + const result = await buildQaDockerHarnessImage({ + repoRoot: process.cwd(), + imageName: opts.image, + }); + process.stdout.write(`QA docker image: ${result.imageName}\n`); +} + +export async function runQaMockOpenAiCommand(opts: { host?: string; port?: number }) { + const server = await startQaMockOpenAiServer({ + host: opts.host, + port: Number.isFinite(opts.port) ? opts.port : undefined, + }); + process.stdout.write(`QA mock OpenAI: ${server.baseUrl}\n`); + process.stdout.write("Press Ctrl+C to stop.\n"); + + const shutdown = async () => { + process.off("SIGINT", onSignal); + process.off("SIGTERM", onSignal); + await server.stop(); + process.exit(0); + }; + + const onSignal = () => { + void shutdown(); + }; + + process.on("SIGINT", onSignal); + process.on("SIGTERM", onSignal); + await new Promise(() => undefined); +} diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index 0600d9fc1e4..bbe8d7472ec 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -14,11 +14,43 @@ async function runQaSelfCheck(opts: { output?: string }) { await runtime.runQaLabSelfCheckCommand(opts); } -async function runQaUi(opts: { host?: string; port?: number }) { +async function runQaUi(opts: { + host?: string; + port?: number; + advertiseHost?: string; + advertisePort?: number; + controlUiUrl?: string; + controlUiToken?: string; + controlUiProxyTarget?: string; + autoKickoffTarget?: string; + embeddedGateway?: string; + sendKickoffOnStart?: boolean; +}) { const runtime = await loadQaLabCliRuntime(); await runtime.runQaLabUiCommand(opts); } +async function runQaDockerScaffold(opts: { + outputDir: string; + gatewayPort?: number; + qaLabPort?: number; + image?: string; + usePrebuiltImage?: boolean; +}) { + const runtime = await loadQaLabCliRuntime(); + await runtime.runQaDockerScaffoldCommand(opts); +} + +async function runQaDockerBuildImage(opts: { image?: string }) { + const runtime = await loadQaLabCliRuntime(); + await runtime.runQaDockerBuildImageCommand(opts); +} + +async function runQaMockOpenAi(opts: { host?: string; port?: number }) { + const runtime = await loadQaLabCliRuntime(); + await runtime.runQaMockOpenAiCommand(opts); +} + export function registerQaLabCli(program: Command) { const qa = program .command("qa") @@ -35,7 +67,73 @@ export function registerQaLabCli(program: Command) { .description("Start the private QA debugger UI and local QA bus") .option("--host ", "Bind host", "127.0.0.1") .option("--port ", "Bind port", (value: string) => Number(value)) + .option("--advertise-host ", "Optional public host to advertise in bootstrap payloads") + .option("--advertise-port ", "Optional public port to advertise", (value: string) => + Number(value), + ) + .option("--control-ui-url ", "Optional Control UI URL to embed beside the QA panel") + .option("--control-ui-token ", "Optional Control UI token for embedded links") + .option( + "--control-ui-proxy-target ", + "Optional upstream Control UI target for /control-ui proxying", + ) + .option("--auto-kickoff-target ", "Kickoff default target (direct or channel)") + .option("--embedded-gateway ", "Embedded gateway mode hint", "enabled") + .option( + "--send-kickoff-on-start", + "Inject the repo-backed kickoff task when the UI starts", + false, + ) + .action( + async (opts: { + host?: string; + port?: number; + advertiseHost?: string; + advertisePort?: number; + controlUiUrl?: string; + controlUiToken?: string; + controlUiProxyTarget?: string; + autoKickoffTarget?: string; + embeddedGateway?: string; + sendKickoffOnStart?: boolean; + }) => { + await runQaUi(opts); + }, + ); + + qa.command("docker-scaffold") + .description("Write a prebaked Docker scaffold for the QA dashboard + gateway lane") + .requiredOption("--output-dir ", "Output directory for docker-compose + state files") + .option("--gateway-port ", "Gateway host port", (value: string) => Number(value)) + .option("--qa-lab-port ", "QA lab host port", (value: string) => Number(value)) + .option("--provider-base-url ", "Provider base URL for the QA gateway") + .option("--image ", "Prebaked image name", "openclaw:qa-local-prebaked") + .option("--use-prebuilt-image", "Use image: instead of build: in docker-compose", false) + .action( + async (opts: { + outputDir: string; + gatewayPort?: number; + qaLabPort?: number; + providerBaseUrl?: string; + image?: string; + usePrebuiltImage?: boolean; + }) => { + await runQaDockerScaffold(opts); + }, + ); + + qa.command("docker-build-image") + .description("Build the prebaked QA Docker image with qa-channel + qa-lab bundled") + .option("--image ", "Image tag", "openclaw:qa-local-prebaked") + .action(async (opts: { image?: string }) => { + await runQaDockerBuildImage(opts); + }); + + qa.command("mock-openai") + .description("Run the local mock OpenAI Responses API server for QA") + .option("--host ", "Bind host", "127.0.0.1") + .option("--port ", "Bind port", (value: string) => Number(value)) .action(async (opts: { host?: string; port?: number }) => { - await runQaUi(opts); + await runQaMockOpenAi(opts); }); } diff --git a/extensions/qa-lab/src/docker-harness.test.ts b/extensions/qa-lab/src/docker-harness.test.ts new file mode 100644 index 00000000000..8daa6543058 --- /dev/null +++ b/extensions/qa-lab/src/docker-harness.test.ts @@ -0,0 +1,107 @@ +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js"; + +const cleanups: Array<() => Promise> = []; + +afterEach(async () => { + while (cleanups.length > 0) { + await cleanups.pop()?.(); + } +}); + +describe("qa docker harness", () => { + it("writes compose, env, config, and workspace scaffold files", async () => { + const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-test-")); + cleanups.push(async () => { + await rm(outputDir, { recursive: true, force: true }); + }); + + const result = await writeQaDockerHarnessFiles({ + outputDir, + gatewayPort: 18889, + qaLabPort: 43124, + gatewayToken: "qa-token", + providerBaseUrl: "http://host.docker.internal:45123/v1", + repoRoot: "/repo/openclaw", + usePrebuiltImage: true, + }); + + expect(result.files).toEqual( + expect.arrayContaining([ + path.join(outputDir, ".env.example"), + path.join(outputDir, "README.md"), + path.join(outputDir, "docker-compose.qa.yml"), + path.join(outputDir, "state", "openclaw.json"), + path.join(outputDir, "state", "seed-workspace", "QA_KICKOFF_TASK.md"), + path.join(outputDir, "state", "seed-workspace", "QA_SCENARIO_PLAN.md"), + path.join(outputDir, "state", "seed-workspace", "IDENTITY.md"), + ]), + ); + + const compose = await readFile(path.join(outputDir, "docker-compose.qa.yml"), "utf8"); + expect(compose).toContain("image: openclaw:qa-local-prebaked"); + expect(compose).toContain("qa-mock-openai:"); + expect(compose).toContain("18889:18789"); + expect(compose).toContain(' - "43124:43123"'); + expect(compose).toContain(" - sh"); + expect(compose).toContain(" - -lc"); + expect(compose).toContain( + ' - fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))', + ); + expect(compose).toContain(" - --control-ui-proxy-target"); + expect(compose).toContain(' - "http://openclaw-qa-gateway:18789/"'); + expect(compose).toContain(" - --send-kickoff-on-start"); + expect(compose).toContain(":/opt/openclaw-repo:ro"); + expect(compose).toContain("./state:/opt/openclaw-scaffold:ro"); + expect(compose).toContain( + "cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/", + ); + expect(compose).toContain("OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json"); + expect(compose).toContain("OPENCLAW_STATE_DIR: /tmp/openclaw/state"); + + const envExample = await readFile(path.join(outputDir, ".env.example"), "utf8"); + expect(envExample).toContain("OPENCLAW_GATEWAY_TOKEN=qa-token"); + expect(envExample).toContain("QA_BUS_BASE_URL=http://qa-lab:43123"); + expect(envExample).toContain("QA_PROVIDER_BASE_URL=http://host.docker.internal:45123/v1"); + expect(envExample).toContain("QA_LAB_URL=http://127.0.0.1:43124"); + + const config = await readFile(path.join(outputDir, "state", "openclaw.json"), "utf8"); + expect(config).toContain('"allowInsecureAuth": true'); + expect(config).toContain('"enabled": false'); + expect(config).toContain("/app/dist/control-ui"); + expect(config).toContain("C-3PO QA"); + expect(config).toContain('"/tmp/openclaw/workspace"'); + + const kickoff = await readFile( + path.join(outputDir, "state", "seed-workspace", "QA_KICKOFF_TASK.md"), + "utf8", + ); + expect(kickoff).toContain("Lobster Invaders"); + }); + + it("builds the reusable QA image with bundled QA extensions", async () => { + const calls: string[] = []; + const result = await buildQaDockerHarnessImage( + { + repoRoot: "/repo/openclaw", + imageName: "openclaw:qa-local-prebaked", + }, + { + async runCommand(command, args, cwd) { + calls.push([command, ...args, `@${cwd}`].join(" ")); + return { stdout: "", stderr: "" }; + }, + }, + ); + + expect(result.imageName).toBe("openclaw:qa-local-prebaked"); + expect(calls).toEqual([ + expect.stringContaining( + "docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS=qa-channel qa-lab -f Dockerfile . @/repo/openclaw", + ), + ]); + }); +}); diff --git a/extensions/qa-lab/src/docker-harness.ts b/extensions/qa-lab/src/docker-harness.ts new file mode 100644 index 00000000000..48e2a98f3ad --- /dev/null +++ b/extensions/qa-lab/src/docker-harness.ts @@ -0,0 +1,353 @@ +import { randomUUID } from "node:crypto"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { seedQaAgentWorkspace } from "./qa-agent-workspace.js"; +import { buildQaGatewayConfig } from "./qa-gateway-config.js"; + +const QA_LAB_INTERNAL_PORT = 43123; + +function toPosixRelative(fromDir: string, toPath: string): string { + return path.relative(fromDir, toPath).split(path.sep).join("/"); +} + +function renderImageBlock(params: { + outputDir: string; + repoRoot: string; + imageName: string; + usePrebuiltImage: boolean; +}) { + if (params.usePrebuiltImage) { + return ` image: ${params.imageName}\n`; + } + const context = toPosixRelative(params.outputDir, params.repoRoot) || "."; + return ` build:\n context: ${context}\n dockerfile: Dockerfile\n args:\n OPENCLAW_EXTENSIONS: "qa-channel qa-lab"\n`; +} + +function renderCompose(params: { + outputDir: string; + repoRoot: string; + imageName: string; + usePrebuiltImage: boolean; + gatewayPort: number; + qaLabPort: number; + gatewayToken: string; + includeQaLabUi: boolean; +}) { + const imageBlock = renderImageBlock(params); + const repoMount = toPosixRelative(params.outputDir, params.repoRoot) || "."; + + return `services: + qa-mock-openai: +${imageBlock} pull_policy: never + healthcheck: + test: + - CMD + - node + - -e + - fetch("http://127.0.0.1:44080/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1)) + interval: 10s + timeout: 5s + retries: 6 + start_period: 3s + command: + - node + - dist/index.js + - qa + - mock-openai + - --host + - "0.0.0.0" + - --port + - "44080" +${ + params.includeQaLabUi + ? ` qa-lab: +${imageBlock} pull_policy: never + ports: + - "${params.qaLabPort}:${QA_LAB_INTERNAL_PORT}" + healthcheck: + test: + - CMD + - node + - -e + - fetch("http://127.0.0.1:${QA_LAB_INTERNAL_PORT}/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1)) + interval: 10s + timeout: 5s + retries: 6 + start_period: 5s + environment: + OPENCLAW_SKIP_GMAIL_WATCHER: "1" + OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1" + OPENCLAW_SKIP_CANVAS_HOST: "1" + OPENCLAW_PROFILE: "" + command: + - node + - dist/index.js + - qa + - ui + - --host + - "0.0.0.0" + - --port + - "${QA_LAB_INTERNAL_PORT}" + - --advertise-host + - "127.0.0.1" + - --advertise-port + - "${params.qaLabPort}" + - --control-ui-url + - "http://127.0.0.1:${params.gatewayPort}/" + - --control-ui-proxy-target + - "http://openclaw-qa-gateway:18789/" + - --control-ui-token + - "${params.gatewayToken}" + - --auto-kickoff-target + - direct + - --send-kickoff-on-start + - --embedded-gateway + - disabled + depends_on: + qa-mock-openai: + condition: service_healthy +` + : "" +} openclaw-qa-gateway: +${imageBlock} pull_policy: never + extra_hosts: + - "host.docker.internal:host-gateway" + ports: + - "${params.gatewayPort}:18789" + environment: + OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json + OPENCLAW_STATE_DIR: /tmp/openclaw/state + OPENCLAW_SKIP_GMAIL_WATCHER: "1" + OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1" + OPENCLAW_SKIP_CANVAS_HOST: "1" + OPENCLAW_PROFILE: "" + volumes: + - ./state:/opt/openclaw-scaffold:ro + - ${repoMount}:/opt/openclaw-repo:ro + healthcheck: + test: + - CMD + - node + - -e + - fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1)) + interval: 10s + timeout: 5s + retries: 12 + start_period: 15s + depends_on: +${ + params.includeQaLabUi + ? ` qa-lab: + condition: service_healthy +` + : "" +} qa-mock-openai: + condition: service_healthy + command: + - sh + - -lc + - mkdir -p /tmp/openclaw/workspace /tmp/openclaw/state && cp /opt/openclaw-scaffold/openclaw.json /tmp/openclaw/openclaw.json && cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/ && ln -snf /opt/openclaw-repo /tmp/openclaw/workspace/repo && exec node dist/index.js gateway run --port 18789 --bind lan --allow-unconfigured +`; +} + +function renderEnvExample(params: { + gatewayPort: number; + qaLabPort: number; + gatewayToken: string; + providerBaseUrl: string; + qaBusBaseUrl: string; + includeQaLabUi: boolean; +}) { + return `# QA Docker harness example env +OPENCLAW_GATEWAY_TOKEN=${params.gatewayToken} +QA_GATEWAY_PORT=${params.gatewayPort} +QA_BUS_BASE_URL=${params.qaBusBaseUrl} +QA_PROVIDER_BASE_URL=${params.providerBaseUrl} +${params.includeQaLabUi ? `QA_LAB_URL=http://127.0.0.1:${params.qaLabPort}\n` : ""}`; +} + +function renderReadme(params: { + gatewayPort: number; + qaLabPort: number; + usePrebuiltImage: boolean; + includeQaLabUi: boolean; +}) { + return `# QA Docker Harness + +Generated scaffold for the Docker-backed QA lane. + +Files: + +- \`docker-compose.qa.yml\` +- \`.env.example\` +- \`state/openclaw.json\` + +Suggested flow: + +1. Build the prebaked image once: + - \`docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS="qa-channel qa-lab" -f Dockerfile .\` +2. Start the stack: + - \`docker compose -f docker-compose.qa.yml up${params.usePrebuiltImage ? "" : " --build"} -d\` +3. Open the QA dashboard: + - \`${params.includeQaLabUi ? `http://127.0.0.1:${params.qaLabPort}` : "not published in this scaffold"}\` +4. The single QA site embeds both panes: + - left: Control UI + - right: Slack-ish QA lab +5. The repo-backed kickoff task auto-injects on startup. + +Gateway: + +- health: \`http://127.0.0.1:${params.gatewayPort}/healthz\` +- Control UI: \`http://127.0.0.1:${params.gatewayPort}/\` +- Mock OpenAI: internal \`http://qa-mock-openai:44080/v1\` + +This scaffold uses localhost Control UI insecure-auth compatibility for QA only. +`; +} + +export async function writeQaDockerHarnessFiles(params: { + outputDir: string; + repoRoot: string; + gatewayPort?: number; + qaLabPort?: number; + gatewayToken?: string; + providerBaseUrl?: string; + qaBusBaseUrl?: string; + imageName?: string; + usePrebuiltImage?: boolean; + includeQaLabUi?: boolean; +}) { + const gatewayPort = params.gatewayPort ?? 18789; + const qaLabPort = params.qaLabPort ?? 43124; + const gatewayToken = params.gatewayToken ?? `qa-token-${randomUUID()}`; + const providerBaseUrl = params.providerBaseUrl ?? "http://qa-mock-openai:44080/v1"; + const qaBusBaseUrl = params.qaBusBaseUrl ?? "http://qa-lab:43123"; + const imageName = params.imageName ?? "openclaw:qa-local-prebaked"; + const usePrebuiltImage = params.usePrebuiltImage ?? false; + const includeQaLabUi = params.includeQaLabUi ?? true; + + await fs.mkdir(path.join(params.outputDir, "state", "seed-workspace"), { recursive: true }); + await seedQaAgentWorkspace({ + workspaceDir: path.join(params.outputDir, "state", "seed-workspace"), + repoRoot: params.repoRoot, + }); + + const config = buildQaGatewayConfig({ + bind: "lan", + gatewayPort: 18789, + gatewayToken, + providerBaseUrl, + qaBusBaseUrl, + workspaceDir: "/tmp/openclaw/workspace", + controlUiRoot: "/app/dist/control-ui", + }); + + const files = [ + path.join(params.outputDir, "docker-compose.qa.yml"), + path.join(params.outputDir, ".env.example"), + path.join(params.outputDir, "README.md"), + path.join(params.outputDir, "state", "openclaw.json"), + ]; + + await Promise.all([ + fs.writeFile( + path.join(params.outputDir, "docker-compose.qa.yml"), + renderCompose({ + outputDir: params.outputDir, + repoRoot: params.repoRoot, + imageName, + usePrebuiltImage, + gatewayPort, + qaLabPort, + gatewayToken, + includeQaLabUi, + }), + "utf8", + ), + fs.writeFile( + path.join(params.outputDir, ".env.example"), + renderEnvExample({ + gatewayPort, + qaLabPort, + gatewayToken, + providerBaseUrl, + qaBusBaseUrl, + includeQaLabUi, + }), + "utf8", + ), + fs.writeFile( + path.join(params.outputDir, "README.md"), + renderReadme({ + gatewayPort, + qaLabPort, + usePrebuiltImage, + includeQaLabUi, + }), + "utf8", + ), + fs.writeFile( + path.join(params.outputDir, "state", "openclaw.json"), + `${JSON.stringify(config, null, 2)}\n`, + "utf8", + ), + ]); + + return { + outputDir: params.outputDir, + imageName, + files: [ + ...files, + path.join(params.outputDir, "state", "seed-workspace", "IDENTITY.md"), + path.join(params.outputDir, "state", "seed-workspace", "QA_KICKOFF_TASK.md"), + path.join(params.outputDir, "state", "seed-workspace", "QA_SCENARIO_PLAN.md"), + ], + }; +} + +export async function buildQaDockerHarnessImage( + params: { + repoRoot: string; + imageName?: string; + }, + deps?: { + runCommand?: ( + command: string, + args: string[], + cwd: string, + ) => Promise<{ stdout: string; stderr: string }>; + }, +) { + const imageName = params.imageName ?? "openclaw:qa-local-prebaked"; + const runCommand = + deps?.runCommand ?? + (async (command: string, args: string[], cwd: string) => { + const { execFile } = await import("node:child_process"); + return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => { + execFile(command, args, { cwd }, (error, stdout, stderr) => { + if (error) { + reject(error); + return; + } + resolve({ stdout, stderr }); + }); + }); + }); + + await runCommand( + "docker", + [ + "build", + "-t", + imageName, + "--build-arg", + "OPENCLAW_EXTENSIONS=qa-channel qa-lab", + "-f", + "Dockerfile", + ".", + ], + params.repoRoot, + ); + + return { imageName }; +} diff --git a/extensions/qa-lab/src/lab-server.test.ts b/extensions/qa-lab/src/lab-server.test.ts index 9d5d6413229..0e8544c2881 100644 --- a/extensions/qa-lab/src/lab-server.test.ts +++ b/extensions/qa-lab/src/lab-server.test.ts @@ -1,4 +1,5 @@ import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { createServer } from "node:http"; import os from "node:os"; import path from "node:path"; import { afterEach, describe, expect, it } from "vitest"; @@ -24,6 +25,8 @@ describe("qa-lab server", () => { host: "127.0.0.1", port: 0, outputPath, + controlUiUrl: "http://127.0.0.1:18789/", + controlUiToken: "qa-token", }); cleanups.push(async () => { await lab.stop(); @@ -32,10 +35,19 @@ describe("qa-lab server", () => { const bootstrapResponse = await fetch(`${lab.baseUrl}/api/bootstrap`); expect(bootstrapResponse.status).toBe(200); const bootstrap = (await bootstrapResponse.json()) as { + controlUiUrl: string | null; + controlUiEmbeddedUrl: string | null; + kickoffTask: string; + scenarios: Array<{ id: string; title: string }>; defaults: { conversationId: string; senderId: string }; }; - expect(bootstrap.defaults.conversationId).toBe("alice"); - expect(bootstrap.defaults.senderId).toBe("alice"); + expect(bootstrap.defaults.conversationId).toBe("qa-operator"); + expect(bootstrap.defaults.senderId).toBe("qa-operator"); + expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:18789/"); + expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=qa-token"); + expect(bootstrap.kickoffTask).toContain("Lobster Invaders"); + expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10); + expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true); const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, { method: "POST", @@ -64,4 +76,114 @@ describe("qa-lab server", () => { expect(markdown).toContain("Synthetic Slack-class roundtrip"); expect(markdown).toContain("- Status: pass"); }); + + it("injects the kickoff task on demand and on startup", async () => { + const autoKickoffLab = await startQaLabServer({ + host: "127.0.0.1", + port: 0, + sendKickoffOnStart: true, + }); + cleanups.push(async () => { + await autoKickoffLab.stop(); + }); + + const autoSnapshot = (await (await fetch(`${autoKickoffLab.baseUrl}/api/state`)).json()) as { + messages: Array<{ text: string }>; + }; + expect(autoSnapshot.messages.some((message) => message.text.includes("QA mission:"))).toBe( + true, + ); + + const manualLab = await startQaLabServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await manualLab.stop(); + }); + + const kickoffResponse = await fetch(`${manualLab.baseUrl}/api/kickoff`, { + method: "POST", + }); + expect(kickoffResponse.status).toBe(200); + + const manualSnapshot = (await (await fetch(`${manualLab.baseUrl}/api/state`)).json()) as { + messages: Array<{ text: string }>; + }; + expect( + manualSnapshot.messages.some((message) => message.text.includes("Lobster Invaders")), + ).toBe(true); + }); + + it("proxies control-ui paths through /control-ui", async () => { + const upstream = createServer((req, res) => { + if ((req.url ?? "/") === "/healthz") { + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ ok: true, status: "live" })); + return; + } + res.writeHead(200, { "content-type": "text/html; charset=utf-8" }); + res.end("control-ui

Control UI

"); + }); + await new Promise((resolve, reject) => { + upstream.once("error", reject); + upstream.listen(0, "127.0.0.1", () => resolve()); + }); + cleanups.push( + async () => + await new Promise((resolve, reject) => + upstream.close((error) => (error ? reject(error) : resolve())), + ), + ); + + const address = upstream.address(); + if (!address || typeof address === "string") { + throw new Error("expected upstream address"); + } + + const lab = await startQaLabServer({ + host: "127.0.0.1", + port: 0, + advertiseHost: "127.0.0.1", + advertisePort: 43124, + controlUiProxyTarget: `http://127.0.0.1:${address.port}/`, + controlUiToken: "proxy-token", + }); + cleanups.push(async () => { + await lab.stop(); + }); + + const bootstrap = (await (await fetch(`${lab.listenUrl}/api/bootstrap`)).json()) as { + controlUiUrl: string | null; + controlUiEmbeddedUrl: string | null; + }; + expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:43124/control-ui/"); + expect(bootstrap.controlUiEmbeddedUrl).toBe( + "http://127.0.0.1:43124/control-ui/#token=proxy-token", + ); + + const healthResponse = await fetch(`${lab.listenUrl}/control-ui/healthz`); + expect(healthResponse.status).toBe(200); + expect(await healthResponse.json()).toEqual({ ok: true, status: "live" }); + + const rootResponse = await fetch(`${lab.listenUrl}/control-ui/`); + expect(rootResponse.status).toBe(200); + expect(await rootResponse.text()).toContain("Control UI"); + }); + + it("serves the built QA UI bundle when available", async () => { + const lab = await startQaLabServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await lab.stop(); + }); + + const rootResponse = await fetch(`${lab.baseUrl}/`); + expect(rootResponse.status).toBe(200); + const html = await rootResponse.text(); + expect(html).not.toContain("QA Lab UI not built"); + expect(html).toContain(""); + }); }); diff --git a/extensions/qa-lab/src/lab-server.ts b/extensions/qa-lab/src/lab-server.ts index b6736cc1b29..001458c94dc 100644 --- a/extensions/qa-lab/src/lab-server.ts +++ b/extensions/qa-lab/src/lab-server.ts @@ -1,11 +1,21 @@ import fs from "node:fs"; -import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; +import { + createServer, + request as httpRequest, + type IncomingMessage, + type ServerResponse, +} from "node:http"; +import { request as httpsRequest } from "node:https"; +import net from "node:net"; import path from "node:path"; +import type { Duplex } from "node:stream"; +import tls from "node:tls"; import { fileURLToPath } from "node:url"; import { handleQaBusRequest, writeError, writeJson } from "./bus-server.js"; import { createQaBusState, type QaBusState } from "./bus-state.js"; import { createQaRunnerRuntime } from "./harness-runtime.js"; import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./runtime-api.js"; +import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js"; type QaLabLatestReport = { @@ -14,6 +24,32 @@ type QaLabLatestReport = { generatedAt: string; }; +type QaLabBootstrapDefaults = { + conversationKind: "direct" | "channel"; + conversationId: string; + senderId: string; + senderName: string; +}; + +function injectKickoffMessage(params: { + state: QaBusState; + defaults: QaLabBootstrapDefaults; + kickoffTask: string; +}) { + return params.state.addInboundMessage({ + conversation: { + id: params.defaults.conversationId, + kind: params.defaults.conversationKind, + ...(params.defaults.conversationKind === "channel" + ? { title: params.defaults.conversationId } + : {}), + }, + senderId: params.defaults.senderId, + senderName: params.defaults.senderName, + text: params.kickoffTask, + }); +} + async function readJson(req: IncomingMessage): Promise<unknown> { const chunks: Buffer[] = []; for await (const chunk of req) { @@ -64,7 +100,160 @@ function missingUiHtml() { } function resolveUiDistDir() { - return fileURLToPath(new URL("../web/dist", import.meta.url)); + const candidates = [ + fileURLToPath(new URL("../web/dist", import.meta.url)), + path.resolve(process.cwd(), "extensions/qa-lab/web/dist"), + path.resolve(process.cwd(), "dist/extensions/qa-lab/web/dist"), + ]; + return candidates.find((candidate) => fs.existsSync(candidate)) ?? candidates[0]; +} + +function resolveAdvertisedBaseUrl(params: { + bindHost?: string; + bindPort: number; + advertiseHost?: string; + advertisePort?: number; +}) { + const advertisedHost = + params.advertiseHost?.trim() || + (params.bindHost && params.bindHost !== "0.0.0.0" ? params.bindHost : "127.0.0.1"); + const advertisedPort = + typeof params.advertisePort === "number" && Number.isFinite(params.advertisePort) + ? params.advertisePort + : params.bindPort; + return `http://${advertisedHost}:${advertisedPort}`; +} + +function createBootstrapDefaults(autoKickoffTarget?: string): QaLabBootstrapDefaults { + if (autoKickoffTarget === "channel") { + return { + conversationKind: "channel", + conversationId: "qa-lab", + senderId: "qa-operator", + senderName: "QA Operator", + }; + } + return { + conversationKind: "direct", + conversationId: "qa-operator", + senderId: "qa-operator", + senderName: "QA Operator", + }; +} + +function isControlUiProxyPath(pathname: string) { + return pathname === "/control-ui" || pathname.startsWith("/control-ui/"); +} + +function rewriteControlUiProxyPath(pathname: string, search: string) { + const stripped = pathname === "/control-ui" ? "/" : pathname.slice("/control-ui".length) || "/"; + return `${stripped}${search}`; +} + +async function proxyHttpRequest(params: { + req: IncomingMessage; + res: ServerResponse; + target: URL; + pathname: string; + search: string; +}) { + const client = params.target.protocol === "https:" ? httpsRequest : httpRequest; + const upstreamReq = client( + { + protocol: params.target.protocol, + hostname: params.target.hostname, + port: params.target.port || (params.target.protocol === "https:" ? 443 : 80), + method: params.req.method, + path: rewriteControlUiProxyPath(params.pathname, params.search), + headers: { + ...params.req.headers, + host: params.target.host, + }, + }, + (upstreamRes) => { + params.res.writeHead(upstreamRes.statusCode ?? 502, upstreamRes.headers); + upstreamRes.pipe(params.res); + }, + ); + + upstreamReq.on("error", (error) => { + if (!params.res.headersSent) { + writeError(params.res, 502, error); + return; + } + params.res.destroy(error); + }); + + if (params.req.method === "GET" || params.req.method === "HEAD") { + upstreamReq.end(); + return; + } + params.req.pipe(upstreamReq); +} + +function proxyUpgradeRequest(params: { + req: IncomingMessage; + socket: Duplex; + head: Buffer; + target: URL; +}) { + const requestUrl = new URL(params.req.url ?? "/", "http://127.0.0.1"); + const port = Number(params.target.port || (params.target.protocol === "https:" ? 443 : 80)); + const upstream = + params.target.protocol === "https:" + ? tls.connect({ + host: params.target.hostname, + port, + servername: params.target.hostname, + }) + : net.connect({ + host: params.target.hostname, + port, + }); + + const headerLines: string[] = []; + for (let index = 0; index < params.req.rawHeaders.length; index += 2) { + const name = params.req.rawHeaders[index]; + const value = params.req.rawHeaders[index + 1] ?? ""; + if (name.toLowerCase() === "host") { + continue; + } + headerLines.push(`${name}: ${value}`); + } + + upstream.once("connect", () => { + const requestText = [ + `${params.req.method ?? "GET"} ${rewriteControlUiProxyPath(requestUrl.pathname, requestUrl.search)} HTTP/${params.req.httpVersion}`, + `Host: ${params.target.host}`, + ...headerLines, + "", + "", + ].join("\r\n"); + upstream.write(requestText); + if (params.head.length > 0) { + upstream.write(params.head); + } + upstream.pipe(params.socket); + params.socket.pipe(upstream); + }); + + const closeBoth = () => { + if (!params.socket.destroyed) { + params.socket.destroy(); + } + if (!upstream.destroyed) { + upstream.destroy(); + } + }; + + upstream.on("error", () => { + if (!params.socket.destroyed) { + params.socket.write("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n"); + } + closeBoth(); + }); + params.socket.on("error", closeBoth); + params.socket.on("close", closeBoth); } function tryResolveUiAsset(pathname: string): string | null { @@ -142,9 +331,22 @@ export async function startQaLabServer(params?: { host?: string; port?: number; outputPath?: string; + advertiseHost?: string; + advertisePort?: number; + controlUiUrl?: string; + controlUiToken?: string; + controlUiProxyTarget?: string; + autoKickoffTarget?: string; + embeddedGateway?: string; + sendKickoffOnStart?: boolean; }) { const state = createQaBusState(); let latestReport: QaLabLatestReport | null = null; + const scenarioCatalog = readQaBootstrapScenarioCatalog(); + const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget); + const controlUiProxyTarget = params?.controlUiProxyTarget?.trim() + ? new URL(params.controlUiProxyTarget) + : null; let gateway: | { cfg: OpenClawConfig; @@ -152,6 +354,7 @@ export async function startQaLabServer(params?: { } | undefined; + let publicBaseUrl = ""; const server = createServer(async (req, res) => { const url = new URL(req.url ?? "/", "http://127.0.0.1"); @@ -160,19 +363,40 @@ export async function startQaLabServer(params?: { } try { - if (req.method === "GET" && url.pathname === "/api/bootstrap") { - writeJson(res, 200, { - baseUrl, - latestReport, - defaults: { - conversationKind: "direct", - conversationId: "alice", - senderId: "alice", - senderName: "Alice", - }, + if (controlUiProxyTarget && isControlUiProxyPath(url.pathname)) { + await proxyHttpRequest({ + req, + res, + target: controlUiProxyTarget, + pathname: url.pathname, + search: url.search, }); return; } + + if (req.method === "GET" && url.pathname === "/api/bootstrap") { + const controlUiUrl = controlUiProxyTarget + ? `${publicBaseUrl}/control-ui/` + : params?.controlUiUrl?.trim() || null; + const controlUiEmbeddedUrl = + controlUiUrl && params?.controlUiToken + ? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}` + : controlUiUrl; + writeJson(res, 200, { + baseUrl: publicBaseUrl, + latestReport, + controlUiUrl, + controlUiEmbeddedUrl, + kickoffTask: scenarioCatalog.kickoffTask, + scenarios: scenarioCatalog.scenarios, + defaults: bootstrapDefaults, + }); + return; + } + if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) { + writeJson(res, 200, { ok: true, status: "live" }); + return; + } if (req.method === "GET" && url.pathname === "/api/state") { writeJson(res, 200, state.getSnapshot()); return; @@ -193,10 +417,20 @@ export async function startQaLabServer(params?: { }); return; } + if (req.method === "POST" && url.pathname === "/api/kickoff") { + writeJson(res, 200, { + message: injectKickoffMessage({ + state, + defaults: bootstrapDefaults, + kickoffTask: scenarioCatalog.kickoffTask, + }), + }); + return; + } if (req.method === "POST" && url.pathname === "/api/scenario/self-check") { const result = await runQaSelfCheckAgainstState({ state, - cfg: gateway?.cfg ?? createQaLabConfig(baseUrl), + cfg: gateway?.cfg ?? createQaLabConfig(listenUrl), outputPath: params?.outputPath, }); latestReport = { @@ -251,11 +485,42 @@ export async function startQaLabServer(params?: { if (!address || typeof address === "string") { throw new Error("qa-lab failed to bind"); } - const baseUrl = `http://${params?.host ?? "127.0.0.1"}:${address.port}`; - gateway = await startQaGatewayLoop({ state, baseUrl }); + const listenUrl = resolveAdvertisedBaseUrl({ + bindHost: params?.host ?? "127.0.0.1", + bindPort: address.port, + }); + publicBaseUrl = resolveAdvertisedBaseUrl({ + bindHost: params?.host ?? "127.0.0.1", + bindPort: address.port, + advertiseHost: params?.advertiseHost, + advertisePort: params?.advertisePort, + }); + gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl }); + if (params?.sendKickoffOnStart) { + injectKickoffMessage({ + state, + defaults: bootstrapDefaults, + kickoffTask: scenarioCatalog.kickoffTask, + }); + } + + server.on("upgrade", (req, socket, head) => { + const url = new URL(req.url ?? "/", "http://127.0.0.1"); + if (!controlUiProxyTarget || !isControlUiProxyPath(url.pathname)) { + socket.destroy(); + return; + } + proxyUpgradeRequest({ + req, + socket, + head, + target: controlUiProxyTarget, + }); + }); return { - baseUrl, + baseUrl: publicBaseUrl, + listenUrl, state, async runSelfCheck() { const result = await runQaSelfCheckAgainstState({ diff --git a/extensions/qa-lab/src/mock-openai-server.test.ts b/extensions/qa-lab/src/mock-openai-server.test.ts new file mode 100644 index 00000000000..ec1d447878d --- /dev/null +++ b/extensions/qa-lab/src/mock-openai-server.test.ts @@ -0,0 +1,47 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { startQaMockOpenAiServer } from "./mock-openai-server.js"; + +const cleanups: Array<() => Promise<void>> = []; + +afterEach(async () => { + while (cleanups.length > 0) { + await cleanups.pop()?.(); + } +}); + +describe("qa mock openai server", () => { + it("serves health and streamed responses", async () => { + const server = await startQaMockOpenAiServer({ + host: "127.0.0.1", + port: 0, + }); + cleanups.push(async () => { + await server.stop(); + }); + + const health = await fetch(`${server.baseUrl}/healthz`); + expect(health.status).toBe(200); + expect(await health.json()).toEqual({ ok: true, status: "live" }); + + const response = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: true, + input: [ + { + role: "user", + content: [{ type: "input_text", text: "Inspect the repo docs and kickoff task." }], + }, + ], + }), + }); + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toContain("text/event-stream"); + const body = await response.text(); + expect(body).toContain('"type":"response.output_item.added"'); + expect(body).toContain('"name":"read"'); + }); +}); diff --git a/extensions/qa-lab/src/mock-openai-server.ts b/extensions/qa-lab/src/mock-openai-server.ts new file mode 100644 index 00000000000..43816c3c0f3 --- /dev/null +++ b/extensions/qa-lab/src/mock-openai-server.ts @@ -0,0 +1,259 @@ +import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; + +type ResponsesInputItem = Record<string, unknown>; + +type StreamEvent = + | { type: "response.output_item.added"; item: Record<string, unknown> } + | { type: "response.function_call_arguments.delta"; delta: string } + | { type: "response.output_item.done"; item: Record<string, unknown> } + | { + type: "response.completed"; + response: { + id: string; + status: "completed"; + output: Array<Record<string, unknown>>; + usage: { + input_tokens: number; + output_tokens: number; + total_tokens: number; + }; + }; + }; + +function readBody(req: IncomingMessage): Promise<string> { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = []; + req.on("data", (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))); + req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))); + req.on("error", reject); + }); +} + +function writeJson(res: ServerResponse, status: number, body: unknown) { + const text = JSON.stringify(body); + res.writeHead(status, { + "content-type": "application/json; charset=utf-8", + "content-length": Buffer.byteLength(text), + "cache-control": "no-store", + }); + res.end(text); +} + +function writeSse(res: ServerResponse, events: StreamEvent[]) { + const body = `${events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join("")}data: [DONE]\n\n`; + res.writeHead(200, { + "content-type": "text/event-stream", + "cache-control": "no-store", + connection: "keep-alive", + "content-length": Buffer.byteLength(body), + }); + res.end(body); +} + +function extractLastUserText(input: ResponsesInputItem[]) { + for (let index = input.length - 1; index >= 0; index -= 1) { + const item = input[index]; + if (item.role !== "user" || !Array.isArray(item.content)) { + continue; + } + const text = item.content + .filter( + (entry): entry is { type: "input_text"; text: string } => + !!entry && + typeof entry === "object" && + (entry as { type?: unknown }).type === "input_text" && + typeof (entry as { text?: unknown }).text === "string", + ) + .map((entry) => entry.text) + .join("\n") + .trim(); + if (text) { + return text; + } + } + return ""; +} + +function extractToolOutput(input: ResponsesInputItem[]) { + for (let index = input.length - 1; index >= 0; index -= 1) { + const item = input[index]; + if (item.type === "function_call_output" && typeof item.output === "string" && item.output) { + return item.output; + } + } + return ""; +} + +function readTargetFromPrompt(prompt: string) { + const quoted = /"([^"]+)"/.exec(prompt)?.[1]?.trim(); + if (quoted) { + return quoted; + } + if (/\bdocs?\b/i.test(prompt)) { + return "repo/docs/help/testing.md"; + } + if (/\bscenario|kickoff|qa\b/i.test(prompt)) { + return "QA_KICKOFF_TASK.md"; + } + return "repo/package.json"; +} + +function buildAssistantText(input: ResponsesInputItem[]) { + const prompt = extractLastUserText(input); + const toolOutput = extractToolOutput(input); + if (toolOutput) { + const snippet = toolOutput.replace(/\s+/g, " ").trim().slice(0, 220); + return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`; + } + if (prompt) { + return `Protocol note: acknowledged. Continue with the QA scenario plan and report worked, failed, and blocked items.`; + } + return "Protocol note: mock OpenAI server ready."; +} + +function buildToolCallEvents(prompt: string): StreamEvent[] { + const targetPath = readTargetFromPrompt(prompt); + const callId = "call_mock_read_1"; + const args = JSON.stringify({ path: targetPath }); + return [ + { + type: "response.output_item.added", + item: { + type: "function_call", + id: "fc_mock_read_1", + call_id: callId, + name: "read", + arguments: "", + }, + }, + { type: "response.function_call_arguments.delta", delta: args }, + { + type: "response.output_item.done", + item: { + type: "function_call", + id: "fc_mock_read_1", + call_id: callId, + name: "read", + arguments: args, + }, + }, + { + type: "response.completed", + response: { + id: "resp_mock_tool_1", + status: "completed", + output: [ + { + type: "function_call", + id: "fc_mock_read_1", + call_id: callId, + name: "read", + arguments: args, + }, + ], + usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 }, + }, + }, + ]; +} + +function buildAssistantEvents(text: string): StreamEvent[] { + const outputItem = { + type: "message", + id: "msg_mock_1", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text, annotations: [] }], + } as const; + return [ + { + type: "response.output_item.added", + item: { + type: "message", + id: "msg_mock_1", + role: "assistant", + content: [], + status: "in_progress", + }, + }, + { + type: "response.output_item.done", + item: outputItem, + }, + { + type: "response.completed", + response: { + id: "resp_mock_msg_1", + status: "completed", + output: [outputItem], + usage: { input_tokens: 64, output_tokens: 24, total_tokens: 88 }, + }, + }, + ]; +} + +function buildResponsesPayload(input: ResponsesInputItem[]) { + const prompt = extractLastUserText(input); + const toolOutput = extractToolOutput(input); + if (!toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt)) { + return buildToolCallEvents(prompt); + } + return buildAssistantEvents(buildAssistantText(input)); +} + +export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) { + const host = params?.host ?? "127.0.0.1"; + const server = createServer(async (req, res) => { + const url = new URL(req.url ?? "/", "http://127.0.0.1"); + if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) { + writeJson(res, 200, { ok: true, status: "live" }); + return; + } + if (req.method === "GET" && url.pathname === "/v1/models") { + writeJson(res, 200, { + data: [ + { id: "gpt-5.4", object: "model" }, + { id: "gpt-5.4-alt", object: "model" }, + ], + }); + return; + } + if (req.method === "POST" && url.pathname === "/v1/responses") { + const raw = await readBody(req); + const body = raw ? (JSON.parse(raw) as Record<string, unknown>) : {}; + const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : []; + const events = buildResponsesPayload(input); + if (body.stream === false) { + const completion = events.at(-1); + if (!completion || completion.type !== "response.completed") { + writeJson(res, 500, { error: "mock completion failed" }); + return; + } + writeJson(res, 200, completion.response); + return; + } + writeSse(res, events); + return; + } + writeJson(res, 404, { error: "not found" }); + }); + + await new Promise<void>((resolve, reject) => { + server.once("error", reject); + server.listen(params?.port ?? 0, host, () => resolve()); + }); + + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("qa mock openai failed to bind"); + } + + return { + baseUrl: `http://${host}:${address.port}`, + async stop() { + await new Promise<void>((resolve, reject) => + server.close((error) => (error ? reject(error) : resolve())), + ); + }, + }; +} diff --git a/extensions/qa-lab/src/qa-agent-bootstrap.ts b/extensions/qa-lab/src/qa-agent-bootstrap.ts new file mode 100644 index 00000000000..ac3666c8774 --- /dev/null +++ b/extensions/qa-lab/src/qa-agent-bootstrap.ts @@ -0,0 +1,49 @@ +import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; + +export const QA_AGENT_IDENTITY_MARKDOWN = `# Dev C-3PO + +You are the OpenClaw QA operator agent. + +Persona: +- protocol-minded +- precise +- a little flustered +- conscientious +- eager to report what worked, failed, or remains blocked + +Style: +- read source and docs first +- test systematically +- record evidence +- end with a concise protocol report +`; + +export function buildQaScenarioPlanMarkdown(): string { + const catalog = readQaBootstrapScenarioCatalog(); + const lines = ["# QA Scenario Plan", ""]; + for (const scenario of catalog.scenarios) { + lines.push(`## ${scenario.title}`); + lines.push(""); + lines.push(`- id: ${scenario.id}`); + lines.push(`- surface: ${scenario.surface}`); + lines.push(`- objective: ${scenario.objective}`); + lines.push("- success criteria:"); + for (const criterion of scenario.successCriteria) { + lines.push(` - ${criterion}`); + } + if (scenario.docsRefs?.length) { + lines.push("- docs:"); + for (const ref of scenario.docsRefs) { + lines.push(` - ${ref}`); + } + } + if (scenario.codeRefs?.length) { + lines.push("- code:"); + for (const ref of scenario.codeRefs) { + lines.push(` - ${ref}`); + } + } + lines.push(""); + } + return lines.join("\n"); +} diff --git a/extensions/qa-lab/src/qa-agent-workspace.ts b/extensions/qa-lab/src/qa-agent-workspace.ts new file mode 100644 index 00000000000..67b81194126 --- /dev/null +++ b/extensions/qa-lab/src/qa-agent-workspace.ts @@ -0,0 +1,37 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { buildQaScenarioPlanMarkdown, QA_AGENT_IDENTITY_MARKDOWN } from "./qa-agent-bootstrap.js"; +import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js"; + +export async function seedQaAgentWorkspace(params: { workspaceDir: string; repoRoot?: string }) { + const catalog = readQaBootstrapScenarioCatalog(); + await fs.mkdir(params.workspaceDir, { recursive: true }); + + const kickoffTask = catalog.kickoffTask || "QA mission unavailable."; + const files = new Map<string, string>([ + ["IDENTITY.md", QA_AGENT_IDENTITY_MARKDOWN], + ["QA_KICKOFF_TASK.md", kickoffTask], + ["QA_SCENARIO_PLAN.md", buildQaScenarioPlanMarkdown()], + ]); + + if (params.repoRoot) { + files.set( + "README.md", + `# QA Workspace + +- repo: ./repo/ +- kickoff: ./QA_KICKOFF_TASK.md +- scenario plan: ./QA_SCENARIO_PLAN.md +- identity: ./IDENTITY.md + +The mounted repo source should be available read-only under \`./repo/\`. +`, + ); + } + + await Promise.all( + [...files.entries()].map(async ([name, body]) => { + await fs.writeFile(path.join(params.workspaceDir, name), `${body.trim()}\n`, "utf8"); + }), + ); +} diff --git a/extensions/qa-lab/src/qa-gateway-config.ts b/extensions/qa-lab/src/qa-gateway-config.ts new file mode 100644 index 00000000000..7476d04d44b --- /dev/null +++ b/extensions/qa-lab/src/qa-gateway-config.ts @@ -0,0 +1,153 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/core"; + +export function buildQaGatewayConfig(params: { + bind: "loopback" | "lan"; + gatewayPort: number; + gatewayToken: string; + providerBaseUrl: string; + qaBusBaseUrl: string; + workspaceDir: string; + controlUiRoot?: string; + controlUiAllowedOrigins?: string[]; +}): OpenClawConfig { + const allowedOrigins = + params.controlUiAllowedOrigins && params.controlUiAllowedOrigins.length > 0 + ? params.controlUiAllowedOrigins + : [ + "http://127.0.0.1:18789", + "http://localhost:18789", + "http://127.0.0.1:43124", + "http://localhost:43124", + ]; + + return { + plugins: { + entries: { + acpx: { + enabled: false, + }, + }, + }, + agents: { + defaults: { + workspace: params.workspaceDir, + model: { + primary: "mock-openai/gpt-5.4", + }, + models: { + "mock-openai/gpt-5.4": { + params: { + transport: "sse", + openaiWsWarmup: false, + }, + }, + "mock-openai/gpt-5.4-alt": { + params: { + transport: "sse", + openaiWsWarmup: false, + }, + }, + }, + subagents: { + allowAgents: ["*"], + maxConcurrent: 2, + }, + }, + list: [ + { + id: "qa", + default: true, + model: { + primary: "mock-openai/gpt-5.4", + }, + identity: { + name: "C-3PO QA", + theme: "Flustered Protocol Droid", + emoji: "🤖", + avatar: "avatars/c3po.png", + }, + subagents: { + allowAgents: ["*"], + }, + }, + ], + }, + models: { + mode: "replace", + providers: { + "mock-openai": { + baseUrl: params.providerBaseUrl, + apiKey: "test", + api: "openai-responses", + models: [ + { + id: "gpt-5.4", + name: "gpt-5.4", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + { + id: "gpt-5.4-alt", + name: "gpt-5.4-alt", + api: "openai-responses", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128_000, + maxTokens: 4096, + }, + ], + }, + }, + }, + gateway: { + mode: "local", + bind: params.bind, + port: params.gatewayPort, + auth: { + mode: "token", + token: params.gatewayToken, + }, + controlUi: { + enabled: true, + ...(params.controlUiRoot ? { root: params.controlUiRoot } : {}), + allowInsecureAuth: true, + allowedOrigins, + }, + }, + discovery: { + mdns: { + mode: "off", + }, + }, + channels: { + "qa-channel": { + enabled: true, + baseUrl: params.qaBusBaseUrl, + botUserId: "openclaw", + botDisplayName: "OpenClaw QA", + allowFrom: ["*"], + pollTimeoutMs: 250, + }, + }, + messages: { + groupChat: { + mentionPatterns: ["\\b@?openclaw\\b"], + }, + }, + } satisfies OpenClawConfig; +} diff --git a/extensions/qa-lab/src/runtime-api.ts b/extensions/qa-lab/src/runtime-api.ts index e6abef5a3cd..a5b6d0c32c6 100644 --- a/extensions/qa-lab/src/runtime-api.ts +++ b/extensions/qa-lab/src/runtime-api.ts @@ -17,7 +17,7 @@ export { searchQaBusMessages, sendQaBusMessage, setQaChannelRuntime, -} from "../../qa-channel/api.js"; +} from "openclaw/plugin-sdk/qa-channel"; export type { QaBusConversation, QaBusCreateThreadInput, @@ -35,4 +35,4 @@ export type { QaBusStateSnapshot, QaBusThread, QaBusWaitForInput, -} from "../../qa-channel/api.js"; +} from "openclaw/plugin-sdk/qa-channel"; diff --git a/extensions/qa-lab/src/scenario-catalog.ts b/extensions/qa-lab/src/scenario-catalog.ts new file mode 100644 index 00000000000..76155b5f526 --- /dev/null +++ b/extensions/qa-lab/src/scenario-catalog.ts @@ -0,0 +1,63 @@ +import fs from "node:fs"; +import path from "node:path"; + +export type QaSeedScenario = { + id: string; + title: string; + surface: string; + objective: string; + successCriteria: string[]; + docsRefs?: string[]; + codeRefs?: string[]; +}; + +export type QaBootstrapScenarioCatalog = { + kickoffTask: string; + scenarios: QaSeedScenario[]; +}; + +function walkUpDirectories(start: string): string[] { + const roots: string[] = []; + let current = path.resolve(start); + while (true) { + roots.push(current); + const parent = path.dirname(current); + if (parent === current) { + return roots; + } + current = parent; + } +} + +function resolveRepoFile(relativePath: string): string | null { + for (const dir of walkUpDirectories(import.meta.dirname)) { + const candidate = path.join(dir, relativePath); + if (fs.existsSync(candidate) && fs.statSync(candidate).isFile()) { + return candidate; + } + } + return null; +} + +function readTextFile(relativePath: string): string { + const resolved = resolveRepoFile(relativePath); + if (!resolved) { + return ""; + } + return fs.readFileSync(resolved, "utf8").trim(); +} + +function readScenarioFile(relativePath: string): QaSeedScenario[] { + const resolved = resolveRepoFile(relativePath); + if (!resolved) { + return []; + } + return JSON.parse(fs.readFileSync(resolved, "utf8")) as QaSeedScenario[]; +} + +export function readQaBootstrapScenarioCatalog(): QaBootstrapScenarioCatalog { + return { + kickoffTask: readTextFile("qa/QA_KICKOFF_TASK.md"), + scenarios: readScenarioFile("qa/seed-scenarios.json"), + }; +} diff --git a/extensions/qa-lab/web/src/app.ts b/extensions/qa-lab/web/src/app.ts index 7c9dbb0ba0b..879e8348310 100644 --- a/extensions/qa-lab/web/src/app.ts +++ b/extensions/qa-lab/web/src/app.ts @@ -44,9 +44,23 @@ type ReportEnvelope = { }; }; +type SeedScenario = { + id: string; + title: string; + surface: string; + objective: string; + successCriteria: string[]; + docsRefs?: string[]; + codeRefs?: string[]; +}; + type Bootstrap = { baseUrl: string; latestReport: ReportEnvelope["report"]; + controlUiUrl: string | null; + controlUiEmbeddedUrl: string | null; + kickoffTask: string; + scenarios: SeedScenario[]; defaults: { conversationKind: "direct" | "channel"; conversationId: string; @@ -138,6 +152,27 @@ function deriveSelectedThread(state: UiState): string | null { return null; } +function renderScenarioList(scenarios: SeedScenario[]) { + if (scenarios.length === 0) { + return '<p class="empty">No repo-backed scenarios yet.</p>'; + } + return scenarios + .map( + (scenario) => ` + <article class="scenario-card"> + <header> + <strong>${escapeHtml(scenario.title)}</strong> + <span>${escapeHtml(scenario.surface)}</span> + </header> + <p>${escapeHtml(scenario.objective)}</p> + <footer> + <code>${escapeHtml(scenario.id)}</code> + </footer> + </article>`, + ) + .join(""); +} + export async function createQaLabApp(root: HTMLDivElement) { const state: UiState = { bootstrap: null, @@ -336,29 +371,55 @@ export async function createQaLabApp(root: HTMLDivElement) { selectedThreadId, }); const events = (state.snapshot?.events ?? []).slice(-20).reverse(); + const scenarios = state.bootstrap?.scenarios ?? []; + const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl); + const kickoffTask = state.bootstrap?.kickoffTask ?? ""; + const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard"; root.innerHTML = ` - <div class="shell"> - <header class="topbar"> - <div> - <p class="eyebrow">Private QA Workspace</p> - <h1>QA Lab</h1> - <p class="subtle">Synthetic Slack-style debugger for qa-channel.</p> - </div> - <div class="toolbar"> - <button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button> - <button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button> - <button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button> - </div> - </header> - <section class="statusbar"> - <span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span> - <span class="pill">Conversation ${selectedConversationId ?? "none"}</span> - <span class="pill">Thread ${selectedThreadId ?? "root"}</span> - ${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'} - ${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""} - </section> - <main class="workspace"> + <div class="${dashboardShellClass}"> + ${ + hasControlUi + ? ` + <section class="control-pane panel"> + <div class="panel-header"> + <div> + <p class="eyebrow">Agent Control</p> + <h2>Control UI</h2> + </div> + ${ + state.bootstrap?.controlUiUrl + ? `<a class="button-link" href="${escapeHtml(state.bootstrap.controlUiUrl)}" target="_blank" rel="noreferrer">Open full tab</a>` + : "" + } + </div> + <iframe class="control-frame" src="${escapeHtml(state.bootstrap?.controlUiEmbeddedUrl ?? "")}" title="OpenClaw Control UI"></iframe> + </section>` + : "" + } + <div class="shell qa-column"> + <header class="topbar"> + <div> + <p class="eyebrow">Private QA Workspace</p> + <h1>QA Lab</h1> + <p class="subtle">Slack-ish QA surface, repo-backed scenario plan, protocol report.</p> + </div> + <div class="toolbar"> + <button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button> + <button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button> + <button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button> + </div> + </header> + <section class="statusbar"> + <span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span> + <span class="pill">${hasControlUi ? "Control UI linked" : "Control UI external"}</span> + <span class="pill">Scenarios ${scenarios.length}</span> + <span class="pill">Conversation ${selectedConversationId ?? "none"}</span> + <span class="pill">Thread ${selectedThreadId ?? "root"}</span> + ${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'} + ${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""} + </section> + <main class="workspace"> <aside class="rail"> <section class="panel"> <h2>Conversations</h2> @@ -456,6 +517,16 @@ export async function createQaLabApp(root: HTMLDivElement) { </section> </section> <aside class="rail right"> + <section class="panel"> + <h2>Kickoff task</h2> + <pre class="report">${escapeHtml(kickoffTask || "No kickoff task loaded.")}</pre> + </section> + <section class="panel"> + <h2>Seed scenarios</h2> + <div class="scenario-list"> + ${renderScenarioList(scenarios)} + </div> + </section> <section class="panel"> <div class="panel-header"> <h2>Latest report</h2> @@ -485,7 +556,8 @@ export async function createQaLabApp(root: HTMLDivElement) { </div> </section> </aside> - </main> + </main> + </div> </div>`; bindEvents(); } diff --git a/extensions/qa-lab/web/src/main.ts b/extensions/qa-lab/web/src/main.ts index be1f49b887d..d562ebc3439 100644 --- a/extensions/qa-lab/web/src/main.ts +++ b/extensions/qa-lab/web/src/main.ts @@ -1,5 +1,5 @@ import "./styles.css"; -import { createQaLabApp } from "./app"; +import { createQaLabApp } from "./app.js"; const root = document.querySelector<HTMLDivElement>("#app"); diff --git a/extensions/qa-lab/web/src/styles.css b/extensions/qa-lab/web/src/styles.css index 5c19eca4cb5..0eb6dbda2ca 100644 --- a/extensions/qa-lab/web/src/styles.css +++ b/extensions/qa-lab/web/src/styles.css @@ -79,6 +79,21 @@ textarea { padding: 1.2rem; } +.dashboard { + min-height: 100vh; +} + +.split-dashboard { + display: grid; + grid-template-columns: minmax(420px, 1.05fr) minmax(680px, 1fr); + gap: 1rem; + padding: 1rem; +} + +.qa-column { + min-width: 0; +} + .topbar, .statusbar, .workspace { @@ -165,6 +180,34 @@ textarea { min-height: 0; } +.control-pane { + display: flex; + flex-direction: column; + min-height: calc(100vh - 2rem); + position: sticky; + top: 1rem; +} + +.control-frame { + flex: 1; + width: 100%; + border: 1px solid var(--line); + border-radius: 16px; + background: #0b0f14; +} + +.button-link { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 0.7rem 1rem; + border-radius: 14px; + border: 1px solid var(--line); + color: var(--text); + text-decoration: none; + background: rgba(255, 255, 255, 0.03); +} + .panel-header { display: flex; justify-content: space-between; @@ -211,6 +254,34 @@ textarea { overflow: auto; } +.scenario-list { + display: flex; + flex-direction: column; + gap: 0.65rem; + max-height: 28vh; + overflow: auto; +} + +.scenario-card { + padding: 0.8rem; + border-radius: 16px; + background: var(--panel-strong); + border: 1px solid rgba(255, 255, 255, 0.05); +} + +.scenario-card header, +.scenario-card footer { + display: flex; + gap: 0.55rem; + align-items: center; + justify-content: space-between; +} + +.scenario-card p { + margin: 0.55rem 0 0; + color: var(--muted); +} + .message { padding: 0.9rem; border-radius: 16px; @@ -259,6 +330,17 @@ label span { margin-top: 0.85rem; } +@media (max-width: 1280px) { + .split-dashboard { + grid-template-columns: 1fr; + } + + .control-pane { + min-height: 70vh; + position: static; + } +} + .lower { margin-top: 0.85rem; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 344c70dae04..b1c44f61e04 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -577,6 +577,18 @@ importers: extensions/perplexity: {} + extensions/qa-channel: + devDependencies: + openclaw: + specifier: workspace:* + version: link:../.. + + extensions/qa-lab: + devDependencies: + openclaw: + specifier: workspace:* + version: link:../.. + extensions/qianfan: {} extensions/qqbot: diff --git a/qa/QA_KICKOFF_TASK.md b/qa/QA_KICKOFF_TASK.md new file mode 100644 index 00000000000..e09e4a61fcb --- /dev/null +++ b/qa/QA_KICKOFF_TASK.md @@ -0,0 +1,15 @@ +QA mission: +Understand this OpenClaw repo from source + docs before acting. +The repo is available in your workspace at `./repo/`. +Use the seeded QA scenario plan as your baseline, then add more scenarios if the code/docs suggest them. +Run the scenarios through the real qa-channel surfaces where possible. +Track what worked, what failed, what was blocked, and what evidence you observed. +End with a concise report grouped into worked / failed / blocked / follow-up. + +Important expectations: + +- Check both DM and channel behavior. +- Include a Lobster Invaders build task. +- Include a cron reminder about one minute in the future. +- Read docs and source before proposing extra QA scenarios. +- Keep your tone in the configured dev C-3PO personality. diff --git a/qa/README.md b/qa/README.md new file mode 100644 index 00000000000..3c41b8e97b3 --- /dev/null +++ b/qa/README.md @@ -0,0 +1,10 @@ +# QA Scenarios + +Seed QA assets for the private `qa-lab` extension. + +Files: + +- `QA_KICKOFF_TASK.md` - operator prompt for the QA agent. +- `seed-scenarios.json` - repo-backed baseline QA scenarios. + +Keep this folder in git. Add new scenarios here before wiring them into automation. diff --git a/qa/seed-scenarios.json b/qa/seed-scenarios.json new file mode 100644 index 00000000000..0f246566a25 --- /dev/null +++ b/qa/seed-scenarios.json @@ -0,0 +1,139 @@ +[ + { + "id": "channel-chat-baseline", + "title": "Channel baseline conversation", + "surface": "channel", + "objective": "Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.", + "successCriteria": [ + "Agent replies in the shared channel transcript.", + "Agent keeps the conversation scoped to the channel.", + "Agent respects mention-driven group routing semantics." + ], + "docsRefs": ["docs/channels/group-messages.md", "docs/channels/qa-channel.md"], + "codeRefs": ["extensions/qa-channel/src/inbound.ts", "extensions/qa-lab/src/bus-state.ts"] + }, + { + "id": "cron-one-minute-ping", + "title": "Cron one-minute ping", + "surface": "cron", + "objective": "Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.", + "successCriteria": [ + "Agent schedules a cron reminder roughly one minute ahead.", + "Reminder returns through qa-channel.", + "Agent recognizes the reminder as part of the original task." + ], + "docsRefs": ["docs/help/testing.md", "docs/channels/qa-channel.md"], + "codeRefs": ["extensions/qa-lab/src/bus-server.ts", "extensions/qa-lab/src/self-check.ts"] + }, + { + "id": "dm-chat-baseline", + "title": "DM baseline conversation", + "surface": "dm", + "objective": "Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.", + "successCriteria": [ + "Agent replies in DM without channel routing mistakes.", + "Agent explains the QA lab and message bus correctly.", + "Agent keeps the dev C-3PO personality." + ], + "docsRefs": ["docs/channels/qa-channel.md", "docs/help/testing.md"], + "codeRefs": ["extensions/qa-channel/src/gateway.ts", "extensions/qa-lab/src/lab-server.ts"] + }, + { + "id": "lobster-invaders-build", + "title": "Build Lobster Invaders", + "surface": "workspace", + "objective": "Verify the agent can read the repo, create a tiny playable artifact, and report what changed.", + "successCriteria": [ + "Agent inspects source before coding.", + "Agent builds a tiny playable Lobster Invaders artifact.", + "Agent explains how to run or view the artifact." + ], + "docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"], + "codeRefs": ["extensions/qa-lab/src/report.ts", "extensions/qa-lab/web/src/app.ts"] + }, + { + "id": "memory-recall", + "title": "Memory recall after context switch", + "surface": "memory", + "objective": "Verify the agent can store a fact, switch topics, then recall the fact accurately later.", + "successCriteria": [ + "Agent acknowledges the seeded fact.", + "Agent later recalls the same fact correctly.", + "Recall stays scoped to the active QA conversation." + ], + "docsRefs": ["docs/help/testing.md"], + "codeRefs": ["extensions/qa-lab/src/scenario.ts"] + }, + { + "id": "model-switch-follow-up", + "title": "Model switch follow-up", + "surface": "models", + "objective": "Verify the agent can switch to a different configured model and continue coherently.", + "successCriteria": [ + "Agent reflects the model switch request.", + "Follow-up answer remains coherent with prior context.", + "Final report notes whether the switch actually happened." + ], + "docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"], + "codeRefs": ["extensions/qa-lab/src/report.ts"] + }, + { + "id": "reaction-edit-delete", + "title": "Reaction, edit, delete lifecycle", + "surface": "message-actions", + "objective": "Verify the agent can use channel-owned message actions and that the QA transcript reflects them.", + "successCriteria": [ + "Agent adds at least one reaction.", + "Agent edits or replaces a message when asked.", + "Transcript shows the action lifecycle correctly." + ], + "docsRefs": ["docs/channels/qa-channel.md"], + "codeRefs": [ + "extensions/qa-channel/src/channel-actions.ts", + "extensions/qa-lab/src/self-check-scenario.ts" + ] + }, + { + "id": "source-docs-discovery-report", + "title": "Source and docs discovery report", + "surface": "discovery", + "objective": "Verify the agent can read repo docs and source, expand the QA plan, and publish a worked or did-not-work report.", + "successCriteria": [ + "Agent reads docs and source before proposing more tests.", + "Agent identifies extra candidate scenarios beyond the seed list.", + "Agent ends with a worked or failed QA report." + ], + "docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md", "docs/channels/qa-channel.md"], + "codeRefs": [ + "extensions/qa-lab/src/report.ts", + "extensions/qa-lab/src/self-check.ts", + "src/agents/system-prompt.ts" + ] + }, + { + "id": "subagent-handoff", + "title": "Subagent handoff", + "surface": "subagents", + "objective": "Verify the agent can delegate a bounded task to a subagent and fold the result back into the main thread.", + "successCriteria": [ + "Agent launches a bounded subagent task.", + "Subagent result is acknowledged in the main flow.", + "Final answer attributes delegated work clearly." + ], + "docsRefs": ["docs/tools/subagents.md", "docs/help/testing.md"], + "codeRefs": ["src/agents/system-prompt.ts", "extensions/qa-lab/src/report.ts"] + }, + { + "id": "thread-follow-up", + "title": "Threaded follow-up", + "surface": "thread", + "objective": "Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.", + "successCriteria": [ + "Agent creates or uses a thread for deeper work.", + "Follow-up messages stay attached to the thread.", + "Thread report references the correct prior context." + ], + "docsRefs": ["docs/channels/qa-channel.md", "docs/channels/group-messages.md"], + "codeRefs": ["extensions/qa-channel/src/protocol.ts", "extensions/qa-lab/src/bus-state.ts"] + } +] diff --git a/src/cli/program/register.subclis.test.ts b/src/cli/program/register.subclis.test.ts index c85f48c6d73..7d0b59df177 100644 --- a/src/cli/program/register.subclis.test.ts +++ b/src/cli/program/register.subclis.test.ts @@ -23,6 +23,13 @@ const { nodesAction, registerNodesCli } = vi.hoisted(() => { return { nodesAction: action, registerNodesCli: register }; }); +const { registerQaCli } = vi.hoisted(() => ({ + registerQaCli: vi.fn((program: Command) => { + const qa = program.command("qa"); + qa.command("run").action(() => undefined); + }), +})); + const configModule = vi.hoisted(() => ({ loadConfig: vi.fn(), readConfigFileSnapshot: vi.fn(), @@ -30,6 +37,7 @@ const configModule = vi.hoisted(() => ({ vi.mock("../acp-cli.js", () => ({ registerAcpCli })); vi.mock("../nodes-cli.js", () => ({ registerNodesCli })); +vi.mock("../qa-cli.js", () => ({ registerQaCli })); vi.mock("../../config/config.js", () => configModule); describe("registerSubCliCommands", () => { @@ -87,6 +95,7 @@ describe("registerSubCliCommands", () => { expect(names).toContain("acp"); expect(names).toContain("gateway"); expect(names).toContain("clawbot"); + expect(names).toContain("qa"); expect(registerAcpCli).not.toHaveBeenCalled(); }); diff --git a/src/cli/program/register.subclis.ts b/src/cli/program/register.subclis.ts index 654e7e46551..24810755337 100644 --- a/src/cli/program/register.subclis.ts +++ b/src/cli/program/register.subclis.ts @@ -181,6 +181,15 @@ const entries: SubCliEntry[] = [ mod.registerDocsCli(program); }, }, + { + name: "qa", + description: "Run QA scenarios and launch the private QA debugger UI", + hasSubcommands: true, + register: async (program) => { + const mod = await import("../qa-cli.js"); + mod.registerQaCli(program); + }, + }, { name: "hooks", description: "Manage internal agent hooks", diff --git a/src/cli/program/subcli-descriptors.ts b/src/cli/program/subcli-descriptors.ts index e05e90971bb..a7dae0b47b4 100644 --- a/src/cli/program/subcli-descriptors.ts +++ b/src/cli/program/subcli-descriptors.ts @@ -68,6 +68,11 @@ export const SUB_CLI_DESCRIPTORS = [ description: "Search the live OpenClaw docs", hasSubcommands: false, }, + { + name: "qa", + description: "Run QA scenarios and launch the private QA debugger UI", + hasSubcommands: true, + }, { name: "hooks", description: "Manage internal agent hooks", diff --git a/src/cli/qa-cli.ts b/src/cli/qa-cli.ts new file mode 100644 index 00000000000..f3caf2be8d7 --- /dev/null +++ b/src/cli/qa-cli.ts @@ -0,0 +1,6 @@ +import type { Command } from "commander"; +import { registerQaLabCli } from "../qa-e2e/cli.js"; + +export function registerQaCli(program: Command) { + registerQaLabCli(program); +} diff --git a/src/qa-e2e/cli.ts b/src/qa-e2e/cli.ts new file mode 100644 index 00000000000..7e5585eb0c9 --- /dev/null +++ b/src/qa-e2e/cli.ts @@ -0,0 +1 @@ +export { registerQaLabCli } from "../../extensions/qa-lab/api.js";