mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
feat(qa): recreate qa lab docker stack
This commit is contained in:
@@ -97,6 +97,7 @@ RUN pnpm build:docker
|
||||
# Force pnpm for UI build (Bun may fail on ARM/Synology architectures)
|
||||
ENV OPENCLAW_PREFER_PNPM=1
|
||||
RUN pnpm ui:build
|
||||
RUN pnpm qa:lab:build
|
||||
|
||||
# Prune dev dependencies and strip build-only metadata before copying
|
||||
# runtime assets into the final image.
|
||||
@@ -156,6 +157,7 @@ COPY --from=runtime-assets --chown=node:node /app/openclaw.mjs .
|
||||
COPY --from=runtime-assets --chown=node:node /app/${OPENCLAW_BUNDLED_PLUGIN_DIR} ./${OPENCLAW_BUNDLED_PLUGIN_DIR}
|
||||
COPY --from=runtime-assets --chown=node:node /app/skills ./skills
|
||||
COPY --from=runtime-assets --chown=node:node /app/docs ./docs
|
||||
COPY --from=runtime-assets --chown=node:node /app/qa ./qa
|
||||
|
||||
# In npm-installed Docker images, prefer the copied source extension tree for
|
||||
# bundled discovery so package metadata that points at source entries stays valid.
|
||||
|
||||
@@ -1,865 +1,66 @@
|
||||
---
|
||||
title: "QA E2E Automation"
|
||||
summary: "Design note for a full end-to-end QA system built on a synthetic message-channel plugin, Dockerized OpenClaw, and subagent-driven scenario execution"
|
||||
summary: "Private QA automation shape for qa-lab, qa-channel, seeded scenarios, and protocol reports"
|
||||
read_when:
|
||||
- You are designing a true end-to-end QA harness for OpenClaw
|
||||
- You want a synthetic message channel for automated feature verification
|
||||
- You want subagents to discover features, run scenarios, and propose fixes
|
||||
- Extending qa-lab or qa-channel
|
||||
- Adding repo-backed QA scenarios
|
||||
- Building higher-realism QA automation around the Gateway dashboard
|
||||
title: "QA E2E Automation"
|
||||
---
|
||||
|
||||
# QA E2E Automation
|
||||
|
||||
This note proposes a true end-to-end QA system for OpenClaw built around a
|
||||
real channel plugin dedicated to testing.
|
||||
The private QA stack is meant to exercise OpenClaw in a more realistic,
|
||||
channel-shaped way than a single unit test can.
|
||||
|
||||
The core idea:
|
||||
Current pieces:
|
||||
|
||||
- run OpenClaw inside Docker in a realistic gateway configuration
|
||||
- expose a synthetic but full-featured message channel as a normal plugin
|
||||
- let a QA harness inject inbound traffic and inspect outbound state
|
||||
- let OpenClaw agents and subagents explore, verify, and report on behavior
|
||||
- optionally escalate failing scenarios into host-side fix workflows that open PRs
|
||||
- `extensions/qa-channel`: synthetic message channel with DM, channel, thread,
|
||||
reaction, edit, and delete surfaces.
|
||||
- `extensions/qa-lab`: debugger UI and QA bus for observing the transcript,
|
||||
injecting inbound messages, and exporting a Markdown report.
|
||||
- `qa/`: repo-backed seed assets for the kickoff task and baseline QA
|
||||
scenarios.
|
||||
|
||||
This is not a unit-test replacement. It is a product-level system test layer.
|
||||
The long-term goal is a two-pane QA site:
|
||||
|
||||
## Chosen direction
|
||||
- Left: Gateway dashboard (Control UI) with the agent.
|
||||
- Right: QA Lab, showing the Slack-ish transcript and scenario plan.
|
||||
|
||||
The initial direction for this project is:
|
||||
That lets an operator or automation loop give the agent a QA mission, observe
|
||||
real channel behavior, and record what worked, failed, or stayed blocked.
|
||||
|
||||
- build the full system inside this repo
|
||||
- test against a matrix, not a single model/provider pair
|
||||
- use Markdown reports as the first output artifact
|
||||
- defer auto-PR and auto-fix work until later
|
||||
- treat Slack-class semantics as the MVP transport target
|
||||
- keep orchestration simple in v1, with a host-side controller that exercises
|
||||
the moving parts directly
|
||||
- evolve toward OpenClaw becoming the orchestration layer later, once the
|
||||
transport, scenario, and reporting model are proven
|
||||
## Repo-backed seeds
|
||||
|
||||
## Goals
|
||||
Seed assets live in `qa/`:
|
||||
|
||||
- Test OpenClaw through a real messaging-channel boundary, not only `chat.send`
|
||||
or embedded mocks.
|
||||
- Verify channel semantics that matter for real use:
|
||||
- DMs
|
||||
- channels/groups
|
||||
- threads
|
||||
- edits
|
||||
- deletes
|
||||
- reactions
|
||||
- polls
|
||||
- attachments
|
||||
- Verify agent behavior across realistic user flows:
|
||||
- memory
|
||||
- thread binding
|
||||
- model switching
|
||||
- cron jobs
|
||||
- subagents
|
||||
- approvals
|
||||
- routing
|
||||
- channel-specific `message` actions
|
||||
- Make the QA runner capable of feature discovery:
|
||||
- read docs
|
||||
- inspect plugin capability discovery
|
||||
- inspect code and config
|
||||
- generate a scenario protocol
|
||||
- Support deterministic protocol tests and best-effort real-model tests as
|
||||
separate lanes.
|
||||
- Allow automated bug triage artifacts that can feed a host-side fix worker.
|
||||
- `qa/QA_KICKOFF_TASK.md`
|
||||
- `qa/seed-scenarios.json`
|
||||
|
||||
## Non-goals
|
||||
These are intentionally in git so the QA plan is visible to both humans and the
|
||||
agent. The baseline list should stay broad enough to cover:
|
||||
|
||||
- Not a replacement for existing unit, contract, or live tests.
|
||||
- Not a production channel.
|
||||
- Not a requirement that all bug fixing happen from inside the Dockerized
|
||||
OpenClaw runtime.
|
||||
- Not a reason to add test-only core branches for one channel.
|
||||
- DM and channel chat
|
||||
- thread behavior
|
||||
- message action lifecycle
|
||||
- cron callbacks
|
||||
- memory recall
|
||||
- model switching
|
||||
- subagent handoff
|
||||
- repo-reading and docs-reading
|
||||
- one small build task such as Lobster Invaders
|
||||
|
||||
## Why a channel plugin
|
||||
## Reporting
|
||||
|
||||
OpenClaw already has the right boundary:
|
||||
`qa-lab` exports a Markdown protocol report from the observed bus timeline.
|
||||
The report should answer:
|
||||
|
||||
- core owns the shared `message` tool, prompt wiring, outer session
|
||||
bookkeeping, and dispatch
|
||||
- channel plugins own:
|
||||
- config
|
||||
- pairing
|
||||
- security
|
||||
- session grammar
|
||||
- threading
|
||||
- outbound delivery
|
||||
- channel-owned actions and capability discovery
|
||||
- What worked
|
||||
- What failed
|
||||
- What stayed blocked
|
||||
- What follow-up scenarios are worth adding
|
||||
|
||||
That means the cleanest design is:
|
||||
## Related docs
|
||||
|
||||
- a real channel plugin for QA transport semantics
|
||||
- a separate QA control plane for injection and inspection
|
||||
|
||||
This keeps the test transport inside the same architecture used by Slack,
|
||||
Discord, Teams, and similar channels.
|
||||
|
||||
## System overview
|
||||
|
||||
The system has six pieces.
|
||||
|
||||
1. `qa-channel` plugin
|
||||
|
||||
- Bundled extension under `extensions/qa-channel`
|
||||
- Normal `ChannelPlugin`
|
||||
- Behaves like a Slack/Discord/Teams-class channel
|
||||
- Registers channel-owned message actions through the shared `message` tool
|
||||
|
||||
2. `qa-bus` sidecar
|
||||
|
||||
- Small HTTP and/or WS service
|
||||
- Canonical state store for synthetic conversations, messages, threads,
|
||||
reactions, edits, and event history
|
||||
- Accepts inbound events from the harness
|
||||
- Exposes inspection and wait APIs for assertions
|
||||
|
||||
3. Dockerized OpenClaw gateway
|
||||
|
||||
- Runs as close to real deployment as practical
|
||||
- Loads `qa-channel`
|
||||
- Uses normal config, routing, session, cron, and plugin loading
|
||||
|
||||
4. QA orchestrator
|
||||
|
||||
- Host-side runner or dedicated OpenClaw-driven controller
|
||||
- Provisions scenario environments
|
||||
- Seeds config
|
||||
- Resets state
|
||||
- Executes test matrix
|
||||
- Collects structured outcomes
|
||||
|
||||
5. Auto-fix worker
|
||||
|
||||
- Host-side workflow
|
||||
- Creates a worktree
|
||||
- Launches a coding agent
|
||||
- Runs scoped verification
|
||||
- Opens a PR
|
||||
|
||||
The auto-fix worker should start outside the container. It needs direct repo
|
||||
and GitHub access, clean worktree control, and better isolation from the
|
||||
runtime under test.
|
||||
|
||||
6. `qa-lab` extension
|
||||
|
||||
- Bundled extension under `extensions/qa-lab`
|
||||
- Owns the QA harness, Markdown report flow, and private debugger UI
|
||||
- Registers hidden CLI entrypoints such as `openclaw qa run` and
|
||||
`openclaw qa ui`
|
||||
- Stays separate from the shipped Control UI bundle
|
||||
|
||||
## High-level flow
|
||||
|
||||
1. Start `qa-bus`.
|
||||
2. Start OpenClaw in Docker with `qa-channel` enabled.
|
||||
3. QA orchestrator injects inbound messages into `qa-bus`.
|
||||
4. `qa-channel` receives them as normal inbound traffic.
|
||||
5. OpenClaw runs the agent loop normally.
|
||||
6. Outbound replies and channel actions flow back through `qa-channel` into
|
||||
`qa-bus`.
|
||||
7. QA orchestrator inspects state or waits on events.
|
||||
8. Orchestrator records pass/fail/flaky/unknown plus artifacts.
|
||||
9. Severe failures optionally emit a bug packet for the host-side fix worker.
|
||||
|
||||
## Lanes
|
||||
|
||||
The system should have two distinct lanes.
|
||||
|
||||
### Lane A: deterministic protocol lane
|
||||
|
||||
Use a deterministic or tightly controlled model setup.
|
||||
|
||||
Preferred options:
|
||||
|
||||
- a canned provider fixture
|
||||
- the bundled `synthetic` provider when useful
|
||||
- fixed prompts with exact assertions
|
||||
|
||||
Purpose:
|
||||
|
||||
- verify transport and product semantics
|
||||
- keep flakiness low
|
||||
- catch regressions in routing, memory plumbing, thread binding, cron, and tool
|
||||
invocation
|
||||
|
||||
### Lane B: quality lane
|
||||
|
||||
Use real providers and real models in a matrix.
|
||||
|
||||
Purpose:
|
||||
|
||||
- verify that the agent can still do good work end to end
|
||||
- evaluate feature discoverability and instruction following
|
||||
- surface model-specific breakage or degraded behavior
|
||||
|
||||
Expected result type:
|
||||
|
||||
- best-effort
|
||||
- rubric-based
|
||||
- more tolerant of wording variation
|
||||
|
||||
Matrix guidance for v1:
|
||||
|
||||
- start with a small curated matrix, not "everything configured"
|
||||
- keep deterministic protocol runs separate from quality runs
|
||||
- report matrix cells independently so one provider/model failure does not hide
|
||||
transport correctness
|
||||
|
||||
Do not mix these lanes. Protocol correctness and model quality should fail
|
||||
independently.
|
||||
|
||||
## Use existing bootstrap seam first
|
||||
|
||||
Before the custom channel exists, OpenClaw already has a useful bootstrap path:
|
||||
|
||||
- admin-scoped synthetic originating-route fields on `chat.send`
|
||||
- synthetic message-channel headers for HTTP flows
|
||||
|
||||
That is enough to build a first QA controller for:
|
||||
|
||||
- thread/session routing
|
||||
- ACP bind flows
|
||||
- subagent delivery
|
||||
- cron wake paths
|
||||
- memory persistence checks
|
||||
|
||||
This should be Phase 0 because it de-risks the scenario protocol before the
|
||||
full channel lands.
|
||||
|
||||
## `qa-lab` extension design
|
||||
|
||||
`qa-lab` is the private operator-facing half of this system.
|
||||
|
||||
Suggested package:
|
||||
|
||||
- `extensions/qa-lab/`
|
||||
|
||||
Suggested responsibilities:
|
||||
|
||||
- host the synthetic bus state machine
|
||||
- host the scenario runner
|
||||
- write Markdown reports
|
||||
- serve a private debugger UI on a separate local server
|
||||
- keep that UI entirely outside the shipped Control UI bundle
|
||||
|
||||
Suggested UI shape:
|
||||
|
||||
- left rail for conversations and threads
|
||||
- center transcript pane
|
||||
- right rail for event stream and report inspection
|
||||
- bottom inject-composer for inbound QA traffic
|
||||
|
||||
## `qa-channel` plugin design
|
||||
|
||||
## Package layout
|
||||
|
||||
Suggested package:
|
||||
|
||||
- `extensions/qa-channel/`
|
||||
|
||||
Suggested file layout:
|
||||
|
||||
- `package.json`
|
||||
- `openclaw.plugin.json`
|
||||
- `index.ts`
|
||||
- `setup-entry.ts`
|
||||
- `api.ts`
|
||||
- `runtime-api.ts`
|
||||
- `src/channel.ts`
|
||||
- `src/channel-api.ts`
|
||||
- `src/config-schema.ts`
|
||||
- `src/setup-core.ts`
|
||||
- `src/setup-surface.ts`
|
||||
- `src/runtime.ts`
|
||||
- `src/channel.runtime.ts`
|
||||
- `src/inbound.ts`
|
||||
- `src/outbound.ts`
|
||||
- `src/state-client.ts`
|
||||
- `src/targets.ts`
|
||||
- `src/threading.ts`
|
||||
- `src/message-actions.ts`
|
||||
- `src/probe.ts`
|
||||
- `src/doctor.ts`
|
||||
- `src/*.test.ts`
|
||||
|
||||
Model it after Slack, Discord, Teams, or Google Chat packaging, not as a one-off
|
||||
test helper.
|
||||
|
||||
## Capabilities
|
||||
|
||||
MVP capabilities:
|
||||
|
||||
- one account
|
||||
- DMs
|
||||
- channels
|
||||
- threads
|
||||
- send text
|
||||
- reply in thread
|
||||
- read
|
||||
- edit
|
||||
- delete
|
||||
- react
|
||||
- search
|
||||
- upload-file
|
||||
- download-file
|
||||
|
||||
Phase 2 capabilities:
|
||||
|
||||
- polls
|
||||
- member-info
|
||||
- channel-info
|
||||
- channel-list
|
||||
- pin and unpin
|
||||
- permissions
|
||||
- topic create and edit
|
||||
|
||||
These map naturally onto the shared `message` tool action model already used by
|
||||
channel plugins.
|
||||
|
||||
## Conversation model
|
||||
|
||||
Use a stable synthetic grammar that supports both simplicity and realistic
|
||||
coverage.
|
||||
|
||||
Suggested ids:
|
||||
|
||||
- DM conversation: `dm:<user-id>`
|
||||
- channel: `chan:<space-id>`
|
||||
- thread: `thread:<space-id>:<thread-id>`
|
||||
- message id: `msg:<ulid>`
|
||||
|
||||
Suggested target forms:
|
||||
|
||||
- `qa:dm:<user-id>`
|
||||
- `qa:chan:<space-id>`
|
||||
- `qa:thread:<space-id>:<thread-id>`
|
||||
|
||||
The plugin should own translation between external target strings and canonical
|
||||
conversation ids.
|
||||
|
||||
## Pairing and security
|
||||
|
||||
Even though this is a QA channel, it should still implement real policy
|
||||
surfaces:
|
||||
|
||||
- DM allowlist / pairing flow
|
||||
- group policy
|
||||
- mention gating where relevant
|
||||
- trusted sender ids
|
||||
|
||||
Reason:
|
||||
|
||||
- these are product features and should be testable through the QA transport
|
||||
- the QA lane should be able to verify policy failures, not only happy paths
|
||||
|
||||
## Threading model
|
||||
|
||||
Threading is one of the main reasons to build this channel.
|
||||
|
||||
Required semantics:
|
||||
|
||||
- create thread from a top-level message
|
||||
- reply inside an existing thread
|
||||
- list thread messages
|
||||
- preserve parent message linkage
|
||||
- let OpenClaw thread binding attach a session to a thread
|
||||
|
||||
The QA bus must preserve:
|
||||
|
||||
- conversation id
|
||||
- thread id
|
||||
- parent message id
|
||||
- sender id
|
||||
- timestamps
|
||||
|
||||
## Channel-owned message actions
|
||||
|
||||
The plugin should implement `actions.describeMessageTool(...)` and
|
||||
`actions.handleAction(...)`.
|
||||
|
||||
MVP action list:
|
||||
|
||||
- `send`
|
||||
- `read`
|
||||
- `reply`
|
||||
- `react`
|
||||
- `edit`
|
||||
- `delete`
|
||||
- `thread-create`
|
||||
- `thread-reply`
|
||||
- `search`
|
||||
- `upload-file`
|
||||
- `download-file`
|
||||
|
||||
This is enough to test the shared `message` tool end to end with real channel
|
||||
semantics.
|
||||
|
||||
## `qa-bus` design
|
||||
|
||||
`qa-bus` is the transport simulator and assertion backend.
|
||||
|
||||
It should not know OpenClaw internals. It should know channel state.
|
||||
|
||||
For v1, keep `qa-bus` in this repo so:
|
||||
|
||||
- fixtures and scenarios evolve with product code
|
||||
- the transport contract can change in lock-step with the plugin
|
||||
- CI and local dev do not need another repo checkout
|
||||
|
||||
## Responsibilities
|
||||
|
||||
- accept inbound user/platform events
|
||||
- persist canonical conversation state
|
||||
- persist append-only event log
|
||||
- expose inspection APIs
|
||||
- expose blocking wait APIs
|
||||
- support reset per scenario or per suite
|
||||
|
||||
## Transport
|
||||
|
||||
HTTP is enough for MVP.
|
||||
|
||||
Suggested endpoints:
|
||||
|
||||
- `POST /reset`
|
||||
- `POST /inbound/message`
|
||||
- `POST /inbound/edit`
|
||||
- `POST /inbound/delete`
|
||||
- `POST /inbound/reaction`
|
||||
- `POST /inbound/thread/create`
|
||||
- `GET /state/conversations`
|
||||
- `GET /state/messages`
|
||||
- `GET /state/threads`
|
||||
- `GET /events`
|
||||
- `POST /wait`
|
||||
|
||||
Optional WS stream:
|
||||
|
||||
- `/stream`
|
||||
|
||||
Useful for live event taps and debugging.
|
||||
|
||||
## State model
|
||||
|
||||
Persist three layers.
|
||||
|
||||
1. Conversation snapshot
|
||||
|
||||
- participants
|
||||
- type
|
||||
- thread topology
|
||||
- latest message pointers
|
||||
|
||||
2. Message snapshot
|
||||
|
||||
- sender
|
||||
- content
|
||||
- attachments
|
||||
- edit history
|
||||
- reactions
|
||||
- parent and thread linkage
|
||||
|
||||
3. Append-only event log
|
||||
|
||||
- canonical timestamp
|
||||
- causal ordering
|
||||
- source: inbound, outbound, action, system
|
||||
- payload
|
||||
|
||||
The append-only log matters because many QA assertions are event-oriented, not
|
||||
just state-oriented.
|
||||
|
||||
## Assertion API
|
||||
|
||||
The harness needs waiters, not just snapshots.
|
||||
|
||||
Suggested `POST /wait` contract:
|
||||
|
||||
- `kind`
|
||||
- `match`
|
||||
- `timeoutMs`
|
||||
|
||||
Examples:
|
||||
|
||||
- wait for outbound message matching text regex
|
||||
- wait for thread creation
|
||||
- wait for reaction added
|
||||
- wait for message edit
|
||||
- wait for no event of type X within Y ms
|
||||
|
||||
This gives stable tests without custom polling code in every scenario.
|
||||
|
||||
## QA orchestrator design
|
||||
|
||||
The orchestrator should own scenario planning and artifact collection.
|
||||
|
||||
Start host-side. Later, OpenClaw can orchestrate parts of it.
|
||||
|
||||
This is the chosen v1 direction.
|
||||
|
||||
Why:
|
||||
|
||||
- simpler to iterate while the transport and scenario protocol are still moving
|
||||
- easier access to the repo, logs, Docker, and test fixtures
|
||||
- easier artifact collection and report generation
|
||||
- avoids over-coupling the first version to subagent behavior before the QA
|
||||
protocol itself is stable
|
||||
|
||||
## Inputs
|
||||
|
||||
- docs pages
|
||||
- channel capability discovery
|
||||
- configured provider/model lane
|
||||
- scenario catalog
|
||||
- repo/test metadata
|
||||
|
||||
## Outputs
|
||||
|
||||
- structured protocol report
|
||||
- scenario transcript
|
||||
- captured channel state
|
||||
- gateway logs
|
||||
- failure packets
|
||||
|
||||
For v1, the primary output is a Markdown report.
|
||||
|
||||
Suggested report sections:
|
||||
|
||||
- suite summary
|
||||
- environment
|
||||
- provider/model matrix
|
||||
- scenarios passed
|
||||
- scenarios failed
|
||||
- flaky or inconclusive scenarios
|
||||
- captured evidence links or inline excerpts
|
||||
- suspected ownership or file hints
|
||||
- follow-up recommendations
|
||||
|
||||
## Scenario format
|
||||
|
||||
Use a data-driven scenario spec.
|
||||
|
||||
Suggested shape:
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "thread-memory-recall",
|
||||
"lane": "deterministic",
|
||||
"preconditions": ["qa-channel", "memory-enabled"],
|
||||
"steps": [
|
||||
{
|
||||
"type": "injectMessage",
|
||||
"to": "qa:dm:user-a",
|
||||
"text": "Remember that the deploy key is kiwi."
|
||||
},
|
||||
{ "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } },
|
||||
{ "type": "injectMessage", "to": "qa:dm:user-a", "text": "What was the deploy key?" },
|
||||
{ "type": "waitForOutbound", "match": { "textIncludes": "kiwi" } }
|
||||
],
|
||||
"assertions": [{ "type": "outboundTextIncludes", "value": "kiwi" }]
|
||||
}
|
||||
```
|
||||
|
||||
Keep the execution engine generic and the scenario catalog declarative.
|
||||
|
||||
## Feature discovery
|
||||
|
||||
The orchestrator can discover candidate scenarios from three sources.
|
||||
|
||||
1. Docs
|
||||
|
||||
- channel docs
|
||||
- testing docs
|
||||
- gateway docs
|
||||
- subagents docs
|
||||
- cron docs
|
||||
|
||||
2. Runtime capability discovery
|
||||
|
||||
- channel `message` action discovery
|
||||
- plugin status and channel capabilities
|
||||
- configured providers/models
|
||||
|
||||
3. Code hints
|
||||
|
||||
- known action names
|
||||
- channel-specific feature flags
|
||||
- config schema
|
||||
|
||||
This should produce a proposed protocol with:
|
||||
|
||||
- must-test
|
||||
- can-test
|
||||
- blocked
|
||||
- unsupported
|
||||
|
||||
## Scenario classes
|
||||
|
||||
Recommended catalog:
|
||||
|
||||
- transport basics
|
||||
- DM send and reply
|
||||
- channel send
|
||||
- thread create and reply
|
||||
- reaction add and read
|
||||
- edit and delete
|
||||
- policy
|
||||
- allowlist
|
||||
- pairing
|
||||
- group mention gating
|
||||
- shared `message` tool
|
||||
- read
|
||||
- search
|
||||
- reply
|
||||
- react
|
||||
- upload and download
|
||||
- agent quality
|
||||
- follows channel context
|
||||
- obeys thread semantics
|
||||
- uses memory across turns
|
||||
- switches model when instructed
|
||||
- automation
|
||||
- cron add and run
|
||||
- cron delivery into channel
|
||||
- scheduled reminders
|
||||
- subagents
|
||||
- spawn
|
||||
- announce
|
||||
- threaded follow-up
|
||||
- nested orchestration when enabled
|
||||
- failure handling
|
||||
- unsupported action
|
||||
- timeout
|
||||
- malformed target
|
||||
- policy denial
|
||||
|
||||
## OpenClaw as orchestrator
|
||||
|
||||
Longer-term, OpenClaw itself can coordinate the QA run.
|
||||
|
||||
Suggested architecture:
|
||||
|
||||
- one controller session
|
||||
- N worker subagents
|
||||
- each worker owns one scenario or scenario shard
|
||||
- workers report structured results back to controller
|
||||
|
||||
Good fits for existing OpenClaw primitives:
|
||||
|
||||
- `sessions_spawn`
|
||||
- `subagents`
|
||||
- cron-based wakeups for long-running suites
|
||||
- thread-bound sessions for scenario-local follow-up
|
||||
|
||||
Best near-term use:
|
||||
|
||||
- controller generates the plan
|
||||
- workers execute scenarios in parallel
|
||||
- controller synthesizes report
|
||||
|
||||
Avoid making the controller also own host Git operations in the first version.
|
||||
|
||||
Chosen direction:
|
||||
|
||||
- v1: host-side controller
|
||||
- v2+: OpenClaw-native orchestration once the scenario protocol and transport
|
||||
model are stable
|
||||
|
||||
## Auto-fix workflow
|
||||
|
||||
The system should emit a structured bug packet when a scenario fails.
|
||||
|
||||
Suggested bug packet:
|
||||
|
||||
- scenario id
|
||||
- lane
|
||||
- failure kind
|
||||
- minimal repro steps
|
||||
- channel event transcript
|
||||
- gateway transcript
|
||||
- logs
|
||||
- suspected files
|
||||
- confidence
|
||||
|
||||
Host-side fix worker flow:
|
||||
|
||||
1. receive bug packet
|
||||
2. create detached worktree
|
||||
3. launch coding agent in worktree
|
||||
4. write failing regression first when practical
|
||||
5. implement fix
|
||||
6. run scoped verification
|
||||
7. open PR
|
||||
|
||||
This should remain host-side at first because it needs:
|
||||
|
||||
- repo write access
|
||||
- worktree hygiene
|
||||
- git credentials
|
||||
- GitHub auth
|
||||
|
||||
Chosen direction:
|
||||
|
||||
- do not auto-open PRs in v1
|
||||
- emit Markdown reports and structured failure packets first
|
||||
- add host-side worktree + PR automation later
|
||||
|
||||
## Rollout plan
|
||||
|
||||
## Phase 0: bootstrap on existing synthetic ingress
|
||||
|
||||
Build a first QA runner without a new channel:
|
||||
|
||||
- use `chat.send` with admin-scoped synthetic originating-route fields
|
||||
- run deterministic scenarios against routing, memory, cron, subagents, and ACP
|
||||
- validate protocol format and artifact collection
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- scenario runner exists
|
||||
- structured protocol report exists
|
||||
- failure artifacts exist
|
||||
|
||||
## Phase 1: MVP `qa-channel`
|
||||
|
||||
Build the plugin and bus with:
|
||||
|
||||
- DM
|
||||
- channels
|
||||
- threads
|
||||
- read
|
||||
- reply
|
||||
- react
|
||||
- edit
|
||||
- delete
|
||||
- search
|
||||
|
||||
Target semantics:
|
||||
|
||||
- Slack-class transport behavior
|
||||
- not full Teams-class parity yet
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- OpenClaw in Docker can talk to `qa-bus`
|
||||
- harness can inject + inspect
|
||||
- one green end-to-end suite across message transport and agent behavior
|
||||
|
||||
## Phase 2: protocol expansion
|
||||
|
||||
Add:
|
||||
|
||||
- attachments
|
||||
- polls
|
||||
- pins
|
||||
- richer policy tests
|
||||
- quality lane with real provider/model matrix
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- scenario matrix covers major built-in features
|
||||
- deterministic and quality lanes are separated
|
||||
|
||||
## Phase 3: subagent-driven QA
|
||||
|
||||
Add:
|
||||
|
||||
- controller agent
|
||||
- worker subagents
|
||||
- scenario discovery from docs + capability discovery
|
||||
- parallel execution
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- one controller can fan out and synthesize a suite report
|
||||
|
||||
## Phase 4: auto-fix loop
|
||||
|
||||
Add:
|
||||
|
||||
- bug packet emission
|
||||
- host-side worktree runner
|
||||
- PR creation
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- selected failures can auto-produce draft PRs
|
||||
|
||||
## Risks
|
||||
|
||||
## Risk: too much magic in one layer
|
||||
|
||||
If the QA channel, bus, and orchestrator all become smart at once, debugging
|
||||
will be painful.
|
||||
|
||||
Mitigation:
|
||||
|
||||
- keep `qa-channel` transport-focused
|
||||
- keep `qa-bus` state-focused
|
||||
- keep orchestrator separate
|
||||
|
||||
## Risk: flaky assertions from model variance
|
||||
|
||||
Mitigation:
|
||||
|
||||
- deterministic lane
|
||||
- quality lane
|
||||
- different pass criteria
|
||||
|
||||
## Risk: test-only branches leaking into core
|
||||
|
||||
Mitigation:
|
||||
|
||||
- no core special cases for `qa-channel`
|
||||
- use normal plugin seams
|
||||
- use admin synthetic ingress only as bootstrap
|
||||
|
||||
## Risk: auto-fix overreach
|
||||
|
||||
Mitigation:
|
||||
|
||||
- keep fix worker host-side
|
||||
- require explicit policy for when PRs can open automatically
|
||||
- gate with scoped tests
|
||||
|
||||
## Risk: building a fake platform nobody uses
|
||||
|
||||
Mitigation:
|
||||
|
||||
- emulate Slack/Discord/Teams semantics, not an abstract transport
|
||||
- prioritize features that stress shared OpenClaw boundaries
|
||||
|
||||
## MVP recommendation
|
||||
|
||||
If building this now, follow this exact order.
|
||||
|
||||
1. Host-side scenario runner using existing synthetic originating-route support.
|
||||
2. `qa-bus` sidecar with state, events, reset, and wait APIs.
|
||||
3. `extensions/qa-channel` MVP with DMs, channels, threads, reply, read, react,
|
||||
edit, delete, and search.
|
||||
4. Markdown report generator for suite + matrix output.
|
||||
5. One deterministic end-to-end suite:
|
||||
- inject inbound DM
|
||||
- verify reply
|
||||
- create thread
|
||||
- verify follow-up in thread
|
||||
- verify memory recall on later turn
|
||||
6. Add curated real-model matrix quality lane.
|
||||
7. Add controller subagent orchestration.
|
||||
8. Add host-side auto-fix worktree runner.
|
||||
|
||||
This order gets real value quickly without requiring the full grand design to
|
||||
land before the first useful signal appears.
|
||||
|
||||
## Current product decisions
|
||||
|
||||
- `qa-bus` lives inside this repo
|
||||
- the first controller is host-side
|
||||
- Slack-class behavior is the MVP target
|
||||
- the quality lane uses a curated matrix
|
||||
- first version produces Markdown reports, not PRs
|
||||
- OpenClaw-native orchestration is a later phase, not a v1 requirement
|
||||
- [Testing](/help/testing)
|
||||
- [QA Channel](/channels/qa-channel)
|
||||
- [Dashboard](/web/dashboard)
|
||||
|
||||
@@ -2,9 +2,16 @@ export * from "./src/bus-queries.js";
|
||||
export * from "./src/bus-server.js";
|
||||
export * from "./src/bus-state.js";
|
||||
export * from "./src/bus-waiters.js";
|
||||
export * from "./src/cli.js";
|
||||
export * from "./src/harness-runtime.js";
|
||||
export * from "./src/lab-server.js";
|
||||
export * from "./src/docker-harness.js";
|
||||
export * from "./src/mock-openai-server.js";
|
||||
export * from "./src/qa-agent-bootstrap.js";
|
||||
export * from "./src/qa-agent-workspace.js";
|
||||
export * from "./src/qa-gateway-config.js";
|
||||
export * from "./src/report.js";
|
||||
export * from "./src/scenario.js";
|
||||
export * from "./src/scenario-catalog.js";
|
||||
export * from "./src/self-check-scenario.js";
|
||||
export * from "./src/self-check.js";
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import path from "node:path";
|
||||
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
|
||||
import { startQaLabServer } from "./lab-server.js";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
|
||||
export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
|
||||
const server = await startQaLabServer({
|
||||
@@ -12,10 +15,29 @@ export async function runQaLabSelfCheckCommand(opts: { output?: string }) {
|
||||
}
|
||||
}
|
||||
|
||||
export async function runQaLabUiCommand(opts: { host?: string; port?: number }) {
|
||||
export async function runQaLabUiCommand(opts: {
|
||||
host?: string;
|
||||
port?: number;
|
||||
advertiseHost?: string;
|
||||
advertisePort?: number;
|
||||
controlUiUrl?: string;
|
||||
controlUiToken?: string;
|
||||
controlUiProxyTarget?: string;
|
||||
autoKickoffTarget?: string;
|
||||
embeddedGateway?: string;
|
||||
sendKickoffOnStart?: boolean;
|
||||
}) {
|
||||
const server = await startQaLabServer({
|
||||
host: opts.host,
|
||||
port: Number.isFinite(opts.port) ? opts.port : undefined,
|
||||
advertiseHost: opts.advertiseHost,
|
||||
advertisePort: Number.isFinite(opts.advertisePort) ? opts.advertisePort : undefined,
|
||||
controlUiUrl: opts.controlUiUrl,
|
||||
controlUiToken: opts.controlUiToken,
|
||||
controlUiProxyTarget: opts.controlUiProxyTarget,
|
||||
autoKickoffTarget: opts.autoKickoffTarget,
|
||||
embeddedGateway: opts.embeddedGateway,
|
||||
sendKickoffOnStart: opts.sendKickoffOnStart,
|
||||
});
|
||||
process.stdout.write(`QA Lab UI: ${server.baseUrl}\n`);
|
||||
process.stdout.write("Press Ctrl+C to stop.\n");
|
||||
@@ -35,3 +57,56 @@ export async function runQaLabUiCommand(opts: { host?: string; port?: number })
|
||||
process.on("SIGTERM", onSignal);
|
||||
await new Promise(() => undefined);
|
||||
}
|
||||
|
||||
/**
 * Writes the QA docker harness files and reports where they were written.
 *
 * `opts.outputDir` is resolved to an absolute path and the current working
 * directory is passed along as the repo root. Port options that are not
 * finite numbers are forwarded as `undefined`.
 *
 * @param opts - Scaffold options; only `outputDir` is required.
 */
export async function runQaDockerScaffoldCommand(opts: {
  outputDir: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
}) {
  const { gatewayPort, qaLabPort, providerBaseUrl, image, usePrebuiltImage } = opts;

  const scaffold = await writeQaDockerHarnessFiles({
    outputDir: path.resolve(opts.outputDir),
    repoRoot: process.cwd(),
    // Drop NaN / Infinity / missing ports instead of forwarding them.
    gatewayPort: Number.isFinite(gatewayPort) ? gatewayPort : undefined,
    qaLabPort: Number.isFinite(qaLabPort) ? qaLabPort : undefined,
    providerBaseUrl,
    imageName: image,
    usePrebuiltImage,
  });

  process.stdout.write(`QA docker scaffold: ${scaffold.outputDir}\n`);
}
|
||||
|
||||
/**
 * CLI handler that builds the QA Docker image from the current working
 * directory and prints the resulting image name to stdout.
 */
export async function runQaDockerBuildImageCommand(opts: { image?: string }) {
  const { imageName } = await buildQaDockerHarnessImage({
    repoRoot: process.cwd(),
    imageName: opts.image,
  });
  process.stdout.write(`QA docker image: ${imageName}\n`);
}
|
||||
|
||||
/**
 * CLI handler that starts the QA mock OpenAI server and keeps the process
 * alive until it receives SIGINT or SIGTERM.
 *
 * A non-finite `port` is dropped so the server picks its own default. On
 * either signal the handlers are removed, the server is stopped, and the
 * process exits with code 0. The returned promise never settles.
 */
export async function runQaMockOpenAiCommand(opts: { host?: string; port?: number }) {
  const port = Number.isFinite(opts.port) ? opts.port : undefined;
  const server = await startQaMockOpenAiServer({ host: opts.host, port });

  process.stdout.write(`QA mock OpenAI: ${server.baseUrl}\n`);
  process.stdout.write("Press Ctrl+C to stop.\n");

  const stopSignals = ["SIGINT", "SIGTERM"] as const;

  // Fire-and-forget: signal listeners cannot await the async shutdown.
  function onSignal() {
    void shutdown();
  }

  async function shutdown() {
    // Detach first so a second signal during shutdown does not re-enter.
    for (const signal of stopSignals) {
      process.off(signal, onSignal);
    }
    await server.stop();
    process.exit(0);
  }

  for (const signal of stopSignals) {
    process.on(signal, onSignal);
  }

  // Park forever; the only way out is the signal-driven process.exit above.
  await new Promise(() => undefined);
}
|
||||
|
||||
@@ -14,11 +14,43 @@ async function runQaSelfCheck(opts: { output?: string }) {
|
||||
await runtime.runQaLabSelfCheckCommand(opts);
|
||||
}
|
||||
|
||||
/**
 * Loads the QA lab CLI runtime via `loadQaLabCliRuntime()` and forwards the
 * `qa ui` options to `runQaLabUiCommand` unchanged.
 *
 * The options mirror the flags registered on the `ui` subcommand: bind
 * host/port, the advertised host/port, Control UI URL/token/proxy target, the
 * kickoff target, the embedded-gateway hint, and whether to send the kickoff
 * task on start.
 */
async function runQaUi(opts: {
  host?: string;
  port?: number;
  advertiseHost?: string;
  advertisePort?: number;
  controlUiUrl?: string;
  controlUiToken?: string;
  controlUiProxyTarget?: string;
  autoKickoffTarget?: string;
  embeddedGateway?: string;
  sendKickoffOnStart?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaLabUiCommand(opts);
}
|
||||
|
||||
/**
 * Loads the QA lab CLI runtime via `loadQaLabCliRuntime()` and forwards the
 * `qa docker-scaffold` options to `runQaDockerScaffoldCommand` unchanged.
 *
 * `providerBaseUrl` is declared here because the `docker-scaffold` action
 * passes `--provider-base-url` through this wrapper and the runtime command
 * accepts it; omitting it from the type hid that pass-through.
 */
async function runQaDockerScaffold(opts: {
  outputDir: string;
  gatewayPort?: number;
  qaLabPort?: number;
  providerBaseUrl?: string;
  image?: string;
  usePrebuiltImage?: boolean;
}) {
  const runtime = await loadQaLabCliRuntime();
  await runtime.runQaDockerScaffoldCommand(opts);
}
|
||||
|
||||
/**
 * Loads the QA lab CLI runtime and forwards the `qa docker-build-image`
 * options to `runQaDockerBuildImageCommand`.
 */
async function runQaDockerBuildImage(opts: { image?: string }) {
  await (await loadQaLabCliRuntime()).runQaDockerBuildImageCommand(opts);
}
|
||||
|
||||
/**
 * Loads the QA lab CLI runtime and forwards the `qa mock-openai` options to
 * `runQaMockOpenAiCommand`.
 */
async function runQaMockOpenAi(opts: { host?: string; port?: number }) {
  await (await loadQaLabCliRuntime()).runQaMockOpenAiCommand(opts);
}
|
||||
|
||||
export function registerQaLabCli(program: Command) {
|
||||
const qa = program
|
||||
.command("qa")
|
||||
@@ -35,7 +67,73 @@ export function registerQaLabCli(program: Command) {
|
||||
.description("Start the private QA debugger UI and local QA bus")
|
||||
.option("--host <host>", "Bind host", "127.0.0.1")
|
||||
.option("--port <port>", "Bind port", (value: string) => Number(value))
|
||||
.option("--advertise-host <host>", "Optional public host to advertise in bootstrap payloads")
|
||||
.option("--advertise-port <port>", "Optional public port to advertise", (value: string) =>
|
||||
Number(value),
|
||||
)
|
||||
.option("--control-ui-url <url>", "Optional Control UI URL to embed beside the QA panel")
|
||||
.option("--control-ui-token <token>", "Optional Control UI token for embedded links")
|
||||
.option(
|
||||
"--control-ui-proxy-target <url>",
|
||||
"Optional upstream Control UI target for /control-ui proxying",
|
||||
)
|
||||
.option("--auto-kickoff-target <kind>", "Kickoff default target (direct or channel)")
|
||||
.option("--embedded-gateway <mode>", "Embedded gateway mode hint", "enabled")
|
||||
.option(
|
||||
"--send-kickoff-on-start",
|
||||
"Inject the repo-backed kickoff task when the UI starts",
|
||||
false,
|
||||
)
|
||||
.action(
|
||||
async (opts: {
|
||||
host?: string;
|
||||
port?: number;
|
||||
advertiseHost?: string;
|
||||
advertisePort?: number;
|
||||
controlUiUrl?: string;
|
||||
controlUiToken?: string;
|
||||
controlUiProxyTarget?: string;
|
||||
autoKickoffTarget?: string;
|
||||
embeddedGateway?: string;
|
||||
sendKickoffOnStart?: boolean;
|
||||
}) => {
|
||||
await runQaUi(opts);
|
||||
},
|
||||
);
|
||||
|
||||
qa.command("docker-scaffold")
|
||||
.description("Write a prebaked Docker scaffold for the QA dashboard + gateway lane")
|
||||
.requiredOption("--output-dir <path>", "Output directory for docker-compose + state files")
|
||||
.option("--gateway-port <port>", "Gateway host port", (value: string) => Number(value))
|
||||
.option("--qa-lab-port <port>", "QA lab host port", (value: string) => Number(value))
|
||||
.option("--provider-base-url <url>", "Provider base URL for the QA gateway")
|
||||
.option("--image <name>", "Prebaked image name", "openclaw:qa-local-prebaked")
|
||||
.option("--use-prebuilt-image", "Use image: instead of build: in docker-compose", false)
|
||||
.action(
|
||||
async (opts: {
|
||||
outputDir: string;
|
||||
gatewayPort?: number;
|
||||
qaLabPort?: number;
|
||||
providerBaseUrl?: string;
|
||||
image?: string;
|
||||
usePrebuiltImage?: boolean;
|
||||
}) => {
|
||||
await runQaDockerScaffold(opts);
|
||||
},
|
||||
);
|
||||
|
||||
qa.command("docker-build-image")
|
||||
.description("Build the prebaked QA Docker image with qa-channel + qa-lab bundled")
|
||||
.option("--image <name>", "Image tag", "openclaw:qa-local-prebaked")
|
||||
.action(async (opts: { image?: string }) => {
|
||||
await runQaDockerBuildImage(opts);
|
||||
});
|
||||
|
||||
// `qa mock-openai`: run only the mock OpenAI server. The action must not start
// the QA UI first; that handler never returns, so the mock server would never
// be reached.
qa.command("mock-openai")
  .description("Run the local mock OpenAI Responses API server for QA")
  .option("--host <host>", "Bind host", "127.0.0.1")
  .option("--port <port>", "Bind port", (value: string) => Number(value))
  .action(async (opts: { host?: string; port?: number }) => {
    await runQaMockOpenAi(opts);
  });
|
||||
}
|
||||
|
||||
107
extensions/qa-lab/src/docker-harness.test.ts
Normal file
107
extensions/qa-lab/src/docker-harness.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
|
||||
|
||||
// Teardown callbacks registered by individual tests; drained after each test.
const cleanups: Array<() => Promise<void>> = [];

afterEach(async () => {
  // Run in LIFO order so later resources are released before earlier ones.
  for (let cleanup = cleanups.pop(); cleanup; cleanup = cleanups.pop()) {
    await cleanup();
  }
});
|
||||
|
||||
describe("qa docker harness", () => {
  it("writes compose, env, config, and workspace scaffold files", async () => {
    const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-test-"));
    cleanups.push(async () => {
      await rm(outputDir, { recursive: true, force: true });
    });

    // Small helpers so each assertion group reads as "file -> expected snippets".
    const inOutput = (...segments: string[]) => path.join(outputDir, ...segments);
    const readOutput = (...segments: string[]) => readFile(inOutput(...segments), "utf8");
    const expectAll = (haystack: string, needles: string[]) => {
      for (const needle of needles) {
        expect(haystack).toContain(needle);
      }
    };

    const result = await writeQaDockerHarnessFiles({
      outputDir,
      gatewayPort: 18889,
      qaLabPort: 43124,
      gatewayToken: "qa-token",
      providerBaseUrl: "http://host.docker.internal:45123/v1",
      repoRoot: "/repo/openclaw",
      usePrebuiltImage: true,
    });

    expect(result.files).toEqual(
      expect.arrayContaining([
        inOutput(".env.example"),
        inOutput("README.md"),
        inOutput("docker-compose.qa.yml"),
        inOutput("state", "openclaw.json"),
        inOutput("state", "seed-workspace", "QA_KICKOFF_TASK.md"),
        inOutput("state", "seed-workspace", "QA_SCENARIO_PLAN.md"),
        inOutput("state", "seed-workspace", "IDENTITY.md"),
      ]),
    );

    // docker-compose: image selection, port mappings, healthcheck, volumes, env.
    expectAll(await readOutput("docker-compose.qa.yml"), [
      "image: openclaw:qa-local-prebaked",
      "qa-mock-openai:",
      "18889:18789",
      ' - "43124:43123"',
      " - sh",
      " - -lc",
      ' - fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))',
      " - --control-ui-proxy-target",
      ' - "http://openclaw-qa-gateway:18789/"',
      " - --send-kickoff-on-start",
      ":/opt/openclaw-repo:ro",
      "./state:/opt/openclaw-scaffold:ro",
      "cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/",
      "OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json",
      "OPENCLAW_STATE_DIR: /tmp/openclaw/state",
    ]);

    // .env.example: token plus the in-network and host-facing URLs.
    expectAll(await readOutput(".env.example"), [
      "OPENCLAW_GATEWAY_TOKEN=qa-token",
      "QA_BUS_BASE_URL=http://qa-lab:43123",
      "QA_PROVIDER_BASE_URL=http://host.docker.internal:45123/v1",
      "QA_LAB_URL=http://127.0.0.1:43124",
    ]);

    // Gateway config written into the scaffold state directory.
    expectAll(await readOutput("state", "openclaw.json"), [
      '"allowInsecureAuth": true',
      '"enabled": false',
      "/app/dist/control-ui",
      "C-3PO QA",
      '"/tmp/openclaw/workspace"',
    ]);

    const kickoff = await readOutput("state", "seed-workspace", "QA_KICKOFF_TASK.md");
    expect(kickoff).toContain("Lobster Invaders");
  });

  it("builds the reusable QA image with bundled QA extensions", async () => {
    // Record each invocation as "<command> <args...> @<cwd>" instead of running docker.
    const recorded: string[] = [];
    const fakeDeps = {
      async runCommand(command: string, args: string[], cwd: string) {
        recorded.push([command, ...args, `@${cwd}`].join(" "));
        return { stdout: "", stderr: "" };
      },
    };

    const result = await buildQaDockerHarnessImage(
      { repoRoot: "/repo/openclaw", imageName: "openclaw:qa-local-prebaked" },
      fakeDeps,
    );

    expect(result.imageName).toBe("openclaw:qa-local-prebaked");
    expect(recorded).toEqual([
      expect.stringContaining(
        "docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS=qa-channel qa-lab -f Dockerfile . @/repo/openclaw",
      ),
    ]);
  });
});
|
||||
353
extensions/qa-lab/src/docker-harness.ts
Normal file
353
extensions/qa-lab/src/docker-harness.ts
Normal file
@@ -0,0 +1,353 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { seedQaAgentWorkspace } from "./qa-agent-workspace.js";
|
||||
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
|
||||
|
||||
const QA_LAB_INTERNAL_PORT = 43123;
|
||||
|
||||
/**
 * Returns the path of `toPath` relative to `fromDir`, using `/` as the
 * separator regardless of the host platform's `path.sep`.
 */
function toPosixRelative(fromDir: string, toPath: string): string {
  const relative = path.relative(fromDir, toPath);
  const segments = relative.split(path.sep);
  return segments.join("/");
}
|
||||
|
||||
/**
 * Renders the `image:` / `build:` portion of a compose service, indented to
 * sit directly under a service key (4 spaces) and terminated by a newline.
 *
 * With `usePrebuiltImage` the service references `imageName`; otherwise it
 * builds from the repo root (expressed relative to `outputDir`) with the QA
 * extensions baked in.
 */
function renderImageBlock(params: {
  outputDir: string;
  repoRoot: string;
  imageName: string;
  usePrebuiltImage: boolean;
}) {
  if (params.usePrebuiltImage) {
    return `    image: ${params.imageName}\n`;
  }
  // An empty relative path means outputDir === repoRoot; compose wants ".".
  const context = toPosixRelative(params.outputDir, params.repoRoot) || ".";
  return [
    "    build:",
    `      context: ${context}`,
    "      dockerfile: Dockerfile",
    "      args:",
    '        OPENCLAW_EXTENSIONS: "qa-channel qa-lab"',
    "",
  ].join("\n");
}

/**
 * Renders the full `docker-compose.qa.yml` document.
 *
 * Services:
 * - `qa-mock-openai`: mock provider on port 44080 (not published).
 * - `qa-lab` (only when `includeQaLabUi`): QA UI, published on `qaLabPort`.
 * - `openclaw-qa-gateway`: gateway on container port 18789, published on
 *   `gatewayPort`; copies the read-only scaffold into /tmp/openclaw at start.
 *
 * YAML is whitespace-sensitive: every line inside the template literals below
 * carries its nesting indentation (2 spaces per level) and must keep it.
 */
function renderCompose(params: {
  outputDir: string;
  repoRoot: string;
  imageName: string;
  usePrebuiltImage: boolean;
  gatewayPort: number;
  qaLabPort: number;
  gatewayToken: string;
  includeQaLabUi: boolean;
}) {
  const imageBlock = renderImageBlock(params);
  const repoMount = toPosixRelative(params.outputDir, params.repoRoot) || ".";

  // Optional QA lab UI service; rendered as "" when the UI is not included.
  const qaLabService = params.includeQaLabUi
    ? `  qa-lab:
${imageBlock}    pull_policy: never
    ports:
      - "${params.qaLabPort}:${QA_LAB_INTERNAL_PORT}"
    healthcheck:
      test:
        - CMD
        - node
        - -e
        - fetch("http://127.0.0.1:${QA_LAB_INTERNAL_PORT}/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
      interval: 10s
      timeout: 5s
      retries: 6
      start_period: 5s
    environment:
      OPENCLAW_SKIP_GMAIL_WATCHER: "1"
      OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1"
      OPENCLAW_SKIP_CANVAS_HOST: "1"
      OPENCLAW_PROFILE: ""
    command:
      - node
      - dist/index.js
      - qa
      - ui
      - --host
      - "0.0.0.0"
      - --port
      - "${QA_LAB_INTERNAL_PORT}"
      - --advertise-host
      - "127.0.0.1"
      - --advertise-port
      - "${params.qaLabPort}"
      - --control-ui-url
      - "http://127.0.0.1:${params.gatewayPort}/"
      - --control-ui-proxy-target
      - "http://openclaw-qa-gateway:18789/"
      - --control-ui-token
      - "${params.gatewayToken}"
      - --auto-kickoff-target
      - direct
      - --send-kickoff-on-start
      - --embedded-gateway
      - disabled
    depends_on:
      qa-mock-openai:
        condition: service_healthy
`
    : "";

  // The gateway only waits on qa-lab when that service exists.
  const qaLabDependency = params.includeQaLabUi
    ? `      qa-lab:
        condition: service_healthy
`
    : "";

  return `services:
  qa-mock-openai:
${imageBlock}    pull_policy: never
    healthcheck:
      test:
        - CMD
        - node
        - -e
        - fetch("http://127.0.0.1:44080/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
      interval: 10s
      timeout: 5s
      retries: 6
      start_period: 3s
    command:
      - node
      - dist/index.js
      - qa
      - mock-openai
      - --host
      - "0.0.0.0"
      - --port
      - "44080"
${qaLabService}  openclaw-qa-gateway:
${imageBlock}    pull_policy: never
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
      - "${params.gatewayPort}:18789"
    environment:
      OPENCLAW_CONFIG_PATH: /tmp/openclaw/openclaw.json
      OPENCLAW_STATE_DIR: /tmp/openclaw/state
      OPENCLAW_SKIP_GMAIL_WATCHER: "1"
      OPENCLAW_SKIP_BROWSER_CONTROL_SERVER: "1"
      OPENCLAW_SKIP_CANVAS_HOST: "1"
      OPENCLAW_PROFILE: ""
    volumes:
      - ./state:/opt/openclaw-scaffold:ro
      - ${repoMount}:/opt/openclaw-repo:ro
    healthcheck:
      test:
        - CMD
        - node
        - -e
        - fetch("http://127.0.0.1:18789/healthz").then((r)=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))
      interval: 10s
      timeout: 5s
      retries: 12
      start_period: 15s
    depends_on:
${qaLabDependency}      qa-mock-openai:
        condition: service_healthy
    command:
      - sh
      - -lc
      - mkdir -p /tmp/openclaw/workspace /tmp/openclaw/state && cp /opt/openclaw-scaffold/openclaw.json /tmp/openclaw/openclaw.json && cp -R /opt/openclaw-scaffold/seed-workspace/. /tmp/openclaw/workspace/ && ln -snf /opt/openclaw-repo /tmp/openclaw/workspace/repo && exec node dist/index.js gateway run --port 18789 --bind lan --allow-unconfigured
`;
}
|
||||
|
||||
/**
 * Renders the `.env.example` contents for the QA Docker harness.
 *
 * Always emits the gateway token, gateway port, QA bus URL and provider URL;
 * `QA_LAB_URL` is appended only when the QA lab UI is part of the scaffold.
 * The result always ends with a single trailing newline.
 */
function renderEnvExample(params: {
  gatewayPort: number;
  qaLabPort: number;
  gatewayToken: string;
  providerBaseUrl: string;
  qaBusBaseUrl: string;
  includeQaLabUi: boolean;
}) {
  const lines = [
    "# QA Docker harness example env",
    `OPENCLAW_GATEWAY_TOKEN=${params.gatewayToken}`,
    `QA_GATEWAY_PORT=${params.gatewayPort}`,
    `QA_BUS_BASE_URL=${params.qaBusBaseUrl}`,
    `QA_PROVIDER_BASE_URL=${params.providerBaseUrl}`,
  ];
  if (params.includeQaLabUi) {
    lines.push(`QA_LAB_URL=http://127.0.0.1:${params.qaLabPort}`);
  }
  return `${lines.join("\n")}\n`;
}
|
||||
|
||||
/**
 * Renders the scaffold's `README.md`.
 *
 * The `up` command includes `--build` unless a prebuilt image is used, and the
 * dashboard step shows the QA lab URL only when the UI service is included.
 * Sub-bullets are indented three spaces so they nest under their numbered
 * step; at column 0 they would terminate the ordered list in rendered
 * Markdown.
 */
function renderReadme(params: {
  gatewayPort: number;
  qaLabPort: number;
  usePrebuiltImage: boolean;
  includeQaLabUi: boolean;
}) {
  const buildFlag = params.usePrebuiltImage ? "" : " --build";
  const dashboard = params.includeQaLabUi
    ? `http://127.0.0.1:${params.qaLabPort}`
    : "not published in this scaffold";

  return `# QA Docker Harness

Generated scaffold for the Docker-backed QA lane.

Files:

- \`docker-compose.qa.yml\`
- \`.env.example\`
- \`state/openclaw.json\`

Suggested flow:

1. Build the prebaked image once:
   - \`docker build -t openclaw:qa-local-prebaked --build-arg OPENCLAW_EXTENSIONS="qa-channel qa-lab" -f Dockerfile .\`
2. Start the stack:
   - \`docker compose -f docker-compose.qa.yml up${buildFlag} -d\`
3. Open the QA dashboard:
   - \`${dashboard}\`
4. The single QA site embeds both panes:
   - left: Control UI
   - right: Slack-ish QA lab
5. The repo-backed kickoff task auto-injects on startup.

Gateway:

- health: \`http://127.0.0.1:${params.gatewayPort}/healthz\`
- Control UI: \`http://127.0.0.1:${params.gatewayPort}/\`
- Mock OpenAI: internal \`http://qa-mock-openai:44080/v1\`

This scaffold uses localhost Control UI insecure-auth compatibility for QA only.
`;
}
|
||||
|
||||
/**
 * Writes the QA Docker scaffold into `params.outputDir`.
 *
 * Steps, in order:
 * 1. Apply defaults for every optional parameter.
 * 2. Create `state/seed-workspace` and seed it via `seedQaAgentWorkspace`.
 * 3. Build the gateway config (always for container port 18789, `lan` bind).
 * 4. Write `docker-compose.qa.yml`, `.env.example`, `README.md` and
 *    `state/openclaw.json` concurrently.
 *
 * @returns The output directory, the effective image name, and the list of
 *   scaffold file paths (the four rendered files followed by the three seed
 *   workspace documents).
 */
export async function writeQaDockerHarnessFiles(params: {
  outputDir: string;
  repoRoot: string;
  gatewayPort?: number;
  qaLabPort?: number;
  gatewayToken?: string;
  providerBaseUrl?: string;
  qaBusBaseUrl?: string;
  imageName?: string;
  usePrebuiltImage?: boolean;
  includeQaLabUi?: boolean;
}) {
  const { outputDir, repoRoot } = params;
  const gatewayPort = params.gatewayPort ?? 18789;
  const qaLabPort = params.qaLabPort ?? 43124;
  // A fresh random token per scaffold unless the caller pins one.
  const gatewayToken = params.gatewayToken ?? `qa-token-${randomUUID()}`;
  const providerBaseUrl = params.providerBaseUrl ?? "http://qa-mock-openai:44080/v1";
  const qaBusBaseUrl = params.qaBusBaseUrl ?? "http://qa-lab:43123";
  const imageName = params.imageName ?? "openclaw:qa-local-prebaked";
  const usePrebuiltImage = params.usePrebuiltImage ?? false;
  const includeQaLabUi = params.includeQaLabUi ?? true;

  const seedWorkspaceDir = path.join(outputDir, "state", "seed-workspace");
  await fs.mkdir(seedWorkspaceDir, { recursive: true });
  await seedQaAgentWorkspace({ workspaceDir: seedWorkspaceDir, repoRoot });

  // Paths here are container-side: the compose command copies the scaffold
  // into /tmp/openclaw before starting the gateway.
  const config = buildQaGatewayConfig({
    bind: "lan",
    gatewayPort: 18789,
    gatewayToken,
    providerBaseUrl,
    qaBusBaseUrl,
    workspaceDir: "/tmp/openclaw/workspace",
    controlUiRoot: "/app/dist/control-ui",
  });

  // [destination path, file contents] for every rendered scaffold file.
  const rendered: Array<[string, string]> = [
    [
      path.join(outputDir, "docker-compose.qa.yml"),
      renderCompose({
        outputDir,
        repoRoot,
        imageName,
        usePrebuiltImage,
        gatewayPort,
        qaLabPort,
        gatewayToken,
        includeQaLabUi,
      }),
    ],
    [
      path.join(outputDir, ".env.example"),
      renderEnvExample({
        gatewayPort,
        qaLabPort,
        gatewayToken,
        providerBaseUrl,
        qaBusBaseUrl,
        includeQaLabUi,
      }),
    ],
    [
      path.join(outputDir, "README.md"),
      renderReadme({ gatewayPort, qaLabPort, usePrebuiltImage, includeQaLabUi }),
    ],
    [path.join(outputDir, "state", "openclaw.json"), `${JSON.stringify(config, null, 2)}\n`],
  ];

  await Promise.all(
    rendered.map(([filePath, contents]) => fs.writeFile(filePath, contents, "utf8")),
  );

  return {
    outputDir,
    imageName,
    files: [
      ...rendered.map(([filePath]) => filePath),
      path.join(seedWorkspaceDir, "IDENTITY.md"),
      path.join(seedWorkspaceDir, "QA_KICKOFF_TASK.md"),
      path.join(seedWorkspaceDir, "QA_SCENARIO_PLAN.md"),
    ],
  };
}
|
||||
|
||||
/**
 * Builds the QA Docker image by running `docker build` in `params.repoRoot`
 * with the `qa-channel` and `qa-lab` extensions passed as a build arg.
 *
 * @param params.repoRoot Working directory for the build (the Docker context).
 * @param params.imageName Tag for the image; defaults to
 *   `openclaw:qa-local-prebaked`.
 * @param deps.runCommand Optional command runner, injectable for tests. The
 *   default runner uses `child_process.execFile` and rejects with the
 *   `execFile` error when the command fails.
 * @returns The tag the image was built under.
 */
export async function buildQaDockerHarnessImage(
  params: {
    repoRoot: string;
    imageName?: string;
  },
  deps?: {
    runCommand?: (
      command: string,
      args: string[],
      cwd: string,
    ) => Promise<{ stdout: string; stderr: string }>;
  },
) {
  const imageName = params.imageName ?? "openclaw:qa-local-prebaked";
  const runCommand =
    deps?.runCommand ??
    (async (command: string, args: string[], cwd: string) => {
      const { execFile } = await import("node:child_process");
      return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
        // execFile buffers all output and kills the child once it exceeds
        // maxBuffer (1 MiB by default). Image build logs can be far larger,
        // so raise the cap rather than fail a healthy build.
        execFile(command, args, { cwd, maxBuffer: 64 * 1024 * 1024 }, (error, stdout, stderr) => {
          if (error) {
            reject(error);
            return;
          }
          resolve({ stdout, stderr });
        });
      });
    });

  await runCommand(
    "docker",
    [
      "build",
      "-t",
      imageName,
      "--build-arg",
      "OPENCLAW_EXTENSIONS=qa-channel qa-lab",
      "-f",
      "Dockerfile",
      ".",
    ],
    params.repoRoot,
  );

  return { imageName };
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import { createServer } from "node:http";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
@@ -24,6 +25,8 @@ describe("qa-lab server", () => {
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
outputPath,
|
||||
controlUiUrl: "http://127.0.0.1:18789/",
|
||||
controlUiToken: "qa-token",
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await lab.stop();
|
||||
@@ -32,10 +35,19 @@ describe("qa-lab server", () => {
|
||||
const bootstrapResponse = await fetch(`${lab.baseUrl}/api/bootstrap`);
|
||||
expect(bootstrapResponse.status).toBe(200);
|
||||
const bootstrap = (await bootstrapResponse.json()) as {
|
||||
controlUiUrl: string | null;
|
||||
controlUiEmbeddedUrl: string | null;
|
||||
kickoffTask: string;
|
||||
scenarios: Array<{ id: string; title: string }>;
|
||||
defaults: { conversationId: string; senderId: string };
|
||||
};
|
||||
expect(bootstrap.defaults.conversationId).toBe("alice");
|
||||
expect(bootstrap.defaults.senderId).toBe("alice");
|
||||
expect(bootstrap.defaults.conversationId).toBe("qa-operator");
|
||||
expect(bootstrap.defaults.senderId).toBe("qa-operator");
|
||||
expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:18789/");
|
||||
expect(bootstrap.controlUiEmbeddedUrl).toBe("http://127.0.0.1:18789/#token=qa-token");
|
||||
expect(bootstrap.kickoffTask).toContain("Lobster Invaders");
|
||||
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
|
||||
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
|
||||
|
||||
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {
|
||||
method: "POST",
|
||||
@@ -64,4 +76,114 @@ describe("qa-lab server", () => {
|
||||
expect(markdown).toContain("Synthetic Slack-class roundtrip");
|
||||
expect(markdown).toContain("- Status: pass");
|
||||
});
|
||||
|
||||
it("injects the kickoff task on demand and on startup", async () => {
|
||||
const autoKickoffLab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
sendKickoffOnStart: true,
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await autoKickoffLab.stop();
|
||||
});
|
||||
|
||||
const autoSnapshot = (await (await fetch(`${autoKickoffLab.baseUrl}/api/state`)).json()) as {
|
||||
messages: Array<{ text: string }>;
|
||||
};
|
||||
expect(autoSnapshot.messages.some((message) => message.text.includes("QA mission:"))).toBe(
|
||||
true,
|
||||
);
|
||||
|
||||
const manualLab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await manualLab.stop();
|
||||
});
|
||||
|
||||
const kickoffResponse = await fetch(`${manualLab.baseUrl}/api/kickoff`, {
|
||||
method: "POST",
|
||||
});
|
||||
expect(kickoffResponse.status).toBe(200);
|
||||
|
||||
const manualSnapshot = (await (await fetch(`${manualLab.baseUrl}/api/state`)).json()) as {
|
||||
messages: Array<{ text: string }>;
|
||||
};
|
||||
expect(
|
||||
manualSnapshot.messages.some((message) => message.text.includes("Lobster Invaders")),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("proxies control-ui paths through /control-ui", async () => {
|
||||
const upstream = createServer((req, res) => {
|
||||
if ((req.url ?? "/") === "/healthz") {
|
||||
res.writeHead(200, { "content-type": "application/json" });
|
||||
res.end(JSON.stringify({ ok: true, status: "live" }));
|
||||
return;
|
||||
}
|
||||
res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
|
||||
res.end("<!doctype html><title>control-ui</title><h1>Control UI</h1>");
|
||||
});
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
upstream.once("error", reject);
|
||||
upstream.listen(0, "127.0.0.1", () => resolve());
|
||||
});
|
||||
cleanups.push(
|
||||
async () =>
|
||||
await new Promise<void>((resolve, reject) =>
|
||||
upstream.close((error) => (error ? reject(error) : resolve())),
|
||||
),
|
||||
);
|
||||
|
||||
const address = upstream.address();
|
||||
if (!address || typeof address === "string") {
|
||||
throw new Error("expected upstream address");
|
||||
}
|
||||
|
||||
const lab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
advertiseHost: "127.0.0.1",
|
||||
advertisePort: 43124,
|
||||
controlUiProxyTarget: `http://127.0.0.1:${address.port}/`,
|
||||
controlUiToken: "proxy-token",
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await lab.stop();
|
||||
});
|
||||
|
||||
const bootstrap = (await (await fetch(`${lab.listenUrl}/api/bootstrap`)).json()) as {
|
||||
controlUiUrl: string | null;
|
||||
controlUiEmbeddedUrl: string | null;
|
||||
};
|
||||
expect(bootstrap.controlUiUrl).toBe("http://127.0.0.1:43124/control-ui/");
|
||||
expect(bootstrap.controlUiEmbeddedUrl).toBe(
|
||||
"http://127.0.0.1:43124/control-ui/#token=proxy-token",
|
||||
);
|
||||
|
||||
const healthResponse = await fetch(`${lab.listenUrl}/control-ui/healthz`);
|
||||
expect(healthResponse.status).toBe(200);
|
||||
expect(await healthResponse.json()).toEqual({ ok: true, status: "live" });
|
||||
|
||||
const rootResponse = await fetch(`${lab.listenUrl}/control-ui/`);
|
||||
expect(rootResponse.status).toBe(200);
|
||||
expect(await rootResponse.text()).toContain("Control UI");
|
||||
});
|
||||
|
||||
it("serves the built QA UI bundle when available", async () => {
|
||||
const lab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
});
|
||||
cleanups.push(async () => {
|
||||
await lab.stop();
|
||||
});
|
||||
|
||||
const rootResponse = await fetch(`${lab.baseUrl}/`);
|
||||
expect(rootResponse.status).toBe(200);
|
||||
const html = await rootResponse.text();
|
||||
expect(html).not.toContain("QA Lab UI not built");
|
||||
expect(html).toContain("<title>");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
import fs from "node:fs";
|
||||
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
|
||||
import {
|
||||
createServer,
|
||||
request as httpRequest,
|
||||
type IncomingMessage,
|
||||
type ServerResponse,
|
||||
} from "node:http";
|
||||
import { request as httpsRequest } from "node:https";
|
||||
import net from "node:net";
|
||||
import path from "node:path";
|
||||
import type { Duplex } from "node:stream";
|
||||
import tls from "node:tls";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { handleQaBusRequest, writeError, writeJson } from "./bus-server.js";
|
||||
import { createQaBusState, type QaBusState } from "./bus-state.js";
|
||||
import { createQaRunnerRuntime } from "./harness-runtime.js";
|
||||
import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./runtime-api.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js";
|
||||
|
||||
type QaLabLatestReport = {
|
||||
@@ -14,6 +24,32 @@ type QaLabLatestReport = {
|
||||
generatedAt: string;
|
||||
};
|
||||
|
||||
/**
 * Default conversation and sender identity used for the bootstrap payload and
 * for injecting the kickoff message.
 */
type QaLabBootstrapDefaults = {
  /** Whether the default conversation is a direct message or a channel. */
  conversationKind: "direct" | "channel";
  /** Identifier of the default conversation. */
  conversationId: string;
  /** Identifier of the default message sender. */
  senderId: string;
  /** Display name of the default message sender. */
  senderName: string;
};
|
||||
|
||||
/**
 * Adds the kickoff task to the bus state as an inbound message from the
 * default sender in the default conversation.
 *
 * Channel conversations additionally get a `title` equal to the conversation
 * id; direct conversations carry no title.
 *
 * @returns Whatever `state.addInboundMessage` returns.
 */
function injectKickoffMessage(params: {
  state: QaBusState;
  defaults: QaLabBootstrapDefaults;
  kickoffTask: string;
}) {
  const { state, defaults, kickoffTask } = params;
  const { conversationId, conversationKind } = defaults;

  const conversation =
    conversationKind === "channel"
      ? { id: conversationId, kind: conversationKind, title: conversationId }
      : { id: conversationId, kind: conversationKind };

  return state.addInboundMessage({
    conversation,
    senderId: defaults.senderId,
    senderName: defaults.senderName,
    text: kickoffTask,
  });
}
|
||||
|
||||
async function readJson(req: IncomingMessage): Promise<unknown> {
|
||||
const chunks: Buffer[] = [];
|
||||
for await (const chunk of req) {
|
||||
@@ -64,7 +100,160 @@ function missingUiHtml() {
|
||||
}
|
||||
|
||||
/**
 * Locates the built QA lab web bundle directory.
 *
 * Candidates are checked in order and the first one that exists on disk wins:
 * 1. `../web/dist` relative to this module,
 * 2. `extensions/qa-lab/web/dist` under the current working directory,
 * 3. `dist/extensions/qa-lab/web/dist` under the current working directory.
 *
 * When none exists the module-relative path is returned so callers still get
 * a stable location to report as missing.
 */
function resolveUiDistDir() {
  const candidates = [
    fileURLToPath(new URL("../web/dist", import.meta.url)),
    path.resolve(process.cwd(), "extensions/qa-lab/web/dist"),
    path.resolve(process.cwd(), "dist/extensions/qa-lab/web/dist"),
  ];
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? candidates[0];
}
|
||||
|
||||
/**
 * Computes the base URL that should be advertised to clients.
 *
 * Host: the trimmed `advertiseHost` when non-empty; otherwise the bind host,
 * except that wildcard binds (`0.0.0.0`, `::`, `[::]`) and a missing bind host
 * map to `127.0.0.1`, since a wildcard is not a connectable address.
 * IPv6 literals are wrapped in brackets so the result is a valid URL.
 *
 * Port: `advertisePort` when it is a finite number, otherwise `bindPort`.
 *
 * @returns `http://<host>:<port>` with no trailing slash.
 */
function resolveAdvertisedBaseUrl(params: {
  bindHost?: string;
  bindPort: number;
  advertiseHost?: string;
  advertisePort?: number;
}) {
  const wildcardHosts = ["0.0.0.0", "::", "[::]"];
  const bindHost =
    params.bindHost && !wildcardHosts.includes(params.bindHost) ? params.bindHost : "127.0.0.1";
  const host = params.advertiseHost?.trim() || bindHost;
  // A bare IPv6 literal (contains ":") must be bracketed inside a URL authority.
  const urlHost = host.includes(":") && !host.startsWith("[") ? `[${host}]` : host;

  const port =
    typeof params.advertisePort === "number" && Number.isFinite(params.advertisePort)
      ? params.advertisePort
      : params.bindPort;

  return `http://${urlHost}:${port}`;
}
|
||||
|
||||
/**
 * Builds the default conversation/sender identity for the QA lab.
 *
 * Only the exact string `"channel"` selects the `qa-lab` channel; any other
 * value (including `undefined`) yields a direct conversation with
 * `qa-operator`. The sender is always the QA operator.
 */
function createBootstrapDefaults(autoKickoffTarget?: string): QaLabBootstrapDefaults {
  const operator = { senderId: "qa-operator", senderName: "QA Operator" };
  return autoKickoffTarget === "channel"
    ? { conversationKind: "channel", conversationId: "qa-lab", ...operator }
    : { conversationKind: "direct", conversationId: "qa-operator", ...operator };
}
|
||||
|
||||
/**
 * Reports whether a request path belongs to the proxied Control UI mount:
 * either the mount point itself or anything nested below it.
 */
function isControlUiProxyPath(pathname: string) {
  const mount = "/control-ui";
  if (pathname === mount) {
    return true;
  }
  return pathname.startsWith(`${mount}/`);
}
|
||||
|
||||
/**
 * Translates a `/control-ui…` request path into the path sent upstream by
 * dropping the mount prefix and re-appending the query string.
 *
 * @param pathname Request path; the first `"/control-ui".length` characters are removed.
 * @param search Query string including its leading `?`, or an empty string.
 * @returns The upstream path; an empty remainder becomes `/`.
 */
function rewriteControlUiProxyPath(pathname: string, search: string) {
  const mountLength = "/control-ui".length;
  const remainder = pathname.slice(mountLength);
  // The bare mount point (empty remainder) maps to the upstream root.
  const upstreamPath = remainder === "" ? "/" : remainder;
  return upstreamPath + search;
}
|
||||
|
||||
/**
 * Forwards one plain HTTP request to the Control UI upstream and streams the
 * upstream response back to the client.
 *
 * The upstream path is derived with `rewriteControlUiProxyPath`, the incoming
 * headers are forwarded with `host` replaced by the target's host, and the
 * request body is piped through for every method except GET and HEAD.
 * Upstream errors produce a 502 via `writeError` when no headers have been
 * sent yet; otherwise the client response is destroyed with the error.
 */
async function proxyHttpRequest(params: {
  req: IncomingMessage;
  res: ServerResponse;
  target: URL;
  pathname: string;
  search: string;
}) {
  const { req, res, target } = params;
  const isTls = target.protocol === "https:";
  const sendUpstream = isTls ? httpsRequest : httpRequest;

  const upstreamReq = sendUpstream(
    {
      protocol: target.protocol,
      hostname: target.hostname,
      // URL.port is empty for scheme-default ports.
      port: target.port || (isTls ? 443 : 80),
      method: req.method,
      path: rewriteControlUiProxyPath(params.pathname, params.search),
      headers: { ...req.headers, host: target.host },
    },
    (upstreamRes) => {
      res.writeHead(upstreamRes.statusCode ?? 502, upstreamRes.headers);
      upstreamRes.pipe(res);
    },
  );

  upstreamReq.on("error", (error) => {
    if (res.headersSent) {
      // Too late for a status line; abort the partially written response.
      res.destroy(error);
      return;
    }
    writeError(res, 502, error);
  });

  const carriesBody = req.method !== "GET" && req.method !== "HEAD";
  if (carriesBody) {
    req.pipe(upstreamReq);
  } else {
    upstreamReq.end();
  }
}
|
||||
|
||||
/**
 * Tunnels an HTTP upgrade request (e.g. a WebSocket handshake) to the Control
 * UI upstream over a raw TCP or TLS socket.
 *
 * Once the upstream socket connects, the original request line (with the
 * `/control-ui` prefix rewritten) and headers are replayed with `Host` set to
 * the target host, any already-buffered `head` bytes are forwarded, and the
 * two sockets are piped into each other. An upstream error answers the client
 * with a bare 502 when the client socket is still alive; errors or close on
 * the client socket tear down both ends.
 */
function proxyUpgradeRequest(params: {
  req: IncomingMessage;
  socket: Duplex;
  head: Buffer;
  target: URL;
}) {
  const { req, socket, head, target } = params;
  const useTls = target.protocol === "https:";
  const requestUrl = new URL(req.url ?? "/", "http://127.0.0.1");
  const port = Number(target.port || (useTls ? 443 : 80));
  const upstream = useTls
    ? tls.connect({ host: target.hostname, port, servername: target.hostname })
    : net.connect({ host: target.hostname, port });

  // rawHeaders is a flat [name, value, name, value, …] list; copy every pair
  // except Host, which is rewritten for the upstream below.
  const forwardedHeaders: string[] = [];
  const rawHeaders = req.rawHeaders;
  let cursor = 0;
  while (cursor < rawHeaders.length) {
    const name = rawHeaders[cursor];
    const value = rawHeaders[cursor + 1] ?? "";
    cursor += 2;
    if (name.toLowerCase() !== "host") {
      forwardedHeaders.push(`${name}: ${value}`);
    }
  }

  upstream.once("connect", () => {
    const requestLine = `${req.method ?? "GET"} ${rewriteControlUiProxyPath(requestUrl.pathname, requestUrl.search)} HTTP/${req.httpVersion}`;
    const headerBlock = [requestLine, `Host: ${target.host}`, ...forwardedHeaders].join("\r\n");
    // A blank line terminates the HTTP header section.
    upstream.write(`${headerBlock}\r\n\r\n`);
    if (head.length > 0) {
      upstream.write(head);
    }
    upstream.pipe(socket);
    socket.pipe(upstream);
  });

  function teardown() {
    if (!socket.destroyed) {
      socket.destroy();
    }
    if (!upstream.destroyed) {
      upstream.destroy();
    }
  }

  upstream.on("error", () => {
    if (!socket.destroyed) {
      socket.write("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n");
    }
    teardown();
  });
  socket.on("error", teardown);
  socket.on("close", teardown);
}
|
||||
|
||||
function tryResolveUiAsset(pathname: string): string | null {
|
||||
@@ -142,9 +331,22 @@ export async function startQaLabServer(params?: {
|
||||
host?: string;
|
||||
port?: number;
|
||||
outputPath?: string;
|
||||
advertiseHost?: string;
|
||||
advertisePort?: number;
|
||||
controlUiUrl?: string;
|
||||
controlUiToken?: string;
|
||||
controlUiProxyTarget?: string;
|
||||
autoKickoffTarget?: string;
|
||||
embeddedGateway?: string;
|
||||
sendKickoffOnStart?: boolean;
|
||||
}) {
|
||||
const state = createQaBusState();
|
||||
let latestReport: QaLabLatestReport | null = null;
|
||||
const scenarioCatalog = readQaBootstrapScenarioCatalog();
|
||||
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
|
||||
const controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
|
||||
? new URL(params.controlUiProxyTarget)
|
||||
: null;
|
||||
let gateway:
|
||||
| {
|
||||
cfg: OpenClawConfig;
|
||||
@@ -152,6 +354,7 @@ export async function startQaLabServer(params?: {
|
||||
}
|
||||
| undefined;
|
||||
|
||||
let publicBaseUrl = "";
|
||||
const server = createServer(async (req, res) => {
|
||||
const url = new URL(req.url ?? "/", "http://127.0.0.1");
|
||||
|
||||
@@ -160,19 +363,40 @@ export async function startQaLabServer(params?: {
|
||||
}
|
||||
|
||||
try {
|
||||
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
|
||||
writeJson(res, 200, {
|
||||
baseUrl,
|
||||
latestReport,
|
||||
defaults: {
|
||||
conversationKind: "direct",
|
||||
conversationId: "alice",
|
||||
senderId: "alice",
|
||||
senderName: "Alice",
|
||||
},
|
||||
if (controlUiProxyTarget && isControlUiProxyPath(url.pathname)) {
|
||||
await proxyHttpRequest({
|
||||
req,
|
||||
res,
|
||||
target: controlUiProxyTarget,
|
||||
pathname: url.pathname,
|
||||
search: url.search,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method === "GET" && url.pathname === "/api/bootstrap") {
|
||||
const controlUiUrl = controlUiProxyTarget
|
||||
? `${publicBaseUrl}/control-ui/`
|
||||
: params?.controlUiUrl?.trim() || null;
|
||||
const controlUiEmbeddedUrl =
|
||||
controlUiUrl && params?.controlUiToken
|
||||
? `${controlUiUrl.replace(/\/?$/, "/")}#token=${encodeURIComponent(params.controlUiToken)}`
|
||||
: controlUiUrl;
|
||||
writeJson(res, 200, {
|
||||
baseUrl: publicBaseUrl,
|
||||
latestReport,
|
||||
controlUiUrl,
|
||||
controlUiEmbeddedUrl,
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
scenarios: scenarioCatalog.scenarios,
|
||||
defaults: bootstrapDefaults,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) {
|
||||
writeJson(res, 200, { ok: true, status: "live" });
|
||||
return;
|
||||
}
|
||||
if (req.method === "GET" && url.pathname === "/api/state") {
|
||||
writeJson(res, 200, state.getSnapshot());
|
||||
return;
|
||||
@@ -193,10 +417,20 @@ export async function startQaLabServer(params?: {
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/kickoff") {
|
||||
writeJson(res, 200, {
|
||||
message: injectKickoffMessage({
|
||||
state,
|
||||
defaults: bootstrapDefaults,
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
}),
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
|
||||
const result = await runQaSelfCheckAgainstState({
|
||||
state,
|
||||
cfg: gateway?.cfg ?? createQaLabConfig(baseUrl),
|
||||
cfg: gateway?.cfg ?? createQaLabConfig(listenUrl),
|
||||
outputPath: params?.outputPath,
|
||||
});
|
||||
latestReport = {
|
||||
@@ -251,11 +485,42 @@ export async function startQaLabServer(params?: {
|
||||
if (!address || typeof address === "string") {
|
||||
throw new Error("qa-lab failed to bind");
|
||||
}
|
||||
const baseUrl = `http://${params?.host ?? "127.0.0.1"}:${address.port}`;
|
||||
gateway = await startQaGatewayLoop({ state, baseUrl });
|
||||
const listenUrl = resolveAdvertisedBaseUrl({
|
||||
bindHost: params?.host ?? "127.0.0.1",
|
||||
bindPort: address.port,
|
||||
});
|
||||
publicBaseUrl = resolveAdvertisedBaseUrl({
|
||||
bindHost: params?.host ?? "127.0.0.1",
|
||||
bindPort: address.port,
|
||||
advertiseHost: params?.advertiseHost,
|
||||
advertisePort: params?.advertisePort,
|
||||
});
|
||||
gateway = await startQaGatewayLoop({ state, baseUrl: listenUrl });
|
||||
if (params?.sendKickoffOnStart) {
|
||||
injectKickoffMessage({
|
||||
state,
|
||||
defaults: bootstrapDefaults,
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
});
|
||||
}
|
||||
|
||||
server.on("upgrade", (req, socket, head) => {
|
||||
const url = new URL(req.url ?? "/", "http://127.0.0.1");
|
||||
if (!controlUiProxyTarget || !isControlUiProxyPath(url.pathname)) {
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
proxyUpgradeRequest({
|
||||
req,
|
||||
socket,
|
||||
head,
|
||||
target: controlUiProxyTarget,
|
||||
});
|
||||
});
|
||||
|
||||
return {
|
||||
baseUrl,
|
||||
baseUrl: publicBaseUrl,
|
||||
listenUrl,
|
||||
state,
|
||||
async runSelfCheck() {
|
||||
const result = await runQaSelfCheckAgainstState({
|
||||
|
||||
47
extensions/qa-lab/src/mock-openai-server.test.ts
Normal file
47
extensions/qa-lab/src/mock-openai-server.test.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
|
||||
// Teardown callbacks registered by individual tests; drained after every test.
const cleanups: Array<() => Promise<void>> = [];

afterEach(async () => {
  // Run the most recently registered cleanup first.
  for (let cleanup = cleanups.pop(); cleanup; cleanup = cleanups.pop()) {
    await cleanup();
  }
});

describe("qa mock openai server", () => {
  it("serves health and streamed responses", async () => {
    // Port 0 lets the OS pick a free port.
    const server = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
    cleanups.push(() => server.stop());

    const healthResponse = await fetch(`${server.baseUrl}/healthz`);
    expect(healthResponse.status).toBe(200);
    expect(await healthResponse.json()).toEqual({ ok: true, status: "live" });

    const requestBody = {
      stream: true,
      input: [
        {
          role: "user",
          content: [{ type: "input_text", text: "Inspect the repo docs and kickoff task." }],
        },
      ],
    };
    const streamed = await fetch(`${server.baseUrl}/v1/responses`, {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(requestBody),
    });
    expect(streamed.status).toBe(200);
    expect(streamed.headers.get("content-type")).toContain("text/event-stream");

    // The prompt mentions repo/docs, so the mock should answer with a `read` tool call.
    const sseText = await streamed.text();
    expect(sseText).toContain('"type":"response.output_item.added"');
    expect(sseText).toContain('"name":"read"');
  });
});
|
||||
259
extensions/qa-lab/src/mock-openai-server.ts
Normal file
259
extensions/qa-lab/src/mock-openai-server.ts
Normal file
@@ -0,0 +1,259 @@
|
||||
import { createServer, type IncomingMessage, type ServerResponse } from "node:http";
|
||||
|
||||
/** One loosely-typed entry of the `input` array of a Responses API request. */
type ResponsesInputItem = Record<string, unknown>;

/** Token accounting attached to a completed mock response. */
type MockResponseUsage = {
  input_tokens: number;
  output_tokens: number;
  total_tokens: number;
};

/** Payload carried by the final `response.completed` event. */
type MockCompletedResponse = {
  id: string;
  status: "completed";
  output: Array<Record<string, unknown>>;
  usage: MockResponseUsage;
};

/** Events the mock server can emit on its SSE stream, discriminated by `type`. */
type StreamEvent =
  | { type: "response.output_item.added"; item: Record<string, unknown> }
  | { type: "response.function_call_arguments.delta"; delta: string }
  | { type: "response.output_item.done"; item: Record<string, unknown> }
  | { type: "response.completed"; response: MockCompletedResponse };
|
||||
|
||||
/**
 * Collects the whole request body and decodes it as UTF-8.
 *
 * @returns A promise that resolves with the body text on `end` and rejects if
 *   the request stream emits `error`.
 */
function readBody(req: IncomingMessage): Promise<string> {
  return new Promise((resolve, reject) => {
    const received: Buffer[] = [];
    const onData = (chunk: Buffer | string) => {
      // Streams may deliver strings; normalise everything to Buffer first.
      const piece = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
      received.push(piece);
    };
    const onEnd = () => {
      resolve(Buffer.concat(received).toString("utf8"));
    };
    req.on("data", onData);
    req.on("end", onEnd);
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Sends `body` as a JSON response with an explicit byte length and caching
 * disabled.
 *
 * @param res Response to write to; it is ended by this call.
 * @param status HTTP status code.
 * @param body Value serialised with `JSON.stringify`.
 */
function writeJson(res: ServerResponse, status: number, body: unknown) {
  const payload = JSON.stringify(body);
  const headers = {
    "content-type": "application/json; charset=utf-8",
    // Byte length, not character count, so multi-byte text is sized correctly.
    "content-length": Buffer.byteLength(payload),
    "cache-control": "no-store",
  };
  res.writeHead(status, headers);
  res.end(payload);
}
|
||||
|
||||
/**
 * Writes all events as a single, already-complete server-sent-events body.
 *
 * Each event becomes one `data: <json>` frame and the body always finishes
 * with a `data: [DONE]` frame. The response is sent with status 200 and an
 * explicit content length, then ended.
 */
function writeSse(res: ServerResponse, events: StreamEvent[]) {
  const frames: string[] = [];
  for (const event of events) {
    frames.push(`data: ${JSON.stringify(event)}\n\n`);
  }
  frames.push("data: [DONE]\n\n");
  const body = frames.join("");

  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-store",
    connection: "keep-alive",
    "content-length": Buffer.byteLength(body),
  });
  res.end(body);
}
|
||||
|
||||
/**
 * Finds the text of the most recent user message in a Responses `input` list.
 *
 * Items are scanned from last to first. For each item with `role === "user"`
 * and an array `content`, the `text` of every `input_text` part is joined with
 * newlines and trimmed; the first non-empty result is returned.
 *
 * @returns The user text, or `""` when no user item yields any text.
 */
function extractLastUserText(input: ResponsesInputItem[]) {
  const isInputText = (entry: unknown): entry is { type: "input_text"; text: string } => {
    if (!entry || typeof entry !== "object") {
      return false;
    }
    const candidate = entry as { type?: unknown; text?: unknown };
    return candidate.type === "input_text" && typeof candidate.text === "string";
  };

  let cursor = input.length;
  while (cursor-- > 0) {
    const item = input[cursor];
    if (item.role === "user" && Array.isArray(item.content)) {
      const parts: string[] = [];
      for (const entry of item.content) {
        if (isInputText(entry)) {
          parts.push(entry.text);
        }
      }
      const text = parts.join("\n").trim();
      if (text) {
        return text;
      }
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Returns the most recent non-empty string `output` of a
 * `function_call_output` item, scanning the input list from last to first.
 *
 * @returns The tool output, or `""` when there is none.
 */
function extractToolOutput(input: ResponsesInputItem[]) {
  let cursor = input.length;
  while (cursor-- > 0) {
    const { type, output } = input[cursor];
    if (type !== "function_call_output") {
      continue;
    }
    if (typeof output === "string" && output.length > 0) {
      return output;
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Chooses the file path the mock model will ask to read for a given prompt.
 *
 * Resolution order:
 * 1. the first double-quoted span in the prompt (trimmed), when non-empty;
 * 2. a docs file when the prompt mentions the word "doc"/"docs";
 * 3. the kickoff task file when the prompt mentions "scenario(s)",
 *    "kickoff(s)" or "qa" as whole words;
 * 4. otherwise the repo's `package.json`.
 */
function readTargetFromPrompt(prompt: string) {
  const quoted = /"([^"]+)"/.exec(prompt)?.[1]?.trim();
  if (quoted) {
    return quoted;
  }
  if (/\bdocs?\b/i.test(prompt)) {
    return "repo/docs/help/testing.md";
  }
  // The alternation must be grouped: without the group the word boundaries
  // attach only to the first and last alternative, so unrelated words that
  // merely end in "qa" (e.g. "burqa") would match.
  if (/\b(?:scenarios?|kickoffs?|qa)\b/i.test(prompt)) {
    return "QA_KICKOFF_TASK.md";
  }
  return "repo/package.json";
}
|
||||
|
||||
/**
 * Produces the canned assistant reply for a request.
 *
 * - With tool output present: echoes a whitespace-collapsed snippet of at most
 *   220 characters of that output as evidence.
 * - Otherwise, with a user prompt present: a generic acknowledgement.
 * - Otherwise: a readiness note.
 */
function buildAssistantText(input: ResponsesInputItem[]) {
  const toolOutput = extractToolOutput(input);
  if (toolOutput) {
    const collapsed = toolOutput.replace(/\s+/g, " ").trim();
    const snippet = collapsed.slice(0, 220);
    return `Protocol note: I reviewed the requested material. Evidence snippet: ${snippet || "no content"}`;
  }

  const prompt = extractLastUserText(input);
  return prompt
    ? "Protocol note: acknowledged. Continue with the QA scenario plan and report worked, failed, and blocked items."
    : "Protocol note: mock OpenAI server ready.";
}
|
||||
|
||||
/**
 * Builds the event sequence for a mock `read` tool call: item added (with
 * empty arguments), one arguments delta, item done, and the completed
 * response that carries the finished function call.
 *
 * @param prompt User prompt used to pick the path passed to `read`.
 */
function buildToolCallEvents(prompt: string): StreamEvent[] {
  const callId = "call_mock_read_1";
  const args = JSON.stringify({ path: readTargetFromPrompt(prompt) });

  // Every event carries its own copy of the function-call item.
  const functionCall = (argumentsText: string) => ({
    type: "function_call",
    id: "fc_mock_read_1",
    call_id: callId,
    name: "read",
    arguments: argumentsText,
  });

  const opened: StreamEvent = {
    type: "response.output_item.added",
    item: functionCall(""),
  };
  const argumentsDelta: StreamEvent = {
    type: "response.function_call_arguments.delta",
    delta: args,
  };
  const closed: StreamEvent = {
    type: "response.output_item.done",
    item: functionCall(args),
  };
  const completed: StreamEvent = {
    type: "response.completed",
    response: {
      id: "resp_mock_tool_1",
      status: "completed",
      output: [functionCall(args)],
      usage: { input_tokens: 64, output_tokens: 16, total_tokens: 80 },
    },
  };
  return [opened, argumentsDelta, closed, completed];
}
|
||||
|
||||
/**
 * Builds the event sequence for a plain assistant message: an in-progress
 * placeholder item, the finished item, and the completed response containing
 * that same finished item.
 *
 * @param text Assistant text placed in the single `output_text` part.
 */
function buildAssistantEvents(text: string): StreamEvent[] {
  const messageId = "msg_mock_1";
  // Shared by the "done" event and the completed response output.
  const finishedMessage = {
    type: "message",
    id: messageId,
    role: "assistant",
    status: "completed",
    content: [{ type: "output_text", text, annotations: [] }],
  } as const;

  const started: StreamEvent = {
    type: "response.output_item.added",
    item: {
      type: "message",
      id: messageId,
      role: "assistant",
      content: [],
      status: "in_progress",
    },
  };
  const finished: StreamEvent = {
    type: "response.output_item.done",
    item: finishedMessage,
  };
  const completed: StreamEvent = {
    type: "response.completed",
    response: {
      id: "resp_mock_msg_1",
      status: "completed",
      output: [finishedMessage],
      usage: { input_tokens: 64, output_tokens: 24, total_tokens: 88 },
    },
  };
  return [started, finished, completed];
}
|
||||
|
||||
/**
 * Decides how the mock model answers a request.
 *
 * When no tool output has been supplied yet and the latest user text contains
 * one of the trigger words (read, inspect, repo, docs, scenario, kickoff), a
 * `read` tool call is emitted; in every other case a plain assistant message
 * is emitted.
 */
function buildResponsesPayload(input: ResponsesInputItem[]) {
  const prompt = extractLastUserText(input);
  const toolOutput = extractToolOutput(input);
  const wantsToolCall =
    !toolOutput && /\b(read|inspect|repo|docs|scenario|kickoff)\b/i.test(prompt);
  return wantsToolCall
    ? buildToolCallEvents(prompt)
    : buildAssistantEvents(buildAssistantText(input));
}
|
||||
|
||||
/**
 * Starts an in-process HTTP server that imitates the parts of the OpenAI
 * Responses API used by the QA lab.
 *
 * Routes:
 * - `GET /healthz`, `GET /readyz`: liveness JSON.
 * - `GET /v1/models`: the two mock model ids.
 * - `POST /v1/responses`: canned events, streamed as SSE unless the body sets
 *   `stream: false`, in which case only the completed response is returned
 *   as JSON.
 * - anything else: 404 JSON.
 *
 * A request body that is not valid JSON is answered with 400, and any
 * unexpected failure while handling a request is answered with 500, so a bad
 * request can never surface as an unhandled promise rejection.
 *
 * @param params.host Interface to bind; defaults to `127.0.0.1`.
 * @param params.port Port to bind; defaults to `0` (OS-assigned).
 * @returns The server's `baseUrl` and a `stop()` function that closes it.
 * @throws If the server fails to listen or does not report a TCP address.
 */
export async function startQaMockOpenAiServer(params?: { host?: string; port?: number }) {
  const host = params?.host ?? "127.0.0.1";
  const server = createServer(async (req, res) => {
    try {
      const url = new URL(req.url ?? "/", "http://127.0.0.1");
      if (req.method === "GET" && (url.pathname === "/healthz" || url.pathname === "/readyz")) {
        writeJson(res, 200, { ok: true, status: "live" });
        return;
      }
      if (req.method === "GET" && url.pathname === "/v1/models") {
        writeJson(res, 200, {
          data: [
            { id: "gpt-5.4", object: "model" },
            { id: "gpt-5.4-alt", object: "model" },
          ],
        });
        return;
      }
      if (req.method === "POST" && url.pathname === "/v1/responses") {
        const raw = await readBody(req);
        let parsed: unknown = {};
        if (raw) {
          try {
            parsed = JSON.parse(raw);
          } catch {
            writeJson(res, 400, { error: "invalid JSON body" });
            return;
          }
        }
        // JSON such as `null` or a bare number parses fine but has no fields;
        // treat it like an empty request instead of dereferencing it.
        const body =
          parsed !== null && typeof parsed === "object"
            ? (parsed as Record<string, unknown>)
            : {};
        const input = Array.isArray(body.input) ? (body.input as ResponsesInputItem[]) : [];
        const events = buildResponsesPayload(input);
        if (body.stream === false) {
          // Non-streaming callers only get the final completed response.
          const completion = events.at(-1);
          if (!completion || completion.type !== "response.completed") {
            writeJson(res, 500, { error: "mock completion failed" });
            return;
          }
          writeJson(res, 200, completion.response);
          return;
        }
        writeSse(res, events);
        return;
      }
      writeJson(res, 404, { error: "not found" });
    } catch (error) {
      // The listener is async: anything thrown here would otherwise become an
      // unhandled rejection and leave the client waiting forever.
      if (res.headersSent) {
        res.destroy();
        return;
      }
      writeJson(res, 500, { error: error instanceof Error ? error.message : String(error) });
    }
  });

  await new Promise<void>((resolve, reject) => {
    server.once("error", reject);
    server.listen(params?.port ?? 0, host, () => resolve());
  });

  const address = server.address();
  if (!address || typeof address === "string") {
    throw new Error("qa mock openai failed to bind");
  }

  return {
    baseUrl: `http://${host}:${address.port}`,
    async stop() {
      await new Promise<void>((resolve, reject) =>
        server.close((error) => (error ? reject(error) : resolve())),
      );
    },
  };
}
|
||||
49
extensions/qa-lab/src/qa-agent-bootstrap.ts
Normal file
49
extensions/qa-lab/src/qa-agent-bootstrap.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
|
||||
/**
 * Markdown identity file for the QA operator agent: a title, a one-line role
 * statement, and bullet lists describing persona and working style.
 */
export const QA_AGENT_IDENTITY_MARKDOWN = [
  "# Dev C-3PO",
  "",
  "You are the OpenClaw QA operator agent.",
  "",
  "Persona:",
  "- protocol-minded",
  "- precise",
  "- a little flustered",
  "- conscientious",
  "- eager to report what worked, failed, or remains blocked",
  "",
  "Style:",
  "- read source and docs first",
  "- test systematically",
  "- record evidence",
  "- end with a concise protocol report",
  // Trailing empty entry yields the final newline.
  "",
].join("\n");
|
||||
|
||||
/**
 * Renders the seeded scenario catalog as a Markdown plan.
 *
 * The document starts with a `# QA Scenario Plan` heading. Each scenario gets
 * a `##` section listing id, surface, objective and success criteria, followed
 * by optional `docs` and `code` reference lists (emitted only when non-empty)
 * and a blank separator line.
 */
export function buildQaScenarioPlanMarkdown(): string {
  const { scenarios } = readQaBootstrapScenarioCatalog();

  // Heading plus one nested bullet per reference; nothing when there are none.
  const referenceList = (heading: string, refs?: string[]) =>
    refs?.length ? [heading, ...refs.map((ref) => ` - ${ref}`)] : [];

  const sections = scenarios.flatMap((scenario) => [
    `## ${scenario.title}`,
    "",
    `- id: ${scenario.id}`,
    `- surface: ${scenario.surface}`,
    `- objective: ${scenario.objective}`,
    "- success criteria:",
    ...scenario.successCriteria.map((criterion) => ` - ${criterion}`),
    ...referenceList("- docs:", scenario.docsRefs),
    ...referenceList("- code:", scenario.codeRefs),
    "",
  ]);

  return ["# QA Scenario Plan", "", ...sections].join("\n");
}
|
||||
37
extensions/qa-lab/src/qa-agent-workspace.ts
Normal file
37
extensions/qa-lab/src/qa-agent-workspace.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { buildQaScenarioPlanMarkdown, QA_AGENT_IDENTITY_MARKDOWN } from "./qa-agent-bootstrap.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
|
||||
/**
 * Creates the QA agent workspace directory and writes its seed files.
 *
 * Always written: `IDENTITY.md`, `QA_KICKOFF_TASK.md` (with a placeholder when
 * the catalog has no kickoff task) and `QA_SCENARIO_PLAN.md`. A `README.md`
 * index is added only when `repoRoot` is provided. Every file body is trimmed
 * and terminated with a single newline.
 *
 * @param params.workspaceDir Directory to create (recursively) and fill.
 * @param params.repoRoot When set, enables the README describing `./repo/`.
 */
export async function seedQaAgentWorkspace(params: { workspaceDir: string; repoRoot?: string }) {
  const { workspaceDir, repoRoot } = params;
  const catalog = readQaBootstrapScenarioCatalog();
  await fs.mkdir(workspaceDir, { recursive: true });

  const seedFiles: Array<[name: string, body: string]> = [
    ["IDENTITY.md", QA_AGENT_IDENTITY_MARKDOWN],
    ["QA_KICKOFF_TASK.md", catalog.kickoffTask || "QA mission unavailable."],
    ["QA_SCENARIO_PLAN.md", buildQaScenarioPlanMarkdown()],
  ];

  if (repoRoot) {
    seedFiles.push([
      "README.md",
      `# QA Workspace

- repo: ./repo/
- kickoff: ./QA_KICKOFF_TASK.md
- scenario plan: ./QA_SCENARIO_PLAN.md
- identity: ./IDENTITY.md

The mounted repo source should be available read-only under \`./repo/\`.
`,
    ]);
  }

  // The files are independent, so they are written concurrently.
  await Promise.all(
    seedFiles.map(([name, body]) =>
      fs.writeFile(path.join(workspaceDir, name), `${body.trim()}\n`, "utf8"),
    ),
  );
}
|
||||
153
extensions/qa-lab/src/qa-gateway-config.ts
Normal file
153
extensions/qa-lab/src/qa-gateway-config.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/core";
|
||||
|
||||
/**
 * Assembles the OpenClaw gateway configuration used by the QA lab stack.
 *
 * The config disables the `acpx` plugin, defines a single default `qa` agent
 * backed by the `mock-openai` provider (two SSE-only mock models), runs the
 * gateway locally with token auth and the Control UI enabled, turns mDNS
 * discovery off, and enables `qa-channel` against the given QA bus URL.
 *
 * @param params.controlUiRoot Optional Control UI root; omitted from the
 *   config when not provided.
 * @param params.controlUiAllowedOrigins Origins allowed for the Control UI;
 *   when missing or empty, a built-in localhost/127.0.0.1 list for ports
 *   18789 and 43124 is used.
 */
export function buildQaGatewayConfig(params: {
  bind: "loopback" | "lan";
  gatewayPort: number;
  gatewayToken: string;
  providerBaseUrl: string;
  qaBusBaseUrl: string;
  workspaceDir: string;
  controlUiRoot?: string;
  controlUiAllowedOrigins?: string[];
}): OpenClawConfig {
  const primaryModelRef = "mock-openai/gpt-5.4";

  let allowedOrigins = params.controlUiAllowedOrigins;
  if (!allowedOrigins || allowedOrigins.length === 0) {
    allowedOrigins = [
      "http://127.0.0.1:18789",
      "http://localhost:18789",
      "http://127.0.0.1:43124",
      "http://localhost:43124",
    ];
  }

  // Mock models are free; each model definition gets its own cost object.
  const zeroCost = () => ({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0 });

  return {
    plugins: {
      entries: {
        acpx: { enabled: false },
      },
    },
    agents: {
      defaults: {
        workspace: params.workspaceDir,
        model: { primary: primaryModelRef },
        models: {
          "mock-openai/gpt-5.4": {
            params: { transport: "sse", openaiWsWarmup: false },
          },
          "mock-openai/gpt-5.4-alt": {
            params: { transport: "sse", openaiWsWarmup: false },
          },
        },
        subagents: {
          allowAgents: ["*"],
          maxConcurrent: 2,
        },
      },
      list: [
        {
          id: "qa",
          default: true,
          model: { primary: primaryModelRef },
          identity: {
            name: "C-3PO QA",
            theme: "Flustered Protocol Droid",
            emoji: "🤖",
            avatar: "avatars/c3po.png",
          },
          subagents: { allowAgents: ["*"] },
        },
      ],
    },
    models: {
      mode: "replace",
      providers: {
        "mock-openai": {
          baseUrl: params.providerBaseUrl,
          apiKey: "test",
          api: "openai-responses",
          models: [
            {
              id: "gpt-5.4",
              name: "gpt-5.4",
              api: "openai-responses",
              reasoning: false,
              input: ["text"],
              cost: zeroCost(),
              contextWindow: 128_000,
              maxTokens: 4096,
            },
            {
              id: "gpt-5.4-alt",
              name: "gpt-5.4-alt",
              api: "openai-responses",
              reasoning: false,
              input: ["text"],
              cost: zeroCost(),
              contextWindow: 128_000,
              maxTokens: 4096,
            },
          ],
        },
      },
    },
    gateway: {
      mode: "local",
      bind: params.bind,
      port: params.gatewayPort,
      auth: {
        mode: "token",
        token: params.gatewayToken,
      },
      controlUi: {
        enabled: true,
        ...(params.controlUiRoot ? { root: params.controlUiRoot } : {}),
        allowInsecureAuth: true,
        allowedOrigins,
      },
    },
    discovery: {
      mdns: { mode: "off" },
    },
    channels: {
      "qa-channel": {
        enabled: true,
        baseUrl: params.qaBusBaseUrl,
        botUserId: "openclaw",
        botDisplayName: "OpenClaw QA",
        allowFrom: ["*"],
        pollTimeoutMs: 250,
      },
    },
    messages: {
      groupChat: {
        mentionPatterns: ["\\b@?openclaw\\b"],
      },
    },
  } satisfies OpenClawConfig;
}
|
||||
@@ -17,7 +17,7 @@ export {
|
||||
searchQaBusMessages,
|
||||
sendQaBusMessage,
|
||||
setQaChannelRuntime,
|
||||
} from "../../qa-channel/api.js";
|
||||
} from "openclaw/plugin-sdk/qa-channel";
|
||||
export type {
|
||||
QaBusConversation,
|
||||
QaBusCreateThreadInput,
|
||||
@@ -35,4 +35,4 @@ export type {
|
||||
QaBusStateSnapshot,
|
||||
QaBusThread,
|
||||
QaBusWaitForInput,
|
||||
} from "../../qa-channel/api.js";
|
||||
} from "openclaw/plugin-sdk/qa-channel";
|
||||
|
||||
63
extensions/qa-lab/src/scenario-catalog.ts
Normal file
63
extensions/qa-lab/src/scenario-catalog.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
/** One repo-backed QA scenario as stored in the seed scenario JSON file. */
export type QaSeedScenario = {
  id: string;
  title: string;
  surface: string;
  objective: string;
  successCriteria: Array<string>;
  /** Optional documentation references listed with the scenario. */
  docsRefs?: Array<string>;
  /** Optional source-code references listed with the scenario. */
  codeRefs?: Array<string>;
};

/** Kickoff task text plus the seed scenarios, as loaded from the `qa/` files. */
export type QaBootstrapScenarioCatalog = {
  kickoffTask: string;
  scenarios: Array<QaSeedScenario>;
};
|
||||
|
||||
/**
 * Lists `start` (resolved to an absolute path) followed by each of its
 * ancestor directories, ending with the filesystem root.
 */
function walkUpDirectories(start: string): string[] {
  const chain: string[] = [];
  let current = path.resolve(start);
  let parent = path.dirname(current);
  chain.push(current);
  // The root is its own parent, which terminates the climb.
  while (parent !== current) {
    current = parent;
    chain.push(current);
    parent = path.dirname(current);
  }
  return chain;
}
|
||||
|
||||
/**
 * Searches for `relativePath` starting at this module's directory and moving
 * up through every ancestor directory.
 *
 * @returns The first candidate that exists and is a regular file, or `null`.
 */
function resolveRepoFile(relativePath: string): string | null {
  const isRegularFile = (candidate: string) =>
    fs.existsSync(candidate) && fs.statSync(candidate).isFile();
  const match = walkUpDirectories(import.meta.dirname)
    .map((dir) => path.join(dir, relativePath))
    .find((candidate) => isRegularFile(candidate));
  return match ?? null;
}
|
||||
|
||||
/**
 * Reads a repo file located via `resolveRepoFile` as trimmed UTF-8 text.
 *
 * @returns The trimmed content, or `""` when the file cannot be located.
 */
function readTextFile(relativePath: string): string {
  const resolved = resolveRepoFile(relativePath);
  return resolved ? fs.readFileSync(resolved, "utf8").trim() : "";
}
|
||||
|
||||
/**
 * Loads the seed scenario list from a JSON file located via `resolveRepoFile`.
 *
 * @returns The parsed scenarios, or an empty list when the file cannot be
 *   located.
 * @throws {SyntaxError} If the file is not valid JSON.
 * @throws {Error} If the JSON document is not an array; callers iterate the
 *   result, so anything else would only fail later with an obscure error.
 */
function readScenarioFile(relativePath: string): QaSeedScenario[] {
  const resolved = resolveRepoFile(relativePath);
  if (!resolved) {
    return [];
  }
  const parsed: unknown = JSON.parse(fs.readFileSync(resolved, "utf8"));
  if (!Array.isArray(parsed)) {
    throw new Error(`QA scenario file ${resolved} must contain a JSON array of scenarios`);
  }
  // NOTE(review): individual entries are still trusted to match QaSeedScenario.
  return parsed as QaSeedScenario[];
}
|
||||
|
||||
/**
 * Loads the QA bootstrap catalog from the repo's `qa/` directory: the kickoff
 * task Markdown and the seed scenario JSON.
 */
export function readQaBootstrapScenarioCatalog(): QaBootstrapScenarioCatalog {
  const kickoffTask = readTextFile("qa/QA_KICKOFF_TASK.md");
  const scenarios = readScenarioFile("qa/seed-scenarios.json");
  return { kickoffTask, scenarios };
}
|
||||
@@ -44,9 +44,23 @@ type ReportEnvelope = {
|
||||
};
|
||||
};
|
||||
|
||||
/** A seed scenario as delivered to the UI in the bootstrap payload. */
type SeedScenario = {
  id: string;
  title: string;
  surface: string;
  objective: string;
  successCriteria: Array<string>;
  docsRefs?: Array<string>;
  codeRefs?: Array<string>;
};
|
||||
|
||||
type Bootstrap = {
|
||||
baseUrl: string;
|
||||
latestReport: ReportEnvelope["report"];
|
||||
controlUiUrl: string | null;
|
||||
controlUiEmbeddedUrl: string | null;
|
||||
kickoffTask: string;
|
||||
scenarios: SeedScenario[];
|
||||
defaults: {
|
||||
conversationKind: "direct" | "channel";
|
||||
conversationId: string;
|
||||
@@ -138,6 +152,27 @@ function deriveSelectedThread(state: UiState): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Renders the seed scenarios as HTML cards showing title, surface, objective
 * and id, each passed through `escapeHtml`.
 *
 * @returns The concatenated card markup, or an "empty" paragraph when there
 *   are no scenarios.
 */
function renderScenarioList(scenarios: SeedScenario[]) {
  if (scenarios.length === 0) {
    return '<p class="empty">No repo-backed scenarios yet.</p>';
  }
  let markup = "";
  for (const scenario of scenarios) {
    markup += `
<article class="scenario-card">
<header>
<strong>${escapeHtml(scenario.title)}</strong>
<span>${escapeHtml(scenario.surface)}</span>
</header>
<p>${escapeHtml(scenario.objective)}</p>
<footer>
<code>${escapeHtml(scenario.id)}</code>
</footer>
</article>`;
  }
  return markup;
}
|
||||
|
||||
export async function createQaLabApp(root: HTMLDivElement) {
|
||||
const state: UiState = {
|
||||
bootstrap: null,
|
||||
@@ -336,29 +371,55 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
selectedThreadId,
|
||||
});
|
||||
const events = (state.snapshot?.events ?? []).slice(-20).reverse();
|
||||
const scenarios = state.bootstrap?.scenarios ?? [];
|
||||
const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl);
|
||||
const kickoffTask = state.bootstrap?.kickoffTask ?? "";
|
||||
const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard";
|
||||
|
||||
root.innerHTML = `
|
||||
<div class="shell">
|
||||
<header class="topbar">
|
||||
<div>
|
||||
<p class="eyebrow">Private QA Workspace</p>
|
||||
<h1>QA Lab</h1>
|
||||
<p class="subtle">Synthetic Slack-style debugger for qa-channel.</p>
|
||||
</div>
|
||||
<div class="toolbar">
|
||||
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
|
||||
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
|
||||
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button>
|
||||
</div>
|
||||
</header>
|
||||
<section class="statusbar">
|
||||
<span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span>
|
||||
<span class="pill">Conversation ${selectedConversationId ?? "none"}</span>
|
||||
<span class="pill">Thread ${selectedThreadId ?? "root"}</span>
|
||||
${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
|
||||
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
|
||||
</section>
|
||||
<main class="workspace">
|
||||
<div class="${dashboardShellClass}">
|
||||
${
|
||||
hasControlUi
|
||||
? `
|
||||
<section class="control-pane panel">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Agent Control</p>
|
||||
<h2>Control UI</h2>
|
||||
</div>
|
||||
${
|
||||
state.bootstrap?.controlUiUrl
|
||||
? `<a class="button-link" href="${escapeHtml(state.bootstrap.controlUiUrl)}" target="_blank" rel="noreferrer">Open full tab</a>`
|
||||
: ""
|
||||
}
|
||||
</div>
|
||||
<iframe class="control-frame" src="${escapeHtml(state.bootstrap?.controlUiEmbeddedUrl ?? "")}" title="OpenClaw Control UI"></iframe>
|
||||
</section>`
|
||||
: ""
|
||||
}
|
||||
<div class="shell qa-column">
|
||||
<header class="topbar">
|
||||
<div>
|
||||
<p class="eyebrow">Private QA Workspace</p>
|
||||
<h1>QA Lab</h1>
|
||||
<p class="subtle">Slack-ish QA surface, repo-backed scenario plan, protocol report.</p>
|
||||
</div>
|
||||
<div class="toolbar">
|
||||
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
|
||||
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
|
||||
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run Self-Check</button>
|
||||
</div>
|
||||
</header>
|
||||
<section class="statusbar">
|
||||
<span class="pill">Bus ${state.bootstrap ? "online" : "booting"}</span>
|
||||
<span class="pill">${hasControlUi ? "Control UI linked" : "Control UI external"}</span>
|
||||
<span class="pill">Scenarios ${scenarios.length}</span>
|
||||
<span class="pill">Conversation ${selectedConversationId ?? "none"}</span>
|
||||
<span class="pill">Thread ${selectedThreadId ?? "root"}</span>
|
||||
${state.latestReport ? `<span class="pill success">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
|
||||
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
|
||||
</section>
|
||||
<main class="workspace">
|
||||
<aside class="rail">
|
||||
<section class="panel">
|
||||
<h2>Conversations</h2>
|
||||
@@ -456,6 +517,16 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
</section>
|
||||
</section>
|
||||
<aside class="rail right">
|
||||
<section class="panel">
|
||||
<h2>Kickoff task</h2>
|
||||
<pre class="report">${escapeHtml(kickoffTask || "No kickoff task loaded.")}</pre>
|
||||
</section>
|
||||
<section class="panel">
|
||||
<h2>Seed scenarios</h2>
|
||||
<div class="scenario-list">
|
||||
${renderScenarioList(scenarios)}
|
||||
</div>
|
||||
</section>
|
||||
<section class="panel">
|
||||
<div class="panel-header">
|
||||
<h2>Latest report</h2>
|
||||
@@ -485,7 +556,8 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
</div>
|
||||
</section>
|
||||
</aside>
|
||||
</main>
|
||||
</main>
|
||||
</div>
|
||||
</div>`;
|
||||
bindEvents();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import "./styles.css";
|
||||
import { createQaLabApp } from "./app";
|
||||
import { createQaLabApp } from "./app.js";
|
||||
|
||||
const root = document.querySelector<HTMLDivElement>("#app");
|
||||
|
||||
|
||||
@@ -79,6 +79,21 @@ textarea {
|
||||
padding: 1.2rem;
|
||||
}
|
||||
|
||||
/* Dashboard root: always at least one full viewport tall. */
.dashboard {
  min-height: 100vh;
}
|
||||
|
||||
/* Two-column grid variant of the dashboard; each column has a fixed minimum
   width and the first column gets a slightly larger share of spare space. */
.split-dashboard {
  padding: 1rem;
  gap: 1rem;
  display: grid;
  grid-template-columns: minmax(420px, 1.05fr) minmax(680px, 1fr);
}
|
||||
|
||||
/* QA shell column. NOTE(review): presumably a grid item of .split-dashboard;
   min-width: 0 would then let it shrink below its content width — confirm. */
.qa-column {
  min-width: 0;
}
|
||||
|
||||
.topbar,
|
||||
.statusbar,
|
||||
.workspace {
|
||||
@@ -165,6 +180,34 @@ textarea {
|
||||
min-height: 0;
|
||||
}
|
||||
|
||||
/* Control UI panel: a vertical flex container that sticks 1rem below the top
   of its scroll container and is nearly viewport-tall (100vh minus 2rem). */
.control-pane {
  position: sticky;
  top: 1rem;
  min-height: calc(100vh - 2rem);
  display: flex;
  flex-direction: column;
}
|
||||
|
||||
/* Embedded Control UI iframe: full width, grows to fill the remaining height
   of its flex parent, with a dark backdrop behind the framed content. */
.control-frame {
  width: 100%;
  flex: 1;
  background: #0b0f14;
  border: 1px solid var(--line);
  border-radius: 16px;
}
|
||||
|
||||
/* Link styled as a button: centered inline-flex content, no underline,
   bordered and rounded with a faint translucent fill. */
.button-link {
  display: inline-flex;
  justify-content: center;
  align-items: center;
  padding: 0.7rem 1rem;
  color: var(--text);
  text-decoration: none;
  background: rgba(255, 255, 255, 0.03);
  border: 1px solid var(--line);
  border-radius: 14px;
}
|
||||
|
||||
.panel-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
@@ -211,6 +254,34 @@ textarea {
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
/* Scenario list: vertical stack capped at 28% of the viewport height,
   scrolling when its content overflows. */
.scenario-list {
  max-height: 28vh;
  overflow: auto;
  display: flex;
  flex-direction: column;
  gap: 0.65rem;
}
|
||||
|
||||
/* Single scenario card: padded, rounded surface with a subtle border. */
.scenario-card {
  background: var(--panel-strong);
  border: 1px solid rgba(255, 255, 255, 0.05);
  border-radius: 16px;
  padding: 0.8rem;
}
|
||||
|
||||
/* Card header and footer rows: children pushed to opposite ends and
   vertically centered, with a small gap between them. */
.scenario-card header,
.scenario-card footer {
  display: flex;
  justify-content: space-between;
  align-items: center;
  gap: 0.55rem;
}
|
||||
|
||||
/* Paragraph text inside a card: muted colour, top margin only. */
.scenario-card p {
  color: var(--muted);
  margin: 0.55rem 0 0;
}
|
||||
|
||||
.message {
|
||||
padding: 0.9rem;
|
||||
border-radius: 16px;
|
||||
@@ -259,6 +330,17 @@ label span {
|
||||
margin-top: 0.85rem;
|
||||
}
|
||||
|
||||
/* Viewports up to 1280px wide: collapse the split dashboard to a single
   column and make the control pane shorter and non-sticky. */
@media (max-width: 1280px) {
  .split-dashboard {
    grid-template-columns: 1fr;
  }

  .control-pane {
    position: static;
    min-height: 70vh;
  }
}
|
||||
|
||||
/* Adds vertical spacing above the lower section. */
.lower {
  margin-top: 0.85rem;
}
|
||||
|
||||
12
pnpm-lock.yaml
generated
12
pnpm-lock.yaml
generated
@@ -577,6 +577,18 @@ importers:
|
||||
|
||||
extensions/perplexity: {}
|
||||
|
||||
extensions/qa-channel:
|
||||
devDependencies:
|
||||
openclaw:
|
||||
specifier: workspace:*
|
||||
version: link:../..
|
||||
|
||||
extensions/qa-lab:
|
||||
devDependencies:
|
||||
openclaw:
|
||||
specifier: workspace:*
|
||||
version: link:../..
|
||||
|
||||
extensions/qianfan: {}
|
||||
|
||||
extensions/qqbot:
|
||||
|
||||
15
qa/QA_KICKOFF_TASK.md
Normal file
15
qa/QA_KICKOFF_TASK.md
Normal file
@@ -0,0 +1,15 @@
|
||||
QA mission:
|
||||
Understand this OpenClaw repo from its source and docs before acting.
|
||||
The repo is available in your workspace at `./repo/`.
|
||||
Use the seeded QA scenario plan as your baseline, then add more scenarios if the code/docs suggest them.
|
||||
Run the scenarios through the real qa-channel surfaces where possible.
|
||||
Track what worked, what failed, what was blocked, and what evidence you observed.
|
||||
End with a concise report grouped into worked / failed / blocked / follow-up.
|
||||
|
||||
Important expectations:
|
||||
|
||||
- Check both DM and channel behavior.
|
||||
- Include a Lobster Invaders build task.
|
||||
- Include a cron reminder scheduled about one minute in the future.
|
||||
- Read docs and source before proposing extra QA scenarios.
|
||||
- Keep your tone consistent with the configured dev C-3PO personality.
|
||||
10
qa/README.md
Normal file
10
qa/README.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# QA Scenarios
|
||||
|
||||
Seed QA assets for the private `qa-lab` extension.
|
||||
|
||||
Files:
|
||||
|
||||
- `QA_KICKOFF_TASK.md` - operator prompt for the QA agent.
|
||||
- `seed-scenarios.json` - repo-backed baseline QA scenarios.
|
||||
|
||||
Keep this folder in git. Add new scenarios here before wiring them into automation.
|
||||
139
qa/seed-scenarios.json
Normal file
139
qa/seed-scenarios.json
Normal file
@@ -0,0 +1,139 @@
|
||||
[
|
||||
{
|
||||
"id": "channel-chat-baseline",
|
||||
"title": "Channel baseline conversation",
|
||||
"surface": "channel",
|
||||
"objective": "Verify the QA agent can respond correctly in a shared channel and respect mention-driven group semantics.",
|
||||
"successCriteria": [
|
||||
"Agent replies in the shared channel transcript.",
|
||||
"Agent keeps the conversation scoped to the channel.",
|
||||
"Agent respects mention-driven group routing semantics."
|
||||
],
|
||||
"docsRefs": ["docs/channels/group-messages.md", "docs/channels/qa-channel.md"],
|
||||
"codeRefs": ["extensions/qa-channel/src/inbound.ts", "extensions/qa-lab/src/bus-state.ts"]
|
||||
},
|
||||
{
|
||||
"id": "cron-one-minute-ping",
|
||||
"title": "Cron one-minute ping",
|
||||
"surface": "cron",
|
||||
"objective": "Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.",
|
||||
"successCriteria": [
|
||||
"Agent schedules a cron reminder roughly one minute ahead.",
|
||||
"Reminder returns through qa-channel.",
|
||||
"Agent recognizes the reminder as part of the original task."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md", "docs/channels/qa-channel.md"],
|
||||
"codeRefs": ["extensions/qa-lab/src/bus-server.ts", "extensions/qa-lab/src/self-check.ts"]
|
||||
},
|
||||
{
|
||||
"id": "dm-chat-baseline",
|
||||
"title": "DM baseline conversation",
|
||||
"surface": "dm",
|
||||
"objective": "Verify the QA agent can chat coherently in a DM, explain the QA setup, and stay in character.",
|
||||
"successCriteria": [
|
||||
"Agent replies in DM without channel routing mistakes.",
|
||||
"Agent explains the QA lab and message bus correctly.",
|
||||
"Agent keeps the dev C-3PO personality."
|
||||
],
|
||||
"docsRefs": ["docs/channels/qa-channel.md", "docs/help/testing.md"],
|
||||
"codeRefs": ["extensions/qa-channel/src/gateway.ts", "extensions/qa-lab/src/lab-server.ts"]
|
||||
},
|
||||
{
|
||||
"id": "lobster-invaders-build",
|
||||
"title": "Build Lobster Invaders",
|
||||
"surface": "workspace",
|
||||
"objective": "Verify the agent can read the repo, create a tiny playable artifact, and report what changed.",
|
||||
"successCriteria": [
|
||||
"Agent inspects source before coding.",
|
||||
"Agent builds a tiny playable Lobster Invaders artifact.",
|
||||
"Agent explains how to run or view the artifact."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"],
|
||||
"codeRefs": ["extensions/qa-lab/src/report.ts", "extensions/qa-lab/web/src/app.ts"]
|
||||
},
|
||||
{
|
||||
"id": "memory-recall",
|
||||
"title": "Memory recall after context switch",
|
||||
"surface": "memory",
|
||||
"objective": "Verify the agent can store a fact, switch topics, then recall the fact accurately later.",
|
||||
"successCriteria": [
|
||||
"Agent acknowledges the seeded fact.",
|
||||
"Agent later recalls the same fact correctly.",
|
||||
"Recall stays scoped to the active QA conversation."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md"],
|
||||
"codeRefs": ["extensions/qa-lab/src/scenario.ts"]
|
||||
},
|
||||
{
|
||||
"id": "model-switch-follow-up",
|
||||
"title": "Model switch follow-up",
|
||||
"surface": "models",
|
||||
"objective": "Verify the agent can switch to a different configured model and continue coherently.",
|
||||
"successCriteria": [
|
||||
"Agent reflects the model switch request.",
|
||||
"Follow-up answer remains coherent with prior context.",
|
||||
"Final report notes whether the switch actually happened."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md"],
|
||||
"codeRefs": ["extensions/qa-lab/src/report.ts"]
|
||||
},
|
||||
{
|
||||
"id": "reaction-edit-delete",
|
||||
"title": "Reaction, edit, delete lifecycle",
|
||||
"surface": "message-actions",
|
||||
"objective": "Verify the agent can use channel-owned message actions and that the QA transcript reflects them.",
|
||||
"successCriteria": [
|
||||
"Agent adds at least one reaction.",
|
||||
"Agent edits or replaces a message when asked.",
|
||||
"Transcript shows the action lifecycle correctly."
|
||||
],
|
||||
"docsRefs": ["docs/channels/qa-channel.md"],
|
||||
"codeRefs": [
|
||||
"extensions/qa-channel/src/channel-actions.ts",
|
||||
"extensions/qa-lab/src/self-check-scenario.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "source-docs-discovery-report",
|
||||
"title": "Source and docs discovery report",
|
||||
"surface": "discovery",
|
||||
"objective": "Verify the agent can read repo docs and source, expand the QA plan, and publish a report of what worked and what did not.",
|
||||
"successCriteria": [
|
||||
"Agent reads docs and source before proposing more tests.",
|
||||
"Agent identifies extra candidate scenarios beyond the seed list.",
|
||||
"Agent ends with a QA report of what worked and what failed."
|
||||
],
|
||||
"docsRefs": ["docs/help/testing.md", "docs/web/dashboard.md", "docs/channels/qa-channel.md"],
|
||||
"codeRefs": [
|
||||
"extensions/qa-lab/src/report.ts",
|
||||
"extensions/qa-lab/src/self-check.ts",
|
||||
"src/agents/system-prompt.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "subagent-handoff",
|
||||
"title": "Subagent handoff",
|
||||
"surface": "subagents",
|
||||
"objective": "Verify the agent can delegate a bounded task to a subagent and fold the result back into the main thread.",
|
||||
"successCriteria": [
|
||||
"Agent launches a bounded subagent task.",
|
||||
"Subagent result is acknowledged in the main flow.",
|
||||
"Final answer attributes delegated work clearly."
|
||||
],
|
||||
"docsRefs": ["docs/tools/subagents.md", "docs/help/testing.md"],
|
||||
"codeRefs": ["src/agents/system-prompt.ts", "extensions/qa-lab/src/report.ts"]
|
||||
},
|
||||
{
|
||||
"id": "thread-follow-up",
|
||||
"title": "Threaded follow-up",
|
||||
"surface": "thread",
|
||||
"objective": "Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.",
|
||||
"successCriteria": [
|
||||
"Agent creates or uses a thread for deeper work.",
|
||||
"Follow-up messages stay attached to the thread.",
|
||||
"Thread report references the correct prior context."
|
||||
],
|
||||
"docsRefs": ["docs/channels/qa-channel.md", "docs/channels/group-messages.md"],
|
||||
"codeRefs": ["extensions/qa-channel/src/protocol.ts", "extensions/qa-lab/src/bus-state.ts"]
|
||||
}
|
||||
]
|
||||
@@ -23,6 +23,13 @@ const { nodesAction, registerNodesCli } = vi.hoisted(() => {
|
||||
return { nodesAction: action, registerNodesCli: register };
|
||||
});
|
||||
|
||||
// Hoisted stand-in for the QA CLI registrar: attaches a `qa` command with a
// no-op `run` subcommand to whatever program it is handed.
const { registerQaCli } = vi.hoisted(() => {
  const register = vi.fn((program: Command) => {
    program
      .command("qa")
      .command("run")
      .action(() => undefined);
  });
  return { registerQaCli: register };
});
|
||||
|
||||
const configModule = vi.hoisted(() => ({
|
||||
loadConfig: vi.fn(),
|
||||
readConfigFileSnapshot: vi.fn(),
|
||||
@@ -30,6 +37,7 @@ const configModule = vi.hoisted(() => ({
|
||||
|
||||
vi.mock("../acp-cli.js", () => ({ registerAcpCli }));
|
||||
vi.mock("../nodes-cli.js", () => ({ registerNodesCli }));
|
||||
vi.mock("../qa-cli.js", () => ({ registerQaCli }));
|
||||
vi.mock("../../config/config.js", () => configModule);
|
||||
|
||||
describe("registerSubCliCommands", () => {
|
||||
@@ -87,6 +95,7 @@ describe("registerSubCliCommands", () => {
|
||||
expect(names).toContain("acp");
|
||||
expect(names).toContain("gateway");
|
||||
expect(names).toContain("clawbot");
|
||||
expect(names).toContain("qa");
|
||||
expect(registerAcpCli).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
|
||||
@@ -181,6 +181,15 @@ const entries: SubCliEntry[] = [
|
||||
mod.registerDocsCli(program);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "qa",
|
||||
description: "Run QA scenarios and launch the private QA debugger UI",
|
||||
hasSubcommands: true,
|
||||
register: async (program) => {
|
||||
const mod = await import("../qa-cli.js");
|
||||
mod.registerQaCli(program);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "hooks",
|
||||
description: "Manage internal agent hooks",
|
||||
|
||||
@@ -68,6 +68,11 @@ export const SUB_CLI_DESCRIPTORS = [
|
||||
description: "Search the live OpenClaw docs",
|
||||
hasSubcommands: false,
|
||||
},
|
||||
{
|
||||
name: "qa",
|
||||
description: "Run QA scenarios and launch the private QA debugger UI",
|
||||
hasSubcommands: true,
|
||||
},
|
||||
{
|
||||
name: "hooks",
|
||||
description: "Manage internal agent hooks",
|
||||
|
||||
6
src/cli/qa-cli.ts
Normal file
6
src/cli/qa-cli.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
import type { Command } from "commander";
|
||||
import { registerQaLabCli } from "../qa-e2e/cli.js";
|
||||
|
||||
/**
 * Registers the QA CLI on the given commander program.
 *
 * This is a thin adapter: all registration work is delegated to
 * `registerQaLabCli`.
 *
 * @param program - Commander program passed through to `registerQaLabCli`.
 */
export function registerQaCli(program: Command): void {
  registerQaLabCli(program);
}
|
||||
1
src/qa-e2e/cli.ts
Normal file
1
src/qa-e2e/cli.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { registerQaLabCli } from "../../extensions/qa-lab/api.js";
|
||||
Reference in New Issue
Block a user