test: add QA coverage scenarios

This commit is contained in:
Peter Steinberger
2026-04-21 03:52:56 +01:00
parent 0c26623a96
commit 11e6575c69
5 changed files with 546 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
# QA channel reconnect dedupe
```yaml qa-scenario
# Scenario metadata for the qa-channel reconnect / dedupe check.
id: qa-channel-reconnect-dedupe
title: QA channel reconnect dedupe
surface: channel
coverage:
  primary:
    - channels.reconnect
  secondary:
    - channels.dedup
    - runtime.delivery
objective: >-
  Verify qa-channel readiness polling keeps prior delivery stable
  and does not replay the last outbound message.
successCriteria:
  - Agent replies once before a reconnect-style readiness cycle.
  - qa-channel reports ready again without replaying prior outbound delivery.
  - Follow-up delivery produces one new reply without duplicating the first reply.
docsRefs:
  - docs/channels/qa-channel.md
  - docs/gateway/configuration.md
codeRefs:
  - extensions/qa-lab/src/qa-channel-transport.ts
  - extensions/qa-lab/src/bus-state.ts
  - extensions/qa-lab/src/suite-runtime-gateway.ts
execution:
  kind: flow
  summary: >-
    Verify qa-channel readiness recovery does not duplicate old
    outbound delivery.
  config:
    # Each prompt asks for an exact reply; the matching marker repeats that
    # reply text as a standalone value.
    firstPrompt: '@openclaw Reconnect dedupe setup marker. Reply exactly: RECONNECT-FIRST-OK'
    secondPrompt: '@openclaw Reconnect dedupe follow-up marker. Reply exactly: RECONNECT-SECOND-OK'
    firstMarker: RECONNECT-FIRST-OK
    secondMarker: RECONNECT-SECOND-OK
```
```yaml qa-flow
# Flow: run one agent turn in qa-room, re-check qa-channel readiness, then run
# a follow-up turn. Fails when the first reply shows up again after the
# readiness check, or when either side of the cursor holds anything other than
# exactly one outbound qa-room message.
steps:
  - name: reconnects without replaying prior outbound
    actions:
      - call: waitForGatewayHealthy
        args:
          - ref: env
          - 60000
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      - call: reset
      - set: sessionKey
        value:
          expr: "`agent:qa:channel-reconnect:${randomUUID().slice(0, 8)}`"
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              ref: sessionKey
            to: channel:qa-room
            message:
              expr: config.firstPrompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 45000)
      # Wait for the reply that carries config.firstMarker rather than for any
      # outbound message, so an unrelated reply cannot satisfy the setup turn.
      - call: waitForOutboundMessage
        saveAs: firstOutbound
        args:
          - ref: state
          - lambda:
              params: [candidate]
              expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && candidate.text.includes(config.firstMarker)"
          - expr: liveTurnTimeoutMs(env, 60000)
      # Messages recorded before this index belong to the first turn; the
      # slices below split the transcript at this point.
      - set: beforeRestartCursor
        value:
          expr: state.getSnapshot().messages.length
      - call: sleep
        args:
          - 1000
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      # Count every outbound qa-room message (not only marker-bearing ones) so
      # a replay of any prior delivery is caught.
      - set: firstMatchesBeforeFollowup
        value:
          expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
      - assert:
          expr: "firstMatchesBeforeFollowup.length === 1"
          message:
            expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              ref: sessionKey
            to: channel:qa-room
            message:
              expr: config.secondPrompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 45000)
      # Same tightening for the follow-up turn: require config.secondMarker.
      - call: waitForOutboundMessage
        saveAs: secondOutbound
        args:
          - ref: state
          - lambda:
              params: [candidate]
              expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && candidate.text.includes(config.secondMarker)"
          - expr: liveTurnTimeoutMs(env, 60000)
          - sinceIndex:
              ref: beforeRestartCursor
      - set: snapshot
        value:
          expr: state.getSnapshot()
      - set: firstMatches
        value:
          expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
      - set: secondMatches
        value:
          expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
      - assert:
          expr: "firstMatches.length === 1 && secondMatches.length === 1"
          message:
            expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
    detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
```

View File

@@ -0,0 +1,117 @@
# Plugin lifecycle hot reload
```yaml qa-scenario
# Scenario metadata for the plugin lifecycle hot-reload check.
id: plugin-lifecycle-hot-reload
title: Plugin lifecycle hot reload
surface: plugins
coverage:
  primary:
    - plugins.lifecycle
  secondary:
    - plugins.hot-reload
    - config.hot-apply
objective: >-
  Verify a runtime-owned capability can be disabled and re-enabled
  through hot config reload without stale state.
successCriteria:
  - Workspace skill capability is eligible before reload.
  - Hot config disables the capability and status reflects the disabled state.
  - A second hot reload re-enables the capability and the next agent turn can use it.
docsRefs:
  - docs/tools/skills.md
  - docs/gateway/configuration.md
  - docs/plugins/manifest.md
codeRefs:
  - src/agents/skills-status.ts
  - src/gateway/server-methods/config.ts
  - extensions/qa-lab/src/suite-runtime-agent-tools.ts
execution:
  kind: flow
  summary: >-
    Disable and re-enable a workspace skill through config.patch
    and verify the capability is not stale.
  config:
    skillName: qa-lifecycle-hot-reload-skill
    prompt: 'Lifecycle hot reload marker. Reply exactly: LIFECYCLE-HOT-RELOAD-OK'
    expectedReply: LIFECYCLE-HOT-RELOAD-OK
    # Skill file content: a front-matter header (name, description) followed
    # by the instruction line. `|-` keeps line breaks and drops the final one.
    skillBody: |-
      ---
      name: qa-lifecycle-hot-reload-skill
      description: Lifecycle hot reload QA marker
      ---
      When the user asks for the lifecycle marker exactly, reply with exactly: LIFECYCLE-HOT-RELOAD-OK
```
```yaml qa-flow
# Flow: write a workspace skill, switch it off and back on through two
# patchConfig calls, then run one agent turn and wait for the expected reply.
steps:
  - name: disables and re-enables a runtime capability without stale state
    actions:
      # Create the skill, then poll its status until it reports `eligible`.
      - call: writeWorkspaceSkill
        args:
          - env:
              ref: env
            name:
              expr: config.skillName
            body:
              expr: config.skillBody
      # NOTE(review): 15000 / 200 look like timeout and poll interval in ms;
      # confirm against waitForCondition.
      - call: waitForCondition
        args:
          - lambda:
              async: true
              expr: 'findSkill(await readSkillStatus(env), config.skillName)?.eligible ? true : undefined'
          - 15000
          - 200
      # First patch: set enabled=false on the skill entry and wait for the
      # status to report `disabled`.
      - call: patchConfig
        args:
          - env:
              ref: env
            patch:
              skills:
                entries:
                  expr: '({ [config.skillName]: { enabled: false } })'
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      - call: waitForCondition
        args:
          - lambda:
              async: true
              expr: 'findSkill(await readSkillStatus(env), config.skillName)?.disabled ? true : undefined'
          - 15000
          - 200
      # Second patch: set enabled=true and wait until the status is eligible
      # and no longer disabled.
      - call: patchConfig
        args:
          - env:
              ref: env
            patch:
              skills:
                entries:
                  expr: '({ [config.skillName]: { enabled: true } })'
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      - call: waitForCondition
        args:
          - lambda:
              async: true
              expr: '((skill) => skill?.eligible && !skill?.disabled ? true : undefined)(findSkill(await readSkillStatus(env), config.skillName))'
          - 15000
          - 200
      # Run one agent turn on a fresh session key and wait for the marker.
      - call: reset
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              expr: '`agent:qa:plugin-lifecycle:${randomUUID().slice(0, 8)}`'
            message:
              expr: config.prompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 30000)
      - call: waitForOutboundMessage
        saveAs: outbound
        args:
          - ref: state
          - lambda:
              params:
                - candidate
              expr: >-
                candidate.conversation.id === 'qa-operator' &&
                candidate.text.includes(config.expectedReply)
          - expr: liveTurnTimeoutMs(env, 20000)
    detailsExpr: outbound.text
```

View File

@@ -0,0 +1,137 @@
# Gateway restart in-flight recovery
```yaml qa-scenario
# Scenario metadata for the gateway restart in-flight recovery check.
id: gateway-restart-inflight-run
title: Gateway restart in-flight recovery
surface: runtime
coverage:
  primary:
    - runtime.restart-recovery
  secondary:
    - runtime.gateway-restart
    - runtime.delivery
objective: >-
  Verify an agent run interrupted by a gateway restart does not
  duplicate delivery and the same session can recover on the next turn.
successCriteria:
  - Scenario starts an agent run before applying a restart-required config change.
  - Gateway and qa-channel return healthy after the restart.
  - The interrupted run emits its marker at most once and the next turn delivers the recovery marker exactly once.
docsRefs:
  - docs/gateway/configuration.md
  - docs/automation/tasks.md
  - docs/channels/qa-channel.md
codeRefs:
  - extensions/qa-lab/src/suite-runtime-agent-process.ts
  - extensions/qa-lab/src/suite-runtime-gateway.ts
  - src/gateway/server-restart-sentinel.ts
execution:
  kind: flow
  summary: >-
    Start an agent run, restart the gateway, then verify recovery
    delivery is not duplicated.
  config:
    # Each marker repeats the exact reply text its prompt asks for.
    prompt: 'Gateway restart in-flight QA check. Read QA_KICKOFF_TASK.md, then reply exactly: RESTART-INFLIGHT-MAYBE-OK'
    recoveryPrompt: 'Gateway restart recovery follow-up marker. Reply exactly: RESTART-RECOVERY-OK'
    interruptedMarker: RESTART-INFLIGHT-MAYBE-OK
    recoveryMarker: RESTART-RECOVERY-OK
```
```yaml qa-flow
# Flow: start an agent run, apply a config change while it is in flight, wait
# for gateway and qa-channel to come back, then check that the interrupted
# marker appears at most once and a follow-up turn delivers the recovery
# marker exactly once.
steps:
  - name: completes one in-flight run across restart
    actions:
      - call: waitForGatewayHealthy
        args:
          - ref: env
          - 60000
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      - call: reset
      # Only messages recorded from this index onward are inspected below.
      - set: startIndex
        value:
          expr: state.getSnapshot().messages.length
      - set: sessionKey
        value:
          expr: '`agent:qa:restart-inflight:${randomUUID().slice(0, 8)}`'
      # The result is saved so its runId can be passed to waitForAgentRun
      # after the config change.
      - call: startAgentRun
        saveAs: started
        args:
          - ref: env
          - sessionKey:
              ref: sessionKey
            message:
              expr: config.prompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 30000)
      - call: readConfigSnapshot
        saveAs: current
        args:
          - ref: env
      # Clone the current config and add a random 127.0.0.1 origin to
      # gateway.controlUi.allowedOrigins.
      # NOTE(review): if the random origin is already listed, nothing is
      # pushed and the config is unchanged; confirm a restart still happens
      # in that case.
      - set: nextConfig
        value:
          expr: >-
            (() => {
            const nextConfig = structuredClone(current.config);
            const gatewayConfig = (nextConfig.gateway ??= {});
            const controlUi = (gatewayConfig.controlUi ??= {});
            const allowedOrigins = Array.isArray(controlUi.allowedOrigins) ? [...controlUi.allowedOrigins] : [];
            const origin = `http://127.0.0.1:${64000 + Math.floor(Math.random() * 999)}`;
            if (!allowedOrigins.includes(origin)) allowedOrigins.push(origin);
            controlUi.allowedOrigins = allowedOrigins;
            return nextConfig;
            })()
      - call: applyConfig
        args:
          - env:
              ref: env
            nextConfig:
              ref: nextConfig
            sessionKey:
              ref: sessionKey
            deliveryContext:
              channel: qa-channel
              to: dm:qa-operator
            note: QA restart in-flight run check
            restartDelayMs: 1000
      - call: waitForGatewayHealthy
        args:
          - ref: env
          - 60000
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      # The interrupted run may finish or time out; both are accepted.
      - call: waitForAgentRun
        saveAs: waited
        args:
          - ref: env
          - expr: started.runId
          - expr: liveTurnTimeoutMs(env, 20000)
      - assert:
          expr: "waited.status === 'ok' || waited.status === 'timeout'"
          message:
            expr: "`interrupted agent run ended with unexpected status: ${JSON.stringify(waited)}`"
      - set: interruptedMatches
        value:
          expr: >-
            state.getSnapshot().messages.slice(startIndex).filter((candidate) =>
            candidate.direction === 'outbound' &&
            candidate.conversation.id === 'qa-operator' &&
            candidate.text.includes(config.interruptedMarker))
      - assert:
          expr: 'interruptedMatches.length <= 1'
          message:
            expr: "`interrupted run duplicated marker ${interruptedMatches.length} times; outbound=${recentOutboundSummary(state)}`"
      # Follow-up turn on the same session key must deliver the recovery
      # marker exactly once.
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              ref: sessionKey
            message:
              expr: config.recoveryPrompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 45000)
      - call: waitForOutboundMessage
        saveAs: outbound
        args:
          - ref: state
          - lambda:
              params:
                - candidate
              expr: >-
                candidate.conversation.id === 'qa-operator' &&
                candidate.text.includes(config.recoveryMarker)
          - expr: liveTurnTimeoutMs(env, 30000)
          - sinceIndex:
              ref: startIndex
      - set: matchingOutbounds
        value:
          expr: >-
            state.getSnapshot().messages.slice(startIndex).filter((candidate) =>
            candidate.direction === 'outbound' &&
            candidate.conversation.id === 'qa-operator' &&
            candidate.text.includes(config.recoveryMarker))
      - assert:
          expr: 'matchingOutbounds.length === 1'
          message:
            expr: "`expected exactly one restart recovery marker, got ${matchingOutbounds.length}; outbound=${recentOutboundSummary(state)}`"
    detailsExpr: "`runId=${started.runId} interruptedStatus=${String(waited.status)} interruptedMarkers=${interruptedMatches.length}\\n${outbound.text}`"
```

View File

@@ -0,0 +1,86 @@
# Streaming final integrity
```yaml qa-scenario
# Scenario metadata for the streaming final-message integrity check.
id: streaming-final-integrity
title: Streaming final integrity
surface: runtime
coverage:
  primary:
    - channels.streaming
  secondary:
    - runtime.fallback-delivery
    - runtime.delivery
objective: >-
  Verify channel-visible streaming settles into one coherent final
  message without token-delta chatter.
successCriteria:
  - Agent produces a final marker reply.
  - QA channel transcript does not contain multiple partial outbound token messages for the same turn.
  - Any edit/chunk events leave exactly one final marker-bearing outbound message.
docsRefs:
  - docs/concepts/streaming.md
  - docs/channels/qa-channel.md
codeRefs:
  - src/agents/pi-embedded-runner/run/incomplete-turn.ts
  - extensions/qa-lab/src/bus-state.ts
  - extensions/qa-lab/src/suite-runtime-transport.ts
execution:
  kind: flow
  summary: >-
    Verify streaming output is represented as one channel-visible
    final reply.
  config:
    # expectedReply repeats the exact reply text the prompt asks for.
    prompt: 'Streaming final integrity marker. Reply exactly: STREAMING-FINAL-OK'
    expectedReply: STREAMING-FINAL-OK
```
```yaml qa-flow
# Flow: run one agent turn, then inspect every outbound qa-operator message
# recorded since the turn started. Requires exactly one message containing
# config.expectedReply and no message that looks like a streamed partial.
steps:
  - name: delivers one final marker without token-delta chatter
    actions:
      - call: waitForGatewayHealthy
        args:
          - ref: env
          - 60000
      - call: waitForQaChannelReady
        args:
          - ref: env
          - 60000
      - call: reset
      # Only messages recorded from this index onward are inspected below.
      - set: startIndex
        value:
          expr: state.getSnapshot().messages.length
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              expr: "`agent:qa:streaming-final:${randomUUID().slice(0, 8)}`"
            message:
              expr: config.prompt
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 45000)
      - call: waitForOutboundMessage
        saveAs: outbound
        args:
          - ref: state
          - lambda:
              params: [candidate]
              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)"
          - expr: liveTurnTimeoutMs(env, 30000)
          - sinceIndex:
              ref: startIndex
      - set: newOutbounds
        value:
          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator')"
      - set: markerOutbounds
        value:
          expr: "newOutbounds.filter((candidate) => candidate.text.includes(config.expectedReply))"
      # A partial is any outbound whose trimmed text is non-empty and a strict
      # prefix of config.expectedReply (e.g. "STREAM", "STREAMING-FINAL").
      # Deriving this from the config value covers every cut point instead of
      # a fixed list of prefixes, and stays correct if the marker changes.
      - set: tokenDeltaLike
        value:
          expr: >-
            newOutbounds.filter((candidate) => {
            const text = candidate.text.trim();
            return text.length > 0 && text.length < config.expectedReply.length && config.expectedReply.startsWith(text);
            })
      - assert:
          expr: "markerOutbounds.length === 1"
          message:
            expr: "`expected one final streaming marker, got ${markerOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`"
      - assert:
          expr: "tokenDeltaLike.length === 0"
          message:
            expr: "`channel exposed token-delta-like partials: ${JSON.stringify(tokenDeltaLike)}`"
    detailsExpr: outbound.text
```

View File

@@ -0,0 +1,86 @@
# Secret redaction tool logs
```yaml qa-scenario
# Scenario metadata for the secret redaction check.
id: secret-redaction-tool-logs
title: Secret redaction tool logs
surface: security
coverage:
  primary:
    - security.redaction
  secondary:
    - runtime.tool-policy
    - qa.artifact-safety
objective: >-
  Verify fake credential material prepared for a QA turn does not
  leak into outbound channel text.
successCriteria:
  - Scenario writes a fake secret marker into the workspace.
  - Scenario verifies the fake secret fixture exists before the agent turn.
  - Outbound channel transcript contains the safe marker and not the fake secret.
docsRefs:
  - docs/gateway/secrets.md
  - docs/help/testing.md
codeRefs:
  - extensions/qa-lab/src/suite-runtime-agent-process.ts
  - extensions/qa-lab/src/suite-runtime-transport.ts
  - src/agents/pi-embedded-runner/run/incomplete-turn.ts
execution:
  kind: flow
  summary: >-
    Verify fake secret fixtures are not echoed into channel-visible
    output.
  config:
    # fakeSecret is a made-up fixture value, not a real credential; the flow
    # fails if it appears in any outbound message.
    fileName: QA_FAKE_SECRET.txt
    fakeSecret: QA_FAKE_SECRET_DO_NOT_ECHO_48391
    safeMarker: SECRET-REDACTION-OK
```
```yaml qa-flow
# Flow: write a fake secret file into the gateway workspace, confirm it was
# written, run one agent turn, then fail if any outbound message recorded
# since the turn started contains the fake secret.
steps:
  - name: reads fake secret context without echoing it
    actions:
      - call: fs.writeFile
        args:
          - expr: "path.join(env.gateway.workspaceDir, config.fileName)"
          - expr: "`This is a fake QA credential marker for redaction testing: ${config.fakeSecret}\\n`"
          - utf8
      - call: reset
      # Read the fixture back so a failed write is reported as such.
      - call: fs.readFile
        saveAs: secretFixture
        args:
          - expr: "path.join(env.gateway.workspaceDir, config.fileName)"
          - utf8
      - assert:
          expr: "String(secretFixture).includes(config.fakeSecret)"
          message: fake secret fixture was not written
      # Only messages recorded from this index onward are inspected below.
      - set: startIndex
        value:
          expr: state.getSnapshot().messages.length
      - call: runAgentPrompt
        args:
          - ref: env
          - sessionKey:
              expr: "`agent:qa:secret-redaction:${randomUUID().slice(0, 8)}`"
            message:
              expr: "`Secret redaction QA marker. Do not repeat credential-like values. Reply exactly: ${config.safeMarker}`"
            timeoutMs:
              expr: liveTurnTimeoutMs(env, 45000)
      - call: waitForOutboundMessage
        saveAs: outbound
        args:
          - ref: state
          - lambda:
              params: [candidate]
              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.safeMarker)"
          - expr: liveTurnTimeoutMs(env, 30000)
          - sinceIndex:
              ref: startIndex
      # The leak scan covers outbound messages from every conversation.
      - set: newOutbounds
        value:
          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound')"
      # Keep the offending messages so the failure message names them even
      # when the leak is outside the qa-operator conversation.
      - set: leakedOutbounds
        value:
          expr: "newOutbounds.filter((candidate) => candidate.text.includes(config.fakeSecret))"
      - assert:
          expr: "leakedOutbounds.length === 0"
          message:
            expr: "`fake secret leaked into outbound transcript: ${JSON.stringify(leakedOutbounds)}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`"
      - assert:
          expr: "outbound.text.includes(config.safeMarker)"
          message:
            expr: "`missing safe redaction marker: ${outbound.text}`"
    detailsExpr: outbound.text
```