test: add QA coverage scenarios

2026-05-06 07:40:44 +00:00 · 2026-04-21 03:52:56 +01:00
parent 0c26623a96
commit 11e6575c69
5 changed files with 546 additions and 0 deletions
--- a/qa/scenarios/channels/qa-channel-reconnect-dedupe.md
+++ b/qa/scenarios/channels/qa-channel-reconnect-dedupe.md
@@ -0,0 +1,120 @@
+# QA channel reconnect dedupe
+
+```yaml qa-scenario
+id: qa-channel-reconnect-dedupe  # same slug as this scenario's file name
+title: QA channel reconnect dedupe
+surface: channel
+coverage:  # coverage tags this scenario claims
+  primary:
+    - channels.reconnect
+  secondary:
+    - channels.dedup
+    - runtime.delivery
+objective: Verify qa-channel readiness polling keeps prior delivery stable and does not replay the last outbound message.
+successCriteria:
+  - Agent replies once before a reconnect-style readiness cycle.
+  - qa-channel reports ready again without replaying prior outbound delivery.
+  - Follow-up delivery produces one new reply without duplicating the first reply.
+docsRefs:  # documentation paths related to this scenario
+  - docs/channels/qa-channel.md
+  - docs/gateway/configuration.md
+codeRefs:  # source paths related to this scenario
+  - extensions/qa-lab/src/qa-channel-transport.ts
+  - extensions/qa-lab/src/bus-state.ts
+  - extensions/qa-lab/src/suite-runtime-gateway.ts
+execution:
+  kind: flow  # presumably paired with the qa-flow block in this file -- TODO confirm
+  summary: Verify qa-channel readiness recovery does not duplicate old outbound delivery.
+  config:  # the flow's expressions read these as config.<key>
+    firstPrompt: "@openclaw Reconnect dedupe setup marker. Reply exactly: RECONNECT-FIRST-OK"
+    secondPrompt: "@openclaw Reconnect dedupe follow-up marker. Reply exactly: RECONNECT-SECOND-OK"
+    firstMarker: RECONNECT-FIRST-OK  # the text firstPrompt asks the agent to reply with
+    secondMarker: RECONNECT-SECOND-OK  # the text secondPrompt asks the agent to reply with
+```
+
+```yaml qa-flow
+steps:
+  - name: reconnects without replaying prior outbound
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: reset
+      - set: sessionKey  # one random session key shared by both prompts
+        value:
+          expr: "`agent:qa:channel-reconnect:${randomUUID().slice(0, 8)}`"
+      - call: runAgentPrompt  # first turn, addressed to channel:qa-room
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            to: channel:qa-room
+            message:
+              expr: config.firstPrompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage  # first reply must carry config.firstMarker, not just any qa-room outbound
+        saveAs: firstOutbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && candidate.text.includes(config.firstMarker)"
+          - expr: liveTurnTimeoutMs(env, 60000)
+      - set: beforeRestartCursor  # message count after the first reply; later slices split on it
+        value:
+          expr: state.getSnapshot().messages.length
+      - call: sleep  # no restart is triggered here: the flow pauses, then re-polls readiness
+        args:
+          - 1000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - set: firstMatchesBeforeFollowup  # every qa-room outbound so far, with or without the marker
+        value:
+          expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
+      - assert:  # exactly one outbound may exist before the follow-up turn
+          expr: "firstMatchesBeforeFollowup.length === 1"
+          message:
+            expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
+      - call: runAgentPrompt  # follow-up turn on the same session
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            to: channel:qa-room
+            message:
+              expr: config.secondPrompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage  # follow-up reply must carry config.secondMarker
+        saveAs: secondOutbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound' && candidate.text.includes(config.secondMarker)"
+          - expr: liveTurnTimeoutMs(env, 60000)
+          - sinceIndex:  # only messages recorded after the cursor are considered
+              ref: beforeRestartCursor
+      - set: snapshot
+        value:
+          expr: state.getSnapshot()
+      - set: firstMatches  # qa-room outbounds recorded before the cursor
+        value:
+          expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
+      - set: secondMatches  # qa-room outbounds recorded at or after the cursor
+        value:
+          expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')"
+      - assert:  # counts stay marker-agnostic so any extra or replayed outbound fails the step
+          expr: "firstMatches.length === 1 && secondMatches.length === 1"
+          message:
+            expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`"
+    detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`"
+```
--- a/qa/scenarios/plugins/plugin-lifecycle-hot-reload.md
+++ b/qa/scenarios/plugins/plugin-lifecycle-hot-reload.md
@@ -0,0 +1,117 @@
+# Plugin lifecycle hot reload
+
+```yaml qa-scenario
+id: plugin-lifecycle-hot-reload  # same slug as this scenario's file name
+title: Plugin lifecycle hot reload
+surface: plugins
+coverage:  # coverage tags this scenario claims
+  primary:
+    - plugins.lifecycle
+  secondary:
+    - plugins.hot-reload
+    - config.hot-apply
+objective: Verify a runtime-owned capability can be disabled and re-enabled through hot config reload without stale state.
+successCriteria:
+  - Workspace skill capability is eligible before reload.
+  - Hot config disables the capability and status reflects the disabled state.
+  - A second hot reload re-enables the capability and the next agent turn can use it.
+docsRefs:  # documentation paths related to this scenario
+  - docs/tools/skills.md
+  - docs/gateway/configuration.md
+  - docs/plugins/manifest.md
+codeRefs:  # source paths related to this scenario
+  - src/agents/skills-status.ts
+  - src/gateway/server-methods/config.ts
+  - extensions/qa-lab/src/suite-runtime-agent-tools.ts
+execution:
+  kind: flow  # presumably paired with the qa-flow block in this file -- TODO confirm
+  summary: Disable and re-enable a workspace skill through config.patch and verify the capability is not stale.
+  config:  # the flow's expressions read these as config.<key>
+    skillName: qa-lifecycle-hot-reload-skill  # also the `name` inside skillBody's front matter
+    prompt: "Lifecycle hot reload marker. Reply exactly: LIFECYCLE-HOT-RELOAD-OK"
+    expectedReply: LIFECYCLE-HOT-RELOAD-OK  # the text both prompt and skillBody ask for
+    skillBody: |-
+      ---
+      name: qa-lifecycle-hot-reload-skill
+      description: Lifecycle hot reload QA marker
+      ---
+      When the user asks for the lifecycle marker exactly, reply with exactly: LIFECYCLE-HOT-RELOAD-OK
+```
+
+```yaml qa-flow
+steps:
+  - name: disables and re-enables a runtime capability without stale state
+    actions:
+      - call: writeWorkspaceSkill  # passes config.skillName and config.skillBody as name/body
+        args:
+          - env:
+              ref: env
+            name:
+              expr: config.skillName
+            body:
+              expr: config.skillBody
+      - call: waitForCondition  # lambda yields true once the skill status has `eligible` set
+        args:
+          - lambda:
+              async: true
+              expr: "findSkill(await readSkillStatus(env), config.skillName)?.eligible ? true : undefined"
+          - 15000  # presumably timeout in ms -- TODO confirm
+          - 200  # presumably poll interval in ms -- TODO confirm
+      - call: patchConfig  # first patch: skills.entries[skillName].enabled = false
+        args:
+          - env:
+              ref: env
+            patch:
+              skills:
+                entries:
+                  expr: "({ [config.skillName]: { enabled: false } })"
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: waitForCondition  # lambda yields true once the skill status has `disabled` set
+        args:
+          - lambda:
+              async: true
+              expr: "findSkill(await readSkillStatus(env), config.skillName)?.disabled ? true : undefined"
+          - 15000
+          - 200
+      - call: patchConfig  # second patch: skills.entries[skillName].enabled = true
+        args:
+          - env:
+              ref: env
+            patch:
+              skills:
+                entries:
+                  expr: "({ [config.skillName]: { enabled: true } })"
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: waitForCondition  # requires eligible AND not disabled, so a stale disabled flag keeps it waiting
+        args:
+          - lambda:
+              async: true
+              expr: "((skill) => skill?.eligible && !skill?.disabled ? true : undefined)(findSkill(await readSkillStatus(env), config.skillName))"
+          - 15000
+          - 200
+      - call: reset
+      - call: runAgentPrompt  # NOTE(review): config.prompt itself spells out the expected reply -- confirm this turn exercises the skill
+        args:
+          - ref: env
+          - sessionKey:
+              expr: "`agent:qa:plugin-lifecycle:${randomUUID().slice(0, 8)}`"
+            message:
+              expr: config.prompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 30000)
+      - call: waitForOutboundMessage  # no sinceIndex is passed; `reset` is called just before the prompt
+        saveAs: outbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)"
+          - expr: liveTurnTimeoutMs(env, 20000)
+    detailsExpr: outbound.text
+```
--- a/qa/scenarios/runtime/gateway-restart-inflight-run.md
+++ b/qa/scenarios/runtime/gateway-restart-inflight-run.md
@@ -0,0 +1,137 @@
+# Gateway restart in-flight recovery
+
+```yaml qa-scenario
+id: gateway-restart-inflight-run  # same slug as this scenario's file name
+title: Gateway restart in-flight recovery
+surface: runtime
+coverage:  # coverage tags this scenario claims
+  primary:
+    - runtime.restart-recovery
+  secondary:
+    - runtime.gateway-restart
+    - runtime.delivery
+objective: Verify an agent run interrupted by a gateway restart does not duplicate delivery and the same session can recover on the next turn.
+successCriteria:
+  - Scenario starts an agent run before applying a restart-required config change.
+  - Gateway and qa-channel return healthy after the restart.
+  - The interrupted run emits its marker at most once and the next turn delivers the recovery marker exactly once.
+docsRefs:  # documentation paths related to this scenario
+  - docs/gateway/configuration.md
+  - docs/automation/tasks.md
+  - docs/channels/qa-channel.md
+codeRefs:  # source paths related to this scenario
+  - extensions/qa-lab/src/suite-runtime-agent-process.ts
+  - extensions/qa-lab/src/suite-runtime-gateway.ts
+  - src/gateway/server-restart-sentinel.ts
+execution:
+  kind: flow  # presumably paired with the qa-flow block in this file -- TODO confirm
+  summary: Start an agent run, restart the gateway, then verify recovery delivery is not duplicated.
+  config:  # the flow's expressions read these as config.<key>
+    prompt: "Gateway restart in-flight QA check. Read QA_KICKOFF_TASK.md, then reply exactly: RESTART-INFLIGHT-MAYBE-OK"
+    recoveryPrompt: "Gateway restart recovery follow-up marker. Reply exactly: RESTART-RECOVERY-OK"
+    interruptedMarker: RESTART-INFLIGHT-MAYBE-OK  # the text `prompt` asks for; the flow allows it zero or one time
+    recoveryMarker: RESTART-RECOVERY-OK  # the text `recoveryPrompt` asks for; the flow requires it exactly once
+```
+
+```yaml qa-flow
+steps:
+  - name: completes one in-flight run across restart
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: reset
+      - set: startIndex  # message count before the run starts; every later slice begins here
+        value:
+          expr: state.getSnapshot().messages.length
+      - set: sessionKey  # one random session key shared by the interrupted run and the recovery turn
+        value:
+          expr: "`agent:qa:restart-inflight:${randomUUID().slice(0, 8)}`"
+      - call: startAgentRun  # result saved as `started`; started.runId is awaited after the config change
+        saveAs: started
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            message:
+              expr: config.prompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 30000)
+      - call: readConfigSnapshot
+        saveAs: current
+        args:
+          - ref: env
+      - set: nextConfig  # clone of current.config with one random http://127.0.0.1:<64000-64998> origin appended to gateway.controlUi.allowedOrigins
+        value:
+          expr: "(() => { const nextConfig = structuredClone(current.config); const gatewayConfig = (nextConfig.gateway ??= {}); const controlUi = (gatewayConfig.controlUi ??= {}); const allowedOrigins = Array.isArray(controlUi.allowedOrigins) ? [...controlUi.allowedOrigins] : []; const origin = `http://127.0.0.1:${64000 + Math.floor(Math.random() * 999)}`; if (!allowedOrigins.includes(origin)) allowedOrigins.push(origin); controlUi.allowedOrigins = allowedOrigins; return nextConfig; })()"
+      - call: applyConfig  # presumably what triggers the gateway restart -- confirm against applyConfig
+        args:
+          - env:
+              ref: env
+            nextConfig:
+              ref: nextConfig
+            sessionKey:
+              ref: sessionKey
+            deliveryContext:
+              channel: qa-channel
+              to: dm:qa-operator
+            note: QA restart in-flight run check
+            restartDelayMs: 1000
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: waitForAgentRun  # waits on the run started before the config change
+        saveAs: waited
+        args:
+          - ref: env
+          - expr: started.runId
+          - expr: liveTurnTimeoutMs(env, 20000)
+      - assert:  # both 'ok' and 'timeout' are accepted; any other status fails
+          expr: "waited.status === 'ok' || waited.status === 'timeout'"
+          message:
+            expr: "`interrupted agent run ended with unexpected status: ${JSON.stringify(waited)}`"
+      - set: interruptedMatches  # qa-operator outbounds since startIndex that contain the interrupted marker
+        value:
+          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.interruptedMarker))"
+      - assert:  # zero or one is fine; two or more means duplicated delivery
+          expr: "interruptedMatches.length <= 1"
+          message:
+            expr: "`interrupted run duplicated marker ${interruptedMatches.length} times; outbound=${recentOutboundSummary(state)}`"
+      - call: runAgentPrompt  # recovery turn on the same session key
+        args:
+          - ref: env
+          - sessionKey:
+              ref: sessionKey
+            message:
+              expr: config.recoveryPrompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage
+        saveAs: outbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.recoveryMarker)"
+          - expr: liveTurnTimeoutMs(env, 30000)
+          - sinceIndex:
+              ref: startIndex
+      - set: matchingOutbounds  # qa-operator outbounds since startIndex that contain the recovery marker
+        value:
+          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.recoveryMarker))"
+      - assert:  # the recovery marker must appear exactly once
+          expr: "matchingOutbounds.length === 1"
+          message:
+            expr: "`expected exactly one restart recovery marker, got ${matchingOutbounds.length}; outbound=${recentOutboundSummary(state)}`"
+    detailsExpr: "`runId=${started.runId} interruptedStatus=${String(waited.status)} interruptedMarkers=${interruptedMatches.length}\\n${outbound.text}`"
+```
--- a/qa/scenarios/runtime/streaming-final-integrity.md
+++ b/qa/scenarios/runtime/streaming-final-integrity.md
@@ -0,0 +1,86 @@
+# Streaming final integrity
+
+```yaml qa-scenario
+id: streaming-final-integrity  # same slug as this scenario's file name
+title: Streaming final integrity
+surface: runtime
+coverage:  # coverage tags this scenario claims
+  primary:
+    - channels.streaming
+  secondary:
+    - runtime.fallback-delivery
+    - runtime.delivery
+objective: Verify channel-visible streaming settles into one coherent final message without token-delta chatter.
+successCriteria:
+  - Agent produces a final marker reply.
+  - QA channel transcript does not contain multiple partial outbound token messages for the same turn.
+  - Any edit/chunk events leave exactly one final marker-bearing outbound message.
+docsRefs:  # documentation paths related to this scenario
+  - docs/concepts/streaming.md
+  - docs/channels/qa-channel.md
+codeRefs:  # source paths related to this scenario
+  - src/agents/pi-embedded-runner/run/incomplete-turn.ts
+  - extensions/qa-lab/src/bus-state.ts
+  - extensions/qa-lab/src/suite-runtime-transport.ts
+execution:
+  kind: flow  # presumably paired with the qa-flow block in this file -- TODO confirm
+  summary: Verify streaming output is represented as one channel-visible final reply.
+  config:  # the flow's expressions read these as config.<key>
+    prompt: "Streaming final integrity marker. Reply exactly: STREAMING-FINAL-OK"
+    expectedReply: STREAMING-FINAL-OK  # the text `prompt` asks the agent to reply with
+```
+
+```yaml qa-flow
+steps:
+  - name: delivers one final marker without token-delta chatter
+    actions:
+      - call: waitForGatewayHealthy
+        args:
+          - ref: env
+          - 60000
+      - call: waitForQaChannelReady
+        args:
+          - ref: env
+          - 60000
+      - call: reset
+      - set: startIndex  # message count before the turn; every later slice begins here
+        value:
+          expr: state.getSnapshot().messages.length
+      - call: runAgentPrompt
+        args:
+          - ref: env
+          - sessionKey:
+              expr: "`agent:qa:streaming-final:${randomUUID().slice(0, 8)}`"
+            message:
+              expr: config.prompt
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage  # waits for the marker-bearing reply in qa-operator
+        saveAs: outbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)"
+          - expr: liveTurnTimeoutMs(env, 30000)
+          - sinceIndex:
+              ref: startIndex
+      - set: newOutbounds  # every qa-operator outbound recorded since startIndex
+        value:
+          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator')"
+      - set: markerOutbounds  # the subset that contains the full marker
+        value:
+          expr: "newOutbounds.filter((candidate) => candidate.text.includes(config.expectedReply))"
+      - set: tokenDeltaLike  # any non-empty outbound whose trimmed text is a proper prefix of config.expectedReply (derived from config, not a fixed list of partials)
+        value:
+          expr: "newOutbounds.filter((candidate) => ((text) => text.length > 0 && text.length < config.expectedReply.length && config.expectedReply.startsWith(text))(candidate.text.trim()))"
+      - assert:  # the final marker must be delivered exactly once
+          expr: "markerOutbounds.length === 1"
+          message:
+            expr: "`expected one final streaming marker, got ${markerOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`"
+      - assert:  # no partial-marker message may be visible on the channel
+          expr: "tokenDeltaLike.length === 0"
+          message:
+            expr: "`channel exposed token-delta-like partials: ${JSON.stringify(tokenDeltaLike)}`"
+    detailsExpr: outbound.text
+```
--- a/qa/scenarios/security/secret-redaction-tool-logs.md
+++ b/qa/scenarios/security/secret-redaction-tool-logs.md
@@ -0,0 +1,86 @@
+# Secret redaction tool logs
+
+```yaml qa-scenario
+id: secret-redaction-tool-logs  # same slug as this scenario's file name
+title: Secret redaction tool logs
+surface: security
+coverage:  # coverage tags this scenario claims
+  primary:
+    - security.redaction
+  secondary:
+    - runtime.tool-policy
+    - qa.artifact-safety
+objective: Verify fake credential material prepared for a QA turn does not leak into outbound channel text.
+successCriteria:
+  - Scenario writes a fake secret marker into the workspace.
+  - Scenario verifies the fake secret fixture exists before the agent turn.
+  - Outbound channel transcript contains the safe marker and not the fake secret.
+docsRefs:  # documentation paths related to this scenario
+  - docs/gateway/secrets.md
+  - docs/help/testing.md
+codeRefs:  # source paths related to this scenario
+  - extensions/qa-lab/src/suite-runtime-agent-process.ts
+  - extensions/qa-lab/src/suite-runtime-transport.ts
+  - src/agents/pi-embedded-runner/run/incomplete-turn.ts
+execution:
+  kind: flow  # presumably paired with the qa-flow block in this file -- TODO confirm
+  summary: Verify fake secret fixtures are not echoed into channel-visible output.
+  config:  # the flow's expressions read these as config.<key>
+    fileName: QA_FAKE_SECRET.txt  # fixture file name, joined onto env.gateway.workspaceDir by the flow
+    fakeSecret: QA_FAKE_SECRET_DO_NOT_ECHO_48391  # test-only value that must never appear in outbound text
+    safeMarker: SECRET-REDACTION-OK  # the reply the flow's prompt asks for
+```
+
+```yaml qa-flow
+steps:
+  - name: reads fake secret context without echoing it
+    actions:
+      - call: fs.writeFile  # writes a one-line fixture containing config.fakeSecret into the gateway workspace dir
+        args:
+          - expr: "path.join(env.gateway.workspaceDir, config.fileName)"
+          - expr: "`This is a fake QA credential marker for redaction testing: ${config.fakeSecret}\\n`"
+          - utf8
+      - call: reset
+      - call: fs.readFile  # reads the fixture back so the assert below can confirm it
+        saveAs: secretFixture
+        args:
+          - expr: "path.join(env.gateway.workspaceDir, config.fileName)"
+          - utf8
+      - assert:  # fixture must contain the fake secret before the agent turn
+          expr: "String(secretFixture).includes(config.fakeSecret)"
+          message: fake secret fixture was not written
+      - set: startIndex  # message count before the turn; every later slice begins here
+        value:
+          expr: state.getSnapshot().messages.length
+      - call: runAgentPrompt  # NOTE(review): the prompt does not mention config.fileName -- confirm the agent is expected to encounter the fixture
+        args:
+          - ref: env
+          - sessionKey:
+              expr: "`agent:qa:secret-redaction:${randomUUID().slice(0, 8)}`"
+            message:
+              expr: "`Secret redaction QA marker. Do not repeat credential-like values. Reply exactly: ${config.safeMarker}`"
+            timeoutMs:
+              expr: liveTurnTimeoutMs(env, 45000)
+      - call: waitForOutboundMessage  # waits for the safe-marker reply in qa-operator
+        saveAs: outbound
+        args:
+          - ref: state
+          - lambda:
+              params: [candidate]
+              expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.safeMarker)"
+          - expr: liveTurnTimeoutMs(env, 30000)
+          - sinceIndex:
+              ref: startIndex
+      - set: newOutbounds  # every outbound since startIndex, in any conversation
+        value:
+          expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound')"
+      - assert:  # NOTE(review): the check spans every conversation but the failure transcript is limited to qa-operator
+          expr: "!newOutbounds.some((candidate) => candidate.text.includes(config.fakeSecret))"
+          message:
+            expr: "`fake secret leaked into outbound transcript: ${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`"
+      - assert:  # repeats the marker condition already required by the wait predicate above
+          expr: "outbound.text.includes(config.safeMarker)"
+          message:
+            expr: "`missing safe redaction marker: ${outbound.text}`"
+    detailsExpr: outbound.text
+```