From 11e6575c69aeaaad6b0e4557554a8de8d7cc8fa8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 21 Apr 2026 03:52:56 +0100 Subject: [PATCH] test: add QA coverage scenarios --- .../channels/qa-channel-reconnect-dedupe.md | 120 +++++++++++++++ .../plugins/plugin-lifecycle-hot-reload.md | 117 +++++++++++++++ .../runtime/gateway-restart-inflight-run.md | 137 ++++++++++++++++++ .../runtime/streaming-final-integrity.md | 86 +++++++++++ .../security/secret-redaction-tool-logs.md | 86 +++++++++++ 5 files changed, 546 insertions(+) create mode 100644 qa/scenarios/channels/qa-channel-reconnect-dedupe.md create mode 100644 qa/scenarios/plugins/plugin-lifecycle-hot-reload.md create mode 100644 qa/scenarios/runtime/gateway-restart-inflight-run.md create mode 100644 qa/scenarios/runtime/streaming-final-integrity.md create mode 100644 qa/scenarios/security/secret-redaction-tool-logs.md diff --git a/qa/scenarios/channels/qa-channel-reconnect-dedupe.md b/qa/scenarios/channels/qa-channel-reconnect-dedupe.md new file mode 100644 index 00000000000..15ab1dd8611 --- /dev/null +++ b/qa/scenarios/channels/qa-channel-reconnect-dedupe.md @@ -0,0 +1,120 @@ +# QA channel reconnect dedupe + +```yaml qa-scenario +id: qa-channel-reconnect-dedupe +title: QA channel reconnect dedupe +surface: channel +coverage: + primary: + - channels.reconnect + secondary: + - channels.dedup + - runtime.delivery +objective: Verify qa-channel readiness polling keeps prior delivery stable and does not replay the last outbound message. +successCriteria: + - Agent replies once before a reconnect-style readiness cycle. + - qa-channel reports ready again without replaying prior outbound delivery. + - Follow-up delivery produces one new reply without duplicating the first reply. +docsRefs: + - docs/channels/qa-channel.md + - docs/gateway/configuration.md +codeRefs: + - extensions/qa-lab/src/qa-channel-transport.ts + - extensions/qa-lab/src/bus-state.ts + - extensions/qa-lab/src/suite-runtime-gateway.ts +execution: + kind: flow + summary: Verify qa-channel readiness recovery does not duplicate old outbound delivery. + config: + firstPrompt: "@openclaw Reconnect dedupe setup marker. Reply exactly: RECONNECT-FIRST-OK" + secondPrompt: "@openclaw Reconnect dedupe follow-up marker. Reply exactly: RECONNECT-SECOND-OK" + firstMarker: RECONNECT-FIRST-OK + secondMarker: RECONNECT-SECOND-OK +``` + +```yaml qa-flow +steps: + - name: reconnects without replaying prior outbound + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: reset + - set: sessionKey + value: + expr: "`agent:qa:channel-reconnect:${randomUUID().slice(0, 8)}`" + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + ref: sessionKey + to: channel:qa-room + message: + expr: config.firstPrompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 45000) + - call: waitForOutboundMessage + saveAs: firstOutbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'" + - expr: liveTurnTimeoutMs(env, 60000) + - set: beforeRestartCursor + value: + expr: state.getSnapshot().messages.length + - call: sleep + args: + - 1000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - set: firstMatchesBeforeFollowup + value: + expr: "state.getSnapshot().messages.filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')" + - assert: + expr: "firstMatchesBeforeFollowup.length === 1" + message: + expr: "`readiness cycle replayed first reply ${firstMatchesBeforeFollowup.length} times; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`" + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + ref: sessionKey + to: channel:qa-room + message: + expr: config.secondPrompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 45000) + - call: waitForOutboundMessage + saveAs: secondOutbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-room' && candidate.direction === 'outbound'" + - expr: liveTurnTimeoutMs(env, 60000) + - sinceIndex: + ref: beforeRestartCursor + - set: snapshot + value: + expr: state.getSnapshot() + - set: firstMatches + value: + expr: "snapshot.messages.slice(0, beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')" + - set: secondMatches + value: + expr: "snapshot.messages.slice(beforeRestartCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-room')" + - assert: + expr: "firstMatches.length === 1 && secondMatches.length === 1" + message: + expr: "`expected one pre-restart and one post-restart reply; first=${firstMatches.length} second=${secondMatches.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-room' })}`" + detailsExpr: "`before=${firstOutbound.text}\\nafter=${secondOutbound.text}`" +``` diff --git a/qa/scenarios/plugins/plugin-lifecycle-hot-reload.md b/qa/scenarios/plugins/plugin-lifecycle-hot-reload.md new file mode 100644 index 00000000000..fed10c9e4de --- /dev/null +++ b/qa/scenarios/plugins/plugin-lifecycle-hot-reload.md @@ -0,0 +1,117 @@ +# Plugin lifecycle hot reload + +```yaml qa-scenario +id: plugin-lifecycle-hot-reload +title: Plugin lifecycle hot reload +surface: plugins +coverage: + primary: + - plugins.lifecycle + secondary: + - plugins.hot-reload + - config.hot-apply +objective: Verify a runtime-owned capability can be disabled and re-enabled through hot config reload without stale state. +successCriteria: + - Workspace skill capability is eligible before reload. + - Hot config disables the capability and status reflects the disabled state. + - A second hot reload re-enables the capability and the next agent turn can use it. +docsRefs: + - docs/tools/skills.md + - docs/gateway/configuration.md + - docs/plugins/manifest.md +codeRefs: + - src/agents/skills-status.ts + - src/gateway/server-methods/config.ts + - extensions/qa-lab/src/suite-runtime-agent-tools.ts +execution: + kind: flow + summary: Disable and re-enable a workspace skill through config.patch and verify the capability is not stale. + config: + skillName: qa-lifecycle-hot-reload-skill + prompt: "Lifecycle hot reload marker. Reply exactly: LIFECYCLE-HOT-RELOAD-OK" + expectedReply: LIFECYCLE-HOT-RELOAD-OK + skillBody: |- + --- + name: qa-lifecycle-hot-reload-skill + description: Lifecycle hot reload QA marker + --- + When the user asks for the lifecycle marker exactly, reply with exactly: LIFECYCLE-HOT-RELOAD-OK +``` + +```yaml qa-flow +steps: + - name: disables and re-enables a runtime capability without stale state + actions: + - call: writeWorkspaceSkill + args: + - env: + ref: env + name: + expr: config.skillName + body: + expr: config.skillBody + - call: waitForCondition + args: + - lambda: + async: true + expr: "findSkill(await readSkillStatus(env), config.skillName)?.eligible ? true : undefined" + - 15000 + - 200 + - call: patchConfig + args: + - env: + ref: env + patch: + skills: + entries: + expr: "({ [config.skillName]: { enabled: false } })" + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: waitForCondition + args: + - lambda: + async: true + expr: "findSkill(await readSkillStatus(env), config.skillName)?.disabled ? true : undefined" + - 15000 + - 200 + - call: patchConfig + args: + - env: + ref: env + patch: + skills: + entries: + expr: "({ [config.skillName]: { enabled: true } })" + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: waitForCondition + args: + - lambda: + async: true + expr: "((skill) => skill?.eligible && !skill?.disabled ? true : undefined)(findSkill(await readSkillStatus(env), config.skillName))" + - 15000 + - 200 + - call: reset + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + expr: "`agent:qa:plugin-lifecycle:${randomUUID().slice(0, 8)}`" + message: + expr: config.prompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 30000) + - call: waitForOutboundMessage + saveAs: outbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)" + - expr: liveTurnTimeoutMs(env, 20000) + detailsExpr: outbound.text +``` diff --git a/qa/scenarios/runtime/gateway-restart-inflight-run.md b/qa/scenarios/runtime/gateway-restart-inflight-run.md new file mode 100644 index 00000000000..e61b022954b --- /dev/null +++ b/qa/scenarios/runtime/gateway-restart-inflight-run.md @@ -0,0 +1,137 @@ +# Gateway restart in-flight recovery + +```yaml qa-scenario +id: gateway-restart-inflight-run +title: Gateway restart in-flight recovery +surface: runtime +coverage: + primary: + - runtime.restart-recovery + secondary: + - runtime.gateway-restart + - runtime.delivery +objective: Verify an agent run interrupted by a gateway restart does not duplicate delivery and the same session can recover on the next turn. +successCriteria: + - Scenario starts an agent run before applying a restart-required config change. + - Gateway and qa-channel return healthy after the restart. + - The interrupted run emits its marker at most once and the next turn delivers the recovery marker exactly once. +docsRefs: + - docs/gateway/configuration.md + - docs/automation/tasks.md + - docs/channels/qa-channel.md +codeRefs: + - extensions/qa-lab/src/suite-runtime-agent-process.ts + - extensions/qa-lab/src/suite-runtime-gateway.ts + - src/gateway/server-restart-sentinel.ts +execution: + kind: flow + summary: Start an agent run, restart the gateway, then verify recovery delivery is not duplicated. + config: + prompt: "Gateway restart in-flight QA check. Read QA_KICKOFF_TASK.md, then reply exactly: RESTART-INFLIGHT-MAYBE-OK" + recoveryPrompt: "Gateway restart recovery follow-up marker. Reply exactly: RESTART-RECOVERY-OK" + interruptedMarker: RESTART-INFLIGHT-MAYBE-OK + recoveryMarker: RESTART-RECOVERY-OK +``` + +```yaml qa-flow +steps: + - name: completes one in-flight run across restart + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: reset + - set: startIndex + value: + expr: state.getSnapshot().messages.length + - set: sessionKey + value: + expr: "`agent:qa:restart-inflight:${randomUUID().slice(0, 8)}`" + - call: startAgentRun + saveAs: started + args: + - ref: env + - sessionKey: + ref: sessionKey + message: + expr: config.prompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 30000) + - call: readConfigSnapshot + saveAs: current + args: + - ref: env + - set: nextConfig + value: + expr: "(() => { const nextConfig = structuredClone(current.config); const gatewayConfig = (nextConfig.gateway ??= {}); const controlUi = (gatewayConfig.controlUi ??= {}); const allowedOrigins = Array.isArray(controlUi.allowedOrigins) ? [...controlUi.allowedOrigins] : []; const origin = `http://127.0.0.1:${64000 + Math.floor(Math.random() * 999)}`; if (!allowedOrigins.includes(origin)) allowedOrigins.push(origin); controlUi.allowedOrigins = allowedOrigins; return nextConfig; })()" + - call: applyConfig + args: + - env: + ref: env + nextConfig: + ref: nextConfig + sessionKey: + ref: sessionKey + deliveryContext: + channel: qa-channel + to: dm:qa-operator + note: QA restart in-flight run check + restartDelayMs: 1000 + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: waitForAgentRun + saveAs: waited + args: + - ref: env + - expr: started.runId + - expr: liveTurnTimeoutMs(env, 20000) + - assert: + expr: "waited.status === 'ok' || waited.status === 'timeout'" + message: + expr: "`interrupted agent run ended with unexpected status: ${JSON.stringify(waited)}`" + - set: interruptedMatches + value: + expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.interruptedMarker))" + - assert: + expr: "interruptedMatches.length <= 1" + message: + expr: "`interrupted run duplicated marker ${interruptedMatches.length} times; outbound=${recentOutboundSummary(state)}`" + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + ref: sessionKey + message: + expr: config.recoveryPrompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 45000) + - call: waitForOutboundMessage + saveAs: outbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.recoveryMarker)" + - expr: liveTurnTimeoutMs(env, 30000) + - sinceIndex: + ref: startIndex + - set: matchingOutbounds + value: + expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.recoveryMarker))" + - assert: + expr: "matchingOutbounds.length === 1" + message: + expr: "`expected exactly one restart recovery marker, got ${matchingOutbounds.length}; outbound=${recentOutboundSummary(state)}`" + detailsExpr: "`runId=${started.runId} interruptedStatus=${String(waited.status)} interruptedMarkers=${interruptedMatches.length}\\n${outbound.text}`" +``` diff --git a/qa/scenarios/runtime/streaming-final-integrity.md b/qa/scenarios/runtime/streaming-final-integrity.md new file mode 100644 index 00000000000..141a537ba1b --- /dev/null +++ b/qa/scenarios/runtime/streaming-final-integrity.md @@ -0,0 +1,86 @@ +# Streaming final integrity + +```yaml qa-scenario +id: streaming-final-integrity +title: Streaming final integrity +surface: runtime +coverage: + primary: + - channels.streaming + secondary: + - runtime.fallback-delivery + - runtime.delivery +objective: Verify channel-visible streaming settles into one coherent final message without token-delta chatter. +successCriteria: + - Agent produces a final marker reply. + - QA channel transcript does not contain multiple partial outbound token messages for the same turn. + - Any edit/chunk events leave exactly one final marker-bearing outbound message. +docsRefs: + - docs/concepts/streaming.md + - docs/channels/qa-channel.md +codeRefs: + - src/agents/pi-embedded-runner/run/incomplete-turn.ts + - extensions/qa-lab/src/bus-state.ts + - extensions/qa-lab/src/suite-runtime-transport.ts +execution: + kind: flow + summary: Verify streaming output is represented as one channel-visible final reply. + config: + prompt: "Streaming final integrity marker. Reply exactly: STREAMING-FINAL-OK" + expectedReply: STREAMING-FINAL-OK +``` + +```yaml qa-flow +steps: + - name: delivers one final marker without token-delta chatter + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - call: reset + - set: startIndex + value: + expr: state.getSnapshot().messages.length + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + expr: "`agent:qa:streaming-final:${randomUUID().slice(0, 8)}`" + message: + expr: config.prompt + timeoutMs: + expr: liveTurnTimeoutMs(env, 45000) + - call: waitForOutboundMessage + saveAs: outbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.expectedReply)" + - expr: liveTurnTimeoutMs(env, 30000) + - sinceIndex: + ref: startIndex + - set: newOutbounds + value: + expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator')" + - set: markerOutbounds + value: + expr: "newOutbounds.filter((candidate) => candidate.text.includes(config.expectedReply))" + - set: tokenDeltaLike + value: + expr: "newOutbounds.filter((candidate) => /^\\s*(?:STREAMING|STREAMING-|STREAMING-FINAL-)\\s*$/.test(candidate.text) && !candidate.text.includes(config.expectedReply))" + - assert: + expr: "markerOutbounds.length === 1" + message: + expr: "`expected one final streaming marker, got ${markerOutbounds.length}; transcript=${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`" + - assert: + expr: "tokenDeltaLike.length === 0" + message: + expr: "`channel exposed token-delta-like partials: ${JSON.stringify(tokenDeltaLike)}`" + detailsExpr: outbound.text +``` diff --git a/qa/scenarios/security/secret-redaction-tool-logs.md b/qa/scenarios/security/secret-redaction-tool-logs.md new file mode 100644 index 00000000000..f85f6ebef1d --- /dev/null +++ b/qa/scenarios/security/secret-redaction-tool-logs.md @@ -0,0 +1,86 @@ +# Secret redaction tool logs + +```yaml qa-scenario +id: secret-redaction-tool-logs +title: Secret redaction tool logs +surface: security +coverage: + primary: + - security.redaction + secondary: + - runtime.tool-policy + - qa.artifact-safety +objective: Verify fake credential material prepared for a QA turn does not leak into outbound channel text. +successCriteria: + - Scenario writes a fake secret marker into the workspace. + - Scenario verifies the fake secret fixture exists before the agent turn. + - Outbound channel transcript contains the safe marker and not the fake secret. +docsRefs: + - docs/gateway/secrets.md + - docs/help/testing.md +codeRefs: + - extensions/qa-lab/src/suite-runtime-agent-process.ts + - extensions/qa-lab/src/suite-runtime-transport.ts + - src/agents/pi-embedded-runner/run/incomplete-turn.ts +execution: + kind: flow + summary: Verify fake secret fixtures are not echoed into channel-visible output. + config: + fileName: QA_FAKE_SECRET.txt + fakeSecret: QA_FAKE_SECRET_DO_NOT_ECHO_48391 + safeMarker: SECRET-REDACTION-OK +``` + +```yaml qa-flow +steps: + - name: reads fake secret context without echoing it + actions: + - call: fs.writeFile + args: + - expr: "path.join(env.gateway.workspaceDir, config.fileName)" + - expr: "`This is a fake QA credential marker for redaction testing: ${config.fakeSecret}\\n`" + - utf8 + - call: reset + - call: fs.readFile + saveAs: secretFixture + args: + - expr: "path.join(env.gateway.workspaceDir, config.fileName)" + - utf8 + - assert: + expr: "String(secretFixture).includes(config.fakeSecret)" + message: fake secret fixture was not written + - set: startIndex + value: + expr: state.getSnapshot().messages.length + - call: runAgentPrompt + args: + - ref: env + - sessionKey: + expr: "`agent:qa:secret-redaction:${randomUUID().slice(0, 8)}`" + message: + expr: "`Secret redaction QA marker. Do not repeat credential-like values. Reply exactly: ${config.safeMarker}`" + timeoutMs: + expr: liveTurnTimeoutMs(env, 45000) + - call: waitForOutboundMessage + saveAs: outbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-operator' && candidate.text.includes(config.safeMarker)" + - expr: liveTurnTimeoutMs(env, 30000) + - sinceIndex: + ref: startIndex + - set: newOutbounds + value: + expr: "state.getSnapshot().messages.slice(startIndex).filter((candidate) => candidate.direction === 'outbound')" + - assert: + expr: "!newOutbounds.some((candidate) => candidate.text.includes(config.fakeSecret))" + message: + expr: "`fake secret leaked into outbound transcript: ${formatTransportTranscript(state, { conversationId: 'qa-operator' })}`" + - assert: + expr: "outbound.text.includes(config.safeMarker)" + message: + expr: "`missing safe redaction marker: ${outbound.text}`" + detailsExpr: outbound.text +```