test(qa-lab): add runtime parity depth scenarios

This commit is contained in:
Vincent Koc
2026-05-17 13:49:15 +08:00
parent 16ef041b5d
commit e66a6c8c8d
6 changed files with 158 additions and 1 deletions

View File

@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Proxy: support HTTPS managed forward-proxy endpoints and scoped `proxy.tls.caFile` CA trust for proxy endpoint TLS. (#79171) Thanks @jesse-merhi.
- QA-Lab: add first-hour 20-turn and optional 100-turn runtime parity scenarios, with tier metadata for standard and soak QA gates. (#80323) Thanks @100yenadmin.
### Fixes

View File

@@ -103,6 +103,19 @@ describe("qa scenario catalog", () => {
expect(scenario.gatewayRuntime?.forwardHostHome).toBe(true);
});
it("loads runtime parity tier metadata for first-hour and soak lanes", () => {
  // Both lanes are looked up by their scenario ids before any expectation is checked.
  const firstHourScenario = readQaScenarioById("runtime-first-hour-20-turn");
  const soakScenario = readQaScenarioById("runtime-soak-100-turn");

  // First-hour lane: standard tier, outcome-only comparison, 20 turns.
  expect(firstHourScenario.runtimeParityTier).toBe("standard");
  const firstHourConfig = readQaScenarioExecutionConfig(firstHourScenario.id);
  const expectedFirstHourConfig = {
    runtimeParityComparison: "outcome-only",
    turnCount: 20,
  };
  expect(firstHourConfig).toMatchObject(expectedFirstHourConfig);

  // Soak lane: soak tier, 100 turns.
  expect(soakScenario.runtimeParityTier).toBe("soak");
  const soakConfig = readQaScenarioExecutionConfig(soakScenario.id);
  expect(soakConfig).toMatchObject({ turnCount: 100 });
});
it("keeps the character eval scenario natural and task-shaped", () => {
const characterConfig = readQaScenarioExecutionConfig("character-vibes-gollum") as
| {

View File

@@ -93,6 +93,8 @@ const qaScenarioGatewayRuntimeSchema = z.object({
forwardHostHome: z.boolean().optional(),
});
// Allowed values for a scenario's optional `runtimeParityTier` field.
const qaRuntimeParityTierSchema = z.enum(["standard", "optional", "live-only", "soak"]);
const qaFlowCallActionSchema = z.object({
call: z.string().trim().min(1),
args: z.array(z.unknown()).optional(),
@@ -176,6 +178,7 @@ const qaSeedScenarioSchema = z.object({
title: z.string().trim().min(1),
surface: z.string().trim().min(1),
category: z.string().trim().min(1).optional(),
runtimeParityTier: qaRuntimeParityTierSchema.optional(),
coverage: qaScenarioCoverageSchema.optional(),
surfaces: z.array(z.string().trim().min(1)).min(1).optional(),
risk: z.enum(["low", "medium", "high"]).optional(),
@@ -206,6 +209,7 @@ const qaScenarioPackSchema = z.object({
export type QaScenarioExecution = z.infer<typeof qaScenarioExecutionSchema>;
export type QaScenarioFlow = z.infer<typeof qaFlowSchema>;
export type QaRuntimeParityTier = z.infer<typeof qaRuntimeParityTierSchema>;
export type QaSeedScenario = z.infer<typeof qaSeedScenarioSchema>;
export type QaSeedScenarioWithSource = QaSeedScenario & {
sourcePath: string;

View File

@@ -6,7 +6,7 @@ Single source of truth for repo-backed QA suite bootstrap data.
- `index.md` defines pack-level bootstrap data
- each nested `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow`
- scenario markdown may also define coverage IDs, category metadata, required plugins,
lane filters, and gateway config patching
lane filters, runtime parity tiers, and gateway config patching
- kickoff mission
- QA operator identity
@@ -20,6 +20,8 @@ Coverage tracking:
- prefer reusing an existing feature ID over minting a scenario-shaped ID
- avoid copying the scenario title into coverage IDs
- use `pnpm openclaw qa coverage` to render the current inventory
- use `runtimeParityTier` for runtime-pair gate membership: `standard`,
`optional`, `live-only`, or `soak`
- treat the old `coverage: ["id"]` / `coverage: - id` list shape as invalid
- keep source-path tracking in the report, not in the scenario schema

View File

@@ -0,0 +1,69 @@
# First-hour 20-turn runtime parity
```yaml qa-scenario
# Scenario metadata for the 20-turn same-session runtime parity check.
id: runtime-first-hour-20-turn
title: First-hour 20-turn runtime parity
surface: runtime
# Runtime-pair gate membership; this scenario declares the `standard` tier.
runtimeParityTier: standard
coverage:
primary:
- runtime.first-hour-20
secondary:
- runtime.long-context
objective: Verify both runtimes preserve a same-session conversation across the required 20-turn maintainer gate.
successCriteria:
- The same QA session accepts 20 sequential user turns.
- Every turn receives the requested marker reply without losing session state.
- Runtime parity captures wall-clock and token data for the whole 20-turn cell.
docsRefs:
- docs/concepts/qa-e2e-automation.md
- qa/scenarios/index.md
codeRefs:
- extensions/qa-lab/src/suite.ts
- extensions/qa-lab/src/runtime-parity.ts
execution:
kind: flow
summary: Run 20 deterministic same-session marker turns through the runtime pair.
# The flow reads `sessionKey` and `turnCount` from here as `config.sessionKey` / `config.turnCount`.
config:
# NOTE(review): the consumer of `runtimeParityComparison` is not visible here — confirm what `outcome-only` changes.
runtimeParityComparison: outcome-only
sessionKey: agent:qa:first-hour-20-turn
turnCount: 20
```
```yaml qa-flow
# Single-step flow: send `config.turnCount` prompts on one session key and wait for each marker reply.
steps:
- name: runs 20 same-session marker turns
actions:
# NOTE(review): 60000 is presumably a timeout in milliseconds — confirm against the helper.
- call: waitForGatewayHealthy
args:
- ref: env
- 60000
- call: reset
# Build one entry per turn: a zero-based `index` and a marker whose 1-based number is zero-padded to 2 digits.
- set: turns
value:
expr: "Array.from({ length: config.turnCount }, (_entry, index) => ({ index, marker: `FIRST-HOUR-20-${String(index + 1).padStart(2, '0')}` }))"
- forEach:
items:
ref: turns
item: turn
actions:
# Record the message count before prompting so the wait below only inspects messages added afterwards.
- set: cursor
value:
expr: state.getSnapshot().messages.length
# Every turn uses the same `config.sessionKey`; the prompt asks for the turn's marker verbatim.
- call: runAgentPrompt
args:
- ref: env
- sessionKey:
expr: config.sessionKey
message:
expr: "'first-hour 20-turn marker check ' + (turn.index + 1) + ': reply exactly `' + turn.marker + '`'"
timeoutMs:
expr: liveTurnTimeoutMs(env, 60000)
# Passes once a message after `cursor` is outbound, belongs to conversation `qa-operator`,
# and its normalized text contains the normalized marker.
# NOTE(review): the second and third args look like a timeout and a poll interval — confirm.
- call: waitForCondition
args:
- lambda:
expr: "state.getSnapshot().messages.slice(cursor).some((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && normalizeLowercaseStringOrEmpty(candidate.text).includes(normalizeLowercaseStringOrEmpty(turn.marker)))"
- expr: liveTurnTimeoutMs(env, 60000)
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
detailsExpr: "`completed ${turns.length} first-hour depth turns`"
```

View File

@@ -0,0 +1,68 @@
# 100-turn runtime parity soak
```yaml qa-scenario
# Scenario metadata for the 100-turn same-session runtime parity soak.
id: runtime-soak-100-turn
title: 100-turn runtime parity soak
surface: runtime
# Runtime-pair gate membership; this scenario declares the `soak` tier.
runtimeParityTier: soak
coverage:
primary:
- runtime.soak-100
secondary:
- runtime.long-context
objective: Provide an optional long-run soak that can be scheduled or run in Testbox without entering the maintainer default gate.
successCriteria:
- The same QA session accepts 100 sequential user turns.
- Every turn receives the requested marker reply without losing session state.
- Runtime parity captures token estimate or live token usage for the full soak cell.
docsRefs:
- docs/concepts/qa-e2e-automation.md
- qa/scenarios/index.md
codeRefs:
- extensions/qa-lab/src/suite.ts
- extensions/qa-lab/src/runtime-parity.ts
execution:
kind: flow
summary: Run the optional 100-turn same-session runtime soak.
# The flow reads these values as `config.sessionKey` / `config.turnCount`.
config:
sessionKey: agent:qa:runtime-soak-100
turnCount: 100
```
```yaml qa-flow
# Single-step flow: send `config.turnCount` prompts on one session key and wait for each marker reply.
steps:
- name: runs 100 same-session marker turns
actions:
# NOTE(review): 60000 is presumably a timeout in milliseconds — confirm against the helper.
- call: waitForGatewayHealthy
args:
- ref: env
- 60000
- call: reset
# Build one entry per turn: a zero-based `index` and a marker whose 1-based number is zero-padded to 3 digits.
- set: turns
value:
expr: "Array.from({ length: config.turnCount }, (_entry, index) => ({ index, marker: `SOAK-100-${String(index + 1).padStart(3, '0')}` }))"
- forEach:
items:
ref: turns
item: turn
actions:
# Record the message count before prompting so the wait below only inspects messages added afterwards.
- set: cursor
value:
expr: state.getSnapshot().messages.length
# Every turn uses the same `config.sessionKey`; the prompt asks for the turn's marker verbatim.
- call: runAgentPrompt
args:
- ref: env
- sessionKey:
expr: config.sessionKey
message:
expr: "'runtime 100-turn soak marker check ' + (turn.index + 1) + ': reply exactly `' + turn.marker + '`'"
timeoutMs:
expr: liveTurnTimeoutMs(env, 60000)
# Passes once a message after `cursor` is outbound, belongs to conversation `qa-operator`,
# and its normalized text contains the normalized marker.
# NOTE(review): the second and third args look like a timeout and a poll interval — confirm.
- call: waitForCondition
args:
- lambda:
expr: "state.getSnapshot().messages.slice(cursor).some((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && normalizeLowercaseStringOrEmpty(candidate.text).includes(normalizeLowercaseStringOrEmpty(turn.marker)))"
- expr: liveTurnTimeoutMs(env, 60000)
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
detailsExpr: "`completed ${turns.length} soak turns`"
```