mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 16:24:46 +00:00
test(qa-lab): add runtime parity depth scenarios
This commit is contained in:
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Changes
|
||||
|
||||
- Proxy: support HTTPS managed forward-proxy endpoints and scoped `proxy.tls.caFile` CA trust for proxy endpoint TLS. (#79171) Thanks @jesse-merhi.
|
||||
- QA-Lab: add first-hour 20-turn and optional 100-turn runtime parity scenarios, with tier metadata for standard and soak QA gates. (#80323) Thanks @100yenadmin.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
@@ -103,6 +103,19 @@ describe("qa scenario catalog", () => {
|
||||
expect(scenario.gatewayRuntime?.forwardHostHome).toBe(true);
|
||||
});
|
||||
|
||||
// Checks that both runtime parity depth scenarios load from the catalog with
// the tier and execution config values declared in their scenario markdown.
it("loads runtime parity tier metadata for first-hour and soak lanes", () => {
  const firstHourScenario = readQaScenarioById("runtime-first-hour-20-turn");
  const soakScenario = readQaScenarioById("runtime-soak-100-turn");

  // First-hour lane: "standard" tier, outcome-only comparison, 20 turns.
  expect(firstHourScenario.runtimeParityTier).toBe("standard");
  const firstHourConfig = readQaScenarioExecutionConfig(firstHourScenario.id);
  expect(firstHourConfig).toMatchObject({
    runtimeParityComparison: "outcome-only",
    turnCount: 20,
  });

  // Soak lane: "soak" tier, 100 turns. Only turnCount is pinned here.
  expect(soakScenario.runtimeParityTier).toBe("soak");
  const soakConfig = readQaScenarioExecutionConfig(soakScenario.id);
  expect(soakConfig).toMatchObject({ turnCount: 100 });
});
|
||||
|
||||
it("keeps the character eval scenario natural and task-shaped", () => {
|
||||
const characterConfig = readQaScenarioExecutionConfig("character-vibes-gollum") as
|
||||
| {
|
||||
|
||||
@@ -93,6 +93,8 @@ const qaScenarioGatewayRuntimeSchema = z.object({
|
||||
forwardHostHome: z.boolean().optional(),
|
||||
});
|
||||
|
||||
// Closed set of values accepted for a scenario's optional `runtimeParityTier`
// field: "standard", "optional", "live-only", or "soak".
const qaRuntimeParityTierSchema = z.enum(["standard", "optional", "live-only", "soak"]);
|
||||
|
||||
const qaFlowCallActionSchema = z.object({
|
||||
call: z.string().trim().min(1),
|
||||
args: z.array(z.unknown()).optional(),
|
||||
@@ -176,6 +178,7 @@ const qaSeedScenarioSchema = z.object({
|
||||
title: z.string().trim().min(1),
|
||||
surface: z.string().trim().min(1),
|
||||
category: z.string().trim().min(1).optional(),
|
||||
runtimeParityTier: qaRuntimeParityTierSchema.optional(),
|
||||
coverage: qaScenarioCoverageSchema.optional(),
|
||||
surfaces: z.array(z.string().trim().min(1)).min(1).optional(),
|
||||
risk: z.enum(["low", "medium", "high"]).optional(),
|
||||
@@ -206,6 +209,7 @@ const qaScenarioPackSchema = z.object({
|
||||
|
||||
export type QaScenarioExecution = z.infer<typeof qaScenarioExecutionSchema>;
|
||||
export type QaScenarioFlow = z.infer<typeof qaFlowSchema>;
|
||||
/** Runtime parity tier value, inferred from `qaRuntimeParityTierSchema`. */
export type QaRuntimeParityTier = z.infer<typeof qaRuntimeParityTierSchema>;
|
||||
export type QaSeedScenario = z.infer<typeof qaSeedScenarioSchema>;
|
||||
export type QaSeedScenarioWithSource = QaSeedScenario & {
|
||||
sourcePath: string;
|
||||
|
||||
@@ -6,7 +6,7 @@ Single source of truth for repo-backed QA suite bootstrap data.
|
||||
- `index.md` defines pack-level bootstrap data
|
||||
- each nested `*.md` scenario defines one runnable test via `qa-scenario` + `qa-flow`
|
||||
- scenario markdown may also define coverage IDs, category metadata, required plugins,
|
||||
lane filters, and gateway config patching
|
||||
lane filters, runtime parity tiers, and gateway config patching
|
||||
|
||||
- kickoff mission
|
||||
- QA operator identity
|
||||
@@ -20,6 +20,8 @@ Coverage tracking:
|
||||
- prefer reusing an existing feature ID over minting a scenario-shaped ID
|
||||
- avoid copying the scenario title into coverage IDs
|
||||
- use `pnpm openclaw qa coverage` to render the current inventory
|
||||
- use `runtimeParityTier` for runtime-pair gate membership: `standard`,
|
||||
`optional`, `live-only`, or `soak`
|
||||
- treat the old `coverage: ["id"]` / `coverage: - id` list shape as invalid
|
||||
- keep source-path tracking in the report, not in the scenario schema
|
||||
|
||||
|
||||
69
qa/scenarios/runtime/first-hour-20-turn.md
Normal file
69
qa/scenarios/runtime/first-hour-20-turn.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# First-hour 20-turn runtime parity
|
||||
|
||||
```yaml qa-scenario
|
||||
id: runtime-first-hour-20-turn
|
||||
title: First-hour 20-turn runtime parity
|
||||
surface: runtime
|
||||
runtimeParityTier: standard
|
||||
coverage:
|
||||
primary:
|
||||
- runtime.first-hour-20
|
||||
secondary:
|
||||
- runtime.long-context
|
||||
objective: Verify both runtimes preserve a same-session conversation across the required 20-turn maintainer gate.
|
||||
successCriteria:
|
||||
- The same QA session accepts 20 sequential user turns.
|
||||
- Every turn receives the requested marker reply without losing session state.
|
||||
- Runtime parity captures wall-clock and token data for the whole 20-turn cell.
|
||||
docsRefs:
|
||||
- docs/concepts/qa-e2e-automation.md
|
||||
- qa/scenarios/index.md
|
||||
codeRefs:
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
- extensions/qa-lab/src/runtime-parity.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Run 20 deterministic same-session marker turns through the runtime pair.
|
||||
config:
|
||||
runtimeParityComparison: outcome-only
|
||||
sessionKey: agent:qa:first-hour-20-turn
|
||||
turnCount: 20
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: runs 20 same-session marker turns
|
||||
actions:
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: reset
|
||||
- set: turns
|
||||
value:
|
||||
expr: "Array.from({ length: config.turnCount }, (_entry, index) => ({ index, marker: `FIRST-HOUR-20-${String(index + 1).padStart(2, '0')}` }))"
|
||||
- forEach:
|
||||
items:
|
||||
ref: turns
|
||||
item: turn
|
||||
actions:
|
||||
- set: cursor
|
||||
value:
|
||||
expr: state.getSnapshot().messages.length
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
expr: config.sessionKey
|
||||
message:
|
||||
expr: "'first-hour 20-turn marker check ' + (turn.index + 1) + ': reply exactly `' + turn.marker + '`'"
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 60000)
|
||||
- call: waitForCondition
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(cursor).some((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && normalizeLowercaseStringOrEmpty(candidate.text).includes(normalizeLowercaseStringOrEmpty(turn.marker)))"
|
||||
- expr: liveTurnTimeoutMs(env, 60000)
|
||||
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
|
||||
detailsExpr: "`completed ${turns.length} first-hour depth turns`"
|
||||
```
|
||||
68
qa/scenarios/runtime/soak-100-turn.md
Normal file
68
qa/scenarios/runtime/soak-100-turn.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# 100-turn runtime parity soak
|
||||
|
||||
```yaml qa-scenario
|
||||
id: runtime-soak-100-turn
|
||||
title: 100-turn runtime parity soak
|
||||
surface: runtime
|
||||
runtimeParityTier: soak
|
||||
coverage:
|
||||
primary:
|
||||
- runtime.soak-100
|
||||
secondary:
|
||||
- runtime.long-context
|
||||
objective: Provide an optional long-run soak that can be scheduled or run in Testbox without entering the maintainer default gate.
|
||||
successCriteria:
|
||||
- The same QA session accepts 100 sequential user turns.
|
||||
- Every turn receives the requested marker reply without losing session state.
|
||||
- Runtime parity captures a token estimate or live token usage for the full soak cell.
|
||||
docsRefs:
|
||||
- docs/concepts/qa-e2e-automation.md
|
||||
- qa/scenarios/index.md
|
||||
codeRefs:
|
||||
- extensions/qa-lab/src/suite.ts
|
||||
- extensions/qa-lab/src/runtime-parity.ts
|
||||
execution:
|
||||
kind: flow
|
||||
summary: Run the optional 100-turn same-session runtime soak.
|
||||
config:
|
||||
sessionKey: agent:qa:runtime-soak-100
|
||||
turnCount: 100
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
steps:
|
||||
- name: runs 100 same-session marker turns
|
||||
actions:
|
||||
- call: waitForGatewayHealthy
|
||||
args:
|
||||
- ref: env
|
||||
- 60000
|
||||
- call: reset
|
||||
- set: turns
|
||||
value:
|
||||
expr: "Array.from({ length: config.turnCount }, (_entry, index) => ({ index, marker: `SOAK-100-${String(index + 1).padStart(3, '0')}` }))"
|
||||
- forEach:
|
||||
items:
|
||||
ref: turns
|
||||
item: turn
|
||||
actions:
|
||||
- set: cursor
|
||||
value:
|
||||
expr: state.getSnapshot().messages.length
|
||||
- call: runAgentPrompt
|
||||
args:
|
||||
- ref: env
|
||||
- sessionKey:
|
||||
expr: config.sessionKey
|
||||
message:
|
||||
expr: "'runtime 100-turn soak marker check ' + (turn.index + 1) + ': reply exactly `' + turn.marker + '`'"
|
||||
timeoutMs:
|
||||
expr: liveTurnTimeoutMs(env, 60000)
|
||||
- call: waitForCondition
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(cursor).some((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && normalizeLowercaseStringOrEmpty(candidate.text).includes(normalizeLowercaseStringOrEmpty(turn.marker)))"
|
||||
- expr: liveTurnTimeoutMs(env, 60000)
|
||||
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
|
||||
detailsExpr: "`completed ${turns.length} soak turns`"
|
||||
```
|
||||
Reference in New Issue
Block a user