Files
openclaw/extensions/qa-lab/confidence-profiles/codex-100.json
2026-05-25 22:00:21 +01:00

169 lines
7.1 KiB
JSON

{
"version": 1,
"profile": "codex-100",
"lanes": [
{
"id": "tool-defaults-direct",
"title": "Tool-defaults direct runtime parity",
"kind": "qa-suite-summary",
"artifact": "tool-defaults-direct/qa-suite-summary.json",
"required": true,
"productImpact": "P2",
"qaImpact": "P0",
"issue": "https://github.com/openclaw/openclaw/issues/80319",
"ownerAction": "Fix product or harness before claiming the tool-defaults gate is trusted.",
"labels": ["qa-lab", "runtime-parity", "codex"]
},
{
"id": "openclaw-dynamic-tools-direct",
"title": "OpenClaw dynamic integration tools direct runtime parity",
"kind": "qa-suite-summary",
"artifact": "openclaw-dynamic-tools-direct/qa-suite-summary.json",
"required": true,
"productImpact": "P1",
"qaImpact": "P0",
"issue": "https://github.com/openclaw/openclaw/issues/80319",
"ownerAction": "Investigate any hard failure as an OpenClaw dynamic integration or QA loading regression.",
"labels": ["qa-lab", "runtime-parity", "openclaw-dynamic-tools"]
},
{
"id": "tool-defaults-searchable",
"title": "Tool-defaults searchable runtime parity",
"kind": "qa-suite-summary",
"artifact": "tool-defaults-searchable/qa-suite-summary.json",
"required": true,
"failureVerdict": "mock-limitation",
"skipBackfillLane": "openclaw-dynamic-tools-searchable-live",
"productImpact": "P4",
"qaImpact": "P2",
"issue": "https://github.com/openclaw/openclaw/issues/80319",
"ownerAction": "Keep as report-only until searchable/deferred tool modeling has no mock-only ambiguity.",
"labels": ["qa-lab", "runtime-parity", "searchable-tools"]
},
{
"id": "first-hour-20-direct",
"title": "First-hour 20-turn direct runtime parity",
"kind": "qa-suite-summary",
"artifact": "first-hour-20-direct/qa-suite-summary.json",
"required": true,
"skipBackfillLane": "codex-native-live",
"productImpact": "P1",
"qaImpact": "P0",
"ownerAction": "Triage row-by-row; do not file product bugs unless live/native proof reproduces.",
"labels": ["qa-lab", "runtime-parity", "first-hour"]
},
{
"id": "mock-token-efficiency",
"title": "Mock assistant-message token efficiency estimate",
"kind": "token-efficiency-summary",
"artifact": "first-hour-20-direct-report/qa-runtime-token-efficiency-summary.json",
"required": true,
"expectedTokenUsageSource": "mock-estimate",
"productImpact": "P4",
"qaImpact": "P1",
"ownerAction": "Fix labeling before trusting token-efficiency comparisons.",
"labels": ["qa-lab", "runtime-parity", "token-efficiency"]
},
{
"id": "fault-injection-mock",
"title": "Mock fault-injection runtime parity",
"kind": "qa-suite-summary",
"artifact": "fault-injection-mock/qa-suite-summary.json",
"required": true,
"skipBackfillLane": "codex-native-live",
"productImpact": "P2",
"qaImpact": "P0",
"ownerAction": "Treat failures as retry/recovery regressions unless evidence shows fixture drift.",
"labels": ["qa-lab", "runtime-parity", "fault-injection"]
},
{
"id": "jsonl-expanded",
"title": "Expanded curated JSONL replay",
"kind": "jsonl-replay-summary",
"artifact": "jsonl-expanded/qa-jsonl-replay-summary.json",
"required": true,
"productImpact": "P2",
"qaImpact": "P0",
"ownerAction": "Inspect first drift turn and transcript class before filing any product issue.",
"labels": ["qa-lab", "runtime-parity", "jsonl-replay"]
},
{
"id": "confidence-self-test",
"title": "Seeded confidence negative controls",
"kind": "self-test-summary",
"artifact": "confidence-self-test/qa-confidence-self-test-summary.json",
"required": true,
"productImpact": "P4",
"qaImpact": "P0",
"ownerAction": "Fix the harness before trusting any green parity result.",
"labels": ["qa-lab", "confidence-gate", "negative-controls"]
},
{
"id": "codex-native-live",
"title": "Codex-native live workspace capability proof",
"kind": "qa-suite-summary",
"artifact": "codex-native-live/qa-suite-summary.json",
"required": true,
"missingVerdict": "environment-blocked",
"missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
"productImpact": "P1",
"qaImpact": "P1",
"ownerAction": "Run with live-frontier OAuth before using this lane as product proof.",
"labels": ["qa-lab", "runtime-parity", "live-proof"]
},
{
"id": "first-hour-live",
"title": "Live first-hour capability proof",
"kind": "qa-suite-summary",
"artifact": "first-hour-live/qa-suite-summary.json",
"required": true,
"missingVerdict": "environment-blocked",
"missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
"productImpact": "P1",
"qaImpact": "P1",
"ownerAction": "Run with live-frontier OAuth before claiming live first-hour coverage.",
"labels": ["qa-lab", "runtime-parity", "live-proof"]
},
{
"id": "openclaw-dynamic-tools-searchable-live",
"title": "Live OpenClaw dynamic tools searchable proof",
"kind": "qa-suite-summary",
"artifact": "openclaw-dynamic-tools-searchable-live/qa-suite-summary.json",
"required": true,
"missingVerdict": "environment-blocked",
"missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
"productImpact": "P1",
"qaImpact": "P1",
"ownerAction": "Run with live-frontier OAuth before claiming production-shaped searchable OpenClaw dynamic tool coverage.",
"labels": ["qa-lab", "runtime-parity", "searchable-tools", "live-proof"]
},
{
"id": "live-token-efficiency",
"title": "Live assistant-message token efficiency",
"kind": "token-efficiency-summary",
"artifact": "live-token-efficiency/qa-runtime-token-efficiency-summary.json",
"required": true,
"expectedTokenUsageSource": "live-usage",
"missingVerdict": "environment-blocked",
"missingReason": "Live/OAuth runner or OpenAI credentials were unavailable for this proof bundle.",
"productImpact": "P3",
"qaImpact": "P1",
"ownerAction": "Run a live-frontier runtime parity summary and regenerate token efficiency.",
"labels": ["qa-lab", "runtime-parity", "token-efficiency"]
},
{
"id": "soak-100",
"title": "Optional 100-turn soak",
"kind": "qa-suite-summary",
"artifact": "soak-100/qa-suite-summary.json",
"required": false,
"missingVerdict": "environment-blocked",
"missingReason": "Scheduled/Testbox soak runner did not upload artifacts for this proof bundle.",
"productImpact": "P3",
"qaImpact": "P2",
"ownerAction": "Run remotely with a long timeout or record the runner budget blocker.",
"labels": ["qa-lab", "runtime-parity", "soak"]
}
]
}