fix(qa-lab): hard gate runtime tool coverage

This commit is contained in:
Vincent Koc
2026-05-18 09:42:10 +08:00
parent 73f4657869
commit 58e1351863
19 changed files with 318 additions and 41 deletions

View File

@@ -955,6 +955,57 @@ jobs:
retention-days: 14
if-no-files-found: warn
# Release-check job that runs `openclaw qa coverage --tools` against the
# standard-tier runtime parity summary and publishes the resulting report.
runtime_tool_coverage_release_checks:
name: Enforce QA Lab runtime tool coverage
needs: [resolve_target, qa_lab_runtime_parity_release_checks]
# always() keeps this job eligible even when a needed job did not succeed; the
# rerun_group check limits it to the "all", "qa", and "qa-parity" groups.
if: always() && contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
runs-on: ubuntu-24.04
timeout-minutes: 15
permissions:
contents: read
actions: read
env:
OPENCLAW_BUILD_PRIVATE_QA: "1"
OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
steps:
# Check out the resolved revision without persisting credentials in the clone.
- name: Checkout selected ref
uses: actions/checkout@v6
with:
persist-credentials: false
ref: ${{ needs.resolve_target.outputs.revision }}
fetch-depth: 1
- name: Setup Node environment
uses: ./.github/actions/setup-node-env
with:
node-version: ${{ env.NODE_VERSION }}
pnpm-version: ${{ env.PNPM_VERSION }}
install-bun: "true"
# Fetches the artifact named for this revision into .artifacts/qa-e2e/.
# NOTE(review): presumably uploaded by qa_lab_runtime_parity_release_checks — confirm.
- name: Download runtime parity artifacts
uses: actions/download-artifact@v4
with:
name: release-qa-runtime-parity-${{ needs.resolve_target.outputs.revision }}
path: .artifacts/qa-e2e/
# Evaluates tool coverage against the standard-tier suite summary and writes
# the Markdown report that the next step uploads.
- name: Enforce standard runtime tool coverage
run: |
set -euo pipefail
pnpm openclaw qa coverage \
--repo-root . \
--tools \
--summary .artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json \
--output .artifacts/qa-e2e/runtime-parity-standard-report/qa-runtime-tool-coverage-report.md
# if: always() uploads the report directory even when the enforcement step
# failed; a missing directory only produces a warning.
- name: Upload runtime tool coverage artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: release-qa-runtime-tool-coverage-${{ needs.resolve_target.outputs.revision }}
path: .artifacts/qa-e2e/runtime-parity-standard-report/
retention-days: 14
if-no-files-found: warn
qa_live_matrix_release_checks:
name: Run QA Lab live Matrix lane
needs: [resolve_target]
@@ -1434,6 +1485,7 @@ jobs:
- qa_lab_parity_lane_release_checks
- qa_lab_parity_report_release_checks
- qa_lab_runtime_parity_release_checks
- runtime_tool_coverage_release_checks
- qa_live_matrix_release_checks
- qa_live_telegram_release_checks
- qa_live_discord_release_checks
@@ -1465,6 +1517,7 @@ jobs:
"qa_lab_parity_lane_release_checks=${{ needs.qa_lab_parity_lane_release_checks.result }}" \
"qa_lab_parity_report_release_checks=${{ needs.qa_lab_parity_report_release_checks.result }}" \
"qa_lab_runtime_parity_release_checks=${{ needs.qa_lab_runtime_parity_release_checks.result }}" \
"runtime_tool_coverage_release_checks=${{ needs.runtime_tool_coverage_release_checks.result }}" \
"qa_live_matrix_release_checks=${{ needs.qa_live_matrix_release_checks.result }}" \
"qa_live_telegram_release_checks=${{ needs.qa_live_telegram_release_checks.result }}" \
"qa_live_discord_release_checks=${{ needs.qa_live_discord_release_checks.result }}" \

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- QA-Lab: add live-only harness self-health scenarios for plugin hook crashes, manifest contract errors, and WebChat direct-reply self-message routing. (#80323) Thanks @100yenadmin.
- QA-Lab: add runtime tool fixture scenarios and coverage reporting for Codex-native workspace tools, OpenClaw dynamic tools, and optional plugin-backed tools. Fixes #80173. Thanks @100yenadmin.
- QA-Lab: expose runtime tool fixture coverage through `openclaw qa coverage --tools`, with optional suite-summary evaluation for parity gate artifacts. Thanks @100yenadmin.
- QA-Lab: hard-gate required OpenClaw dynamic runtime-tool drift in the standard Codex-vs-Pi tier with a blocking release-check verifier and publish the tool coverage report artifact. Fixes #80339; refs #80319. Thanks @100yenadmin.
- QA-Lab: add the personal-agent approval-denial scenario so the benchmark pack verifies denied local reads stop cleanly without tool progress or fixture leaks. (#83150) Thanks @iFiras-Max1.
### Fixes

View File

@@ -203,7 +203,7 @@ Docker release-path soak; `full` forces soak on.
The umbrella records the dispatched child run ids, and the final `Verify full validation` job re-checks current child run conclusions and appends slowest-job tables for each child run. If a child workflow is rerun and turns green, rerun only the parent verifier job to refresh the umbrella result and timing summary.
For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. For one failed cross-OS lane, combine `rerun_group=cross-os` with `cross_os_suite_filter`, for example `windows/packaged-upgrade`; long cross-OS commands emit heartbeat lines and packaged-upgrade summaries include per-phase timings. QA release-check lanes are advisory, so QA-only failures warn but do not block the release-check verifier.
For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. For one failed cross-OS lane, combine `rerun_group=cross-os` with `cross_os_suite_filter`, for example `windows/packaged-upgrade`; long cross-OS commands emit heartbeat lines and packaged-upgrade summaries include per-phase timings. QA release-check lanes are advisory except the standard runtime tool coverage gate, which blocks when required OpenClaw dynamic tools drift or disappear from the standard tier summary.
`OpenClaw Release Checks` uses the trusted workflow ref to resolve the selected ref once into a `release-package-under-test` tarball, then passes that artifact to cross-OS checks and Package Acceptance, plus the live/E2E release-path Docker workflow when soak coverage runs. That keeps the package bytes consistent across release boxes and avoids repacking the same candidate in multiple child jobs.

View File

@@ -442,8 +442,10 @@ Focused `npm-telegram` reruns require `release_package_spec` or
`npm_telegram_package_spec`; full/all runs with `release_profile=full` use the
release-checks package artifact. Focused
cross-OS reruns can add `cross_os_suite_filter=windows/packaged-upgrade` or
another OS/suite filter. QA release-check failures are advisory; a QA-only
failure does not block release validation.
another OS/suite filter. QA release-check lanes are advisory, except for the
standard runtime tool coverage gate, which blocks release validation when
required OpenClaw dynamic tools drift or disappear from the standard tier
summary.
### Vitest

View File

@@ -166,9 +166,10 @@ summaries include per-phase timings for packaged upgrade lanes, and long-running
commands print heartbeat lines so a stuck Windows update is visible before the
job timeout.
QA release-check lanes are advisory. A QA-only failure is reported as a warning
and does not block the release-check verifier; rerun `rerun_group=qa`,
`qa-parity`, or `qa-live` when you need fresh QA evidence.
QA release-check lanes are advisory except the standard runtime tool coverage
gate. Required OpenClaw dynamic tool drift in the standard tier blocks the
release-check verifier; other QA-only failures are reported as warnings. Rerun
`rerun_group=qa`, `qa-parity`, or `qa-live` when you need fresh QA evidence.
## Evidence to keep

View File

@@ -979,6 +979,64 @@ describe("qa cli runtime", () => {
expectWriteContains(stdoutWrite, "codex-native-workspace");
});
// Verifies that the coverage command, run with `tools: true` and a summary file
// whose web_search scenario records tool-call-shape drift, sets a failing
// process exit code and prints the failing verdict.
it("exits nonzero when tool coverage summary has required drift", async () => {
  const priorExitCode = process.exitCode;
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-tool-coverage-"));
  try {
    // Reset first so an exit code leaked by an earlier test cannot satisfy the
    // `toBe(1)` assertion below; the prior value is restored in `finally`.
    process.exitCode = undefined;
    await fs.writeFile(
      path.join(repoRoot, "runtime-summary.json"),
      JSON.stringify({
        scenarios: [
          {
            name: "runtime-tool-web-search",
            status: "fail",
            runtimeParity: {
              scenarioId: "runtime-tool-web-search",
              drift: "tool-call-shape",
              driftDetails: "Codex emitted no web_search call",
              cells: {
                pi: {
                  runtime: "pi",
                  transcriptBytes: "",
                  toolCalls: [{ tool: "web_search", argsHash: "a", resultHash: "r" }],
                  finalText: "",
                  usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
                  wallClockMs: 1,
                  bootStateLines: [],
                },
                codex: {
                  runtime: "codex",
                  transcriptBytes: "",
                  toolCalls: [],
                  finalText: "",
                  usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
                  wallClockMs: 1,
                  bootStateLines: [],
                },
              },
            },
          },
        ],
        run: { runtimePair: ["pi", "codex"] },
      }),
      "utf8",
    );

    await runQaCoverageReportCommand({
      repoRoot,
      tools: true,
      summary: "runtime-summary.json",
    });

    expect(process.exitCode).toBe(1);
    expectWriteContains(stdoutWrite, "- Verdict: fail");
    expectWriteContains(stdoutWrite, "web-search drift=tool-call-shape");
  } finally {
    // Never leak the failing exit code or the temp directory into later tests.
    process.exitCode = priorExitCode;
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
it("resolves character eval paths and passes model refs through", async () => {
await runQaCharacterEvalCommand({
repoRoot: "/tmp/openclaw-repo",

View File

@@ -769,6 +769,9 @@ export async function runQaCoverageReportCommand(opts: {
? `${JSON.stringify(report, null, 2)}\n`
: renderQaToolCoverageMarkdownReport(report);
outputLabel = "QA tool coverage report";
if (summary && !report.pass) {
process.exitCode = 1;
}
} else {
if (opts.summary?.trim()) {
throw new Error("--summary requires --tools.");

View File

@@ -120,6 +120,7 @@ describe("qa scenario catalog", () => {
const applyPatch = readQaScenarioById("runtime-tool-apply-patch");
const messageTool = readQaScenarioById("runtime-tool-message-tool");
const tavilySearch = readQaScenarioById("runtime-tool-tavily-search");
const webSearch = readQaScenarioById("runtime-tool-web-search");
expect(applyPatch.runtimeParityTier).toBe("standard");
expect(messageTool.runtimeParityTier).toBe("optional");
@@ -140,6 +141,16 @@ describe("qa scenario catalog", () => {
required: false,
},
});
expect(readQaScenarioExecutionConfig(webSearch.id)).toMatchObject({
toolName: "web_search",
toolCoverage: {
bucket: "openclaw-dynamic-integration",
expectedLayer: "openclaw-dynamic",
capabilityLayer: "openclaw-dynamic-direct",
required: true,
},
});
expect(readQaScenarioExecutionConfig(webSearch.id)).not.toHaveProperty("knownHarnessGap");
});
it("loads the Codex Pi-shaped Read vocabulary live parity canary", () => {

View File

@@ -223,6 +223,83 @@ describe("qa tool coverage report", () => {
);
});
// A required OpenClaw dynamic row with no tracking entry must fail the report
// when the evaluated summary records drift for its scenario.
it("fails untracked required OpenClaw dynamic tool drift", () => {
  const webSearchScenario = makeScenario("tool-web-search", "web-search", {
    toolName: "web_search",
    toolCoverage: {
      bucket: "openclaw-dynamic-integration",
      expectedLayer: "openclaw-dynamic",
      capabilityLayer: "openclaw-dynamic-direct",
      required: true,
    },
  });

  // Pi issued the web_search call while Codex issued none, recorded as tool-call-shape drift.
  const { pass, failures } = buildQaToolCoverageReport({
    scenarios: [webSearchScenario],
    summary: {
      scenarios: [
        {
          name: "tool web_search",
          status: "fail",
          runtimeParity: {
            scenarioId: "tool-web-search",
            drift: "tool-call-shape",
            driftDetails: "Codex emitted no web_search call",
            cells: {
              pi: {
                runtime: "pi",
                transcriptBytes: "",
                toolCalls: [{ tool: "web_search", argsHash: "a", resultHash: "r" }],
                finalText: "",
                usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
                wallClockMs: 1,
                bootStateLines: [],
              },
              codex: {
                runtime: "codex",
                transcriptBytes: "",
                toolCalls: [],
                finalText: "",
                usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
                wallClockMs: 1,
                bootStateLines: [],
              },
            },
          },
        },
      ],
    },
    generatedAt: "2026-05-10T00:00:00.000Z",
  });

  expect(pass).toBe(false);
  expect(failures).toEqual(["web-search drift=tool-call-shape (Codex emitted no web_search call)"]);
});
// When a summary is supplied but holds no result for a required, untracked
// tool, the row is reported as `not-run` and fails the report.
it("fails untracked required tools missing from an evaluated summary", () => {
  const webSearchScenario = makeScenario("tool-web-search", "web-search", {
    toolCoverage: {
      bucket: "openclaw-dynamic-integration",
      expectedLayer: "openclaw-dynamic",
      capabilityLayer: "openclaw-dynamic-direct",
      required: true,
    },
  });

  const { pass, failures } = buildQaToolCoverageReport({
    scenarios: [webSearchScenario],
    summary: { scenarios: [] },
    generatedAt: "2026-05-10T00:00:00.000Z",
  });

  expect(pass).toBe(false);
  expect(failures).toEqual(["web-search drift=not-run"]);
});
it("rejects unknown runtime tool coverage buckets", () => {
expect(() =>
buildQaToolCoverageReport({
@@ -301,5 +378,13 @@ describe("qa tool coverage report", () => {
"#80173 Tavily tools are listed in the phase matrix but are not exposed by the current default tool surface.",
}),
);
expect(report.rows.find((row) => row.tool === "web-search")).toEqual(
expect.objectContaining({
bucket: "openclaw-dynamic-integration",
capabilityLayer: "openclaw-dynamic-direct",
required: true,
}),
);
expect(report.rows.find((row) => row.tool === "web-search")?.tracking).toBeUndefined();
});
});

View File

@@ -71,7 +71,7 @@ type ToolFixtureGroup = {
scenarios: QaSeedScenarioWithSource[];
};
const PASSING_DRIFTS: ReadonlySet<QaToolCoverageDrift> = new Set(["none", "text-only", "not-run"]);
const PASSING_DRIFTS: ReadonlySet<QaToolCoverageDrift> = new Set(["none", "text-only"]);
function isRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
@@ -173,6 +173,10 @@ function mergeScenarioResults(
return failingResult;
}
/**
 * Decides whether a drift value counts as passing.
 *
 * Any drift listed in `PASSING_DRIFTS` passes. `not-run` additionally passes
 * only when no summary was evaluated (`evaluated` is false).
 */
function isPassingToolCoverageDrift(drift: QaToolCoverageDrift, evaluated: boolean) {
  if (PASSING_DRIFTS.has(drift)) {
    return true;
  }
  return drift === "not-run" && !evaluated;
}
function buildRow(params: {
group: ToolFixtureGroup;
results: ReadonlyMap<string, RuntimeParityResult>;
@@ -222,7 +226,9 @@ export function buildQaToolCoverageReport(params: {
const evaluated = Boolean(params.summary);
const failures = evaluated
? rows
.filter((row) => row.required && !row.tracking && !PASSING_DRIFTS.has(row.drift))
.filter(
(row) => row.required && !row.tracking && !isPassingToolCoverageDrift(row.drift, true),
)
.map((row) => `${row.tool} drift=${row.drift}${row.details ? ` (${row.details})` : ""}`)
: [];
return {
@@ -237,7 +243,9 @@ export function buildQaToolCoverageReport(params: {
dynamicIntegrationTools: rows.filter((row) => row.bucket === "openclaw-dynamic-integration")
.length,
optionalTools: rows.filter((row) => row.bucket === "optional-profile-or-plugin").length,
passingTools: evaluated ? rows.filter((row) => PASSING_DRIFTS.has(row.drift)).length : 0,
passingTools: evaluated
? rows.filter((row) => isPassingToolCoverageDrift(row.drift, true)).length
: 0,
failingTools: failures.length,
rows,
pass: failures.length === 0,

View File

@@ -28,7 +28,10 @@ Coverage tracking:
Runtime parity tiers:
- `standard`: required Codex-vs-Pi mock gate coverage for first-hour depth and
default runtime-tool fixtures; selected with
default runtime-tool fixtures. OpenClaw dynamic integration tools in this
tier are hard-gated by `openclaw qa coverage --tools --summary`; Codex-native
workspace rows remain separately tracked until native/live behavior is the
asserted surface. Selected with
`openclaw qa suite --runtime-pair pi,codex --runtime-parity-tier standard`
- `optional`: profile-, plugin-, or external-service-dependent runtime-tool
fixtures that stay out of the default release gate

View File

@@ -13,6 +13,7 @@ successCriteria:
- Effective tools expose image_generate after QA image-generation config is applied.
- The mock provider plans exactly one happy-path image_generate call.
- The mock provider plans one denied-input failure-path image_generate call.
- Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
docsRefs:
- docs/tools/image-generation.md
codeRefs:
@@ -29,15 +30,12 @@ execution:
actualTool: image_generate
bucket: openclaw-dynamic-integration
expectedLayer: openclaw-dynamic
capabilityLayer: openclaw-dynamic-direct
required: true
tracking: "#80319"
codexDefaultImpact: P4
qaImpact: P1
action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
reason: image_generate is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
knownHarnessGap:
issue: "#80319"
reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
action: hard gate in the standard direct-loading tier
reason: image_generate is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
promptSnippet: "target=image_generate"
failurePromptSnippet: "failure target=image_generate"
```

View File

@@ -13,6 +13,7 @@ successCriteria:
- Effective tools expose session_status.
- The mock provider plans exactly one happy-path session_status call.
- The mock provider plans one denied-input failure-path session_status call.
- Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
docsRefs:
- qa/scenarios/index.md
codeRefs:
@@ -28,15 +29,12 @@ execution:
actualTool: session_status
bucket: openclaw-dynamic-integration
expectedLayer: openclaw-dynamic
capabilityLayer: openclaw-dynamic-direct
required: true
tracking: "#80319"
codexDefaultImpact: P4
qaImpact: P1
action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
reason: session_status is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
knownHarnessGap:
issue: "#80319"
reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
action: hard gate in the standard direct-loading tier
reason: session_status is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
promptSnippet: "target=session_status"
failurePromptSnippet: "failure target=session_status"
```

View File

@@ -13,6 +13,7 @@ successCriteria:
- Effective tools expose sessions_spawn.
- The mock provider plans exactly one happy-path sessions_spawn call.
- The mock provider plans one denied-input failure-path sessions_spawn call.
- Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
docsRefs:
- qa/scenarios/index.md
codeRefs:
@@ -28,15 +29,12 @@ execution:
actualTool: sessions_spawn
bucket: openclaw-dynamic-integration
expectedLayer: openclaw-dynamic
capabilityLayer: openclaw-dynamic-direct
required: true
tracking: "#80319"
codexDefaultImpact: P4
qaImpact: P1
action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
reason: sessions_spawn is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
knownHarnessGap:
issue: "#80319"
reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
action: hard gate in the standard direct-loading tier
reason: sessions_spawn is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
promptSnippet: "target=sessions_spawn"
failurePromptSnippet: "failure target=sessions_spawn"
```

View File

@@ -13,6 +13,7 @@ successCriteria:
- Effective tools expose web_fetch.
- The mock provider plans exactly one happy-path web_fetch call.
- The mock provider plans one denied-input failure-path web_fetch call.
- Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
docsRefs:
- qa/scenarios/index.md
codeRefs:
@@ -28,15 +29,12 @@ execution:
actualTool: web_fetch
bucket: openclaw-dynamic-integration
expectedLayer: openclaw-dynamic
capabilityLayer: openclaw-dynamic-direct
required: true
tracking: "#80319"
codexDefaultImpact: P4
qaImpact: P1
action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
reason: web_fetch is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
knownHarnessGap:
issue: "#80319"
reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
action: hard gate in the standard direct-loading tier
reason: web_fetch is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
promptSnippet: "target=web_fetch"
failurePromptSnippet: "failure target=web_fetch"
```

View File

@@ -13,6 +13,7 @@ successCriteria:
- Effective tools expose web_search.
- The mock provider plans exactly one happy-path web_search call.
- The mock provider plans one denied-input failure-path web_search call.
- Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
docsRefs:
- qa/scenarios/index.md
codeRefs:
@@ -28,15 +29,12 @@ execution:
actualTool: web_search
bucket: openclaw-dynamic-integration
expectedLayer: openclaw-dynamic
capabilityLayer: openclaw-dynamic-direct
required: true
tracking: "#80319"
codexDefaultImpact: P4
qaImpact: P1
action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
reason: web_search is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
knownHarnessGap:
issue: "#80319"
reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
action: hard gate in the standard direct-loading tier
reason: web_search is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
promptSnippet: "target=web_search"
failurePromptSnippet: "failure target=web_search"
```

View File

@@ -4,6 +4,8 @@ type Options = {
json?: boolean;
output?: string;
repoRoot?: string;
summary?: string;
tools?: boolean;
};
function takeValue(args: string[], index: number, flag: string): string {
@@ -27,6 +29,8 @@ Options:
--json Print machine-readable JSON
--output <path> Write the report to a file
--repo-root <path> Repository root to target
--summary <path> Runtime qa-suite-summary.json to overlay on --tools coverage
--tools Print runtime tool fixture coverage instead of scenario coverage
-h, --help Display help
`);
process.exit(0);
@@ -41,6 +45,13 @@ Options:
opts.repoRoot = takeValue(args, index, arg);
index += 1;
break;
case "--summary":
opts.summary = takeValue(args, index, arg);
index += 1;
break;
case "--tools":
opts.tools = true;
break;
default:
throw new Error(`Unknown qa coverage option: ${arg}`);
}
@@ -53,4 +64,6 @@ await runQaCoverageReportCommand({
...(opts.json ? { json: true } : {}),
...(opts.output ? { output: opts.output } : {}),
...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}),
...(opts.summary ? { summary: opts.summary } : {}),
...(opts.tools ? { tools: true } : {}),
});

View File

@@ -1093,7 +1093,14 @@ describe("run-node script", () => {
const exitCode = await runNodeMain({
cwd: tmp,
args: ["qa", "coverage", "--json"],
args: [
"qa",
"coverage",
"--json",
"--tools",
"--summary",
".artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
],
env: {
...process.env,
OPENCLAW_RUNNER_LOG: "0",
@@ -1111,6 +1118,9 @@ describe("run-node script", () => {
"tsx",
path.join(tmp, "scripts", "qa-coverage-report.ts"),
"--json",
"--tools",
"--summary",
".artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
],
]);
});

View File

@@ -490,6 +490,45 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
}
});
// Guards the release-check workflow wiring for the runtime tool coverage job:
// no continue-on-error, the expected `needs`, the enforcement command with its
// summary path, and the job's result being passed to the summary job.
it("keeps runtime tool coverage blocking in release checks", () => {
  const workflowText = readFileSync(".github/workflows/openclaw-release-checks.yml", "utf8");
  const workflow = parse(workflowText);
  const coverageJob = workflow.jobs.runtime_tool_coverage_release_checks;

  expect(coverageJob["continue-on-error"]).toBeUndefined();
  expect(coverageJob.needs).toEqual(["resolve_target", "qa_lab_runtime_parity_release_checks"]);

  // Each fragment must appear in the `run` script of a step with the enforcement step's name.
  const requiredRunFragments = [
    "pnpm openclaw qa coverage",
    "--summary .artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
  ];
  for (const fragment of requiredRunFragments) {
    expect(coverageJob.steps).toEqual(
      expect.arrayContaining([
        expect.objectContaining({
          name: "Enforce standard runtime tool coverage",
          run: expect.stringContaining(fragment),
        }),
      ]),
    );
  }

  expect(workflow.jobs.summary.needs).toContain("runtime_tool_coverage_release_checks");
  expect(workflowText).toContain(
    '"runtime_tool_coverage_release_checks=${{ needs.runtime_tool_coverage_release_checks.result }}"',
  );
});
it("keeps the live-ish availability check redacted", () => {
const output = execFileSync(
process.execPath,