fix(qa-lab): hard gate runtime tool coverage

2026-05-24 23:43:03 +00:00 · 2026-05-18 09:42:10 +08:00
parent 73f4657869
commit 58e1351863
19 changed files with 318 additions and 41 deletions
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -955,6 +955,57 @@ jobs:
          retention-days: 14
          if-no-files-found: warn

+  runtime_tool_coverage_release_checks: # gate: runs `qa coverage --tools` on the downloaded parity summary
+    name: Enforce QA Lab runtime tool coverage
+    needs: [resolve_target, qa_lab_runtime_parity_release_checks]
+    if: always() && contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group) # always(): still evaluated when a needed job failed
+    runs-on: ubuntu-24.04
+    timeout-minutes: 15
+    permissions:
+      contents: read
+      actions: read
+    env:
+      OPENCLAW_BUILD_PRIVATE_QA: "1"
+      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
+    steps:
+      - name: Checkout selected ref
+        uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+          ref: ${{ needs.resolve_target.outputs.revision }}
+          fetch-depth: 1 # shallow clone of the selected revision only
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          pnpm-version: ${{ env.PNPM_VERSION }}
+          install-bun: "true"
+
+      - name: Download runtime parity artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: release-qa-runtime-parity-${{ needs.resolve_target.outputs.revision }} # presumably uploaded by the parity job in `needs` — confirm
+          path: .artifacts/qa-e2e/
+
+      - name: Enforce standard runtime tool coverage
+        run: |
+          set -euo pipefail
+          pnpm openclaw qa coverage \
+            --repo-root . \
+            --tools \
+            --summary .artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json \
+            --output .artifacts/qa-e2e/runtime-parity-standard-report/qa-runtime-tool-coverage-report.md
+
+      - name: Upload runtime tool coverage artifacts
+        if: always() # upload whatever report exists even if the gate step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-qa-runtime-tool-coverage-${{ needs.resolve_target.outputs.revision }}
+          path: .artifacts/qa-e2e/runtime-parity-standard-report/
+          retention-days: 14
+          if-no-files-found: warn # a missing report produces a warning, not a failure
+
  qa_live_matrix_release_checks:
    name: Run QA Lab live Matrix lane
    needs: [resolve_target]
@@ -1434,6 +1485,7 @@ jobs:
      - qa_lab_parity_lane_release_checks
      - qa_lab_parity_report_release_checks
      - qa_lab_runtime_parity_release_checks
+      - runtime_tool_coverage_release_checks
      - qa_live_matrix_release_checks
      - qa_live_telegram_release_checks
      - qa_live_discord_release_checks
@@ -1465,6 +1517,7 @@ jobs:
            "qa_lab_parity_lane_release_checks=${{ needs.qa_lab_parity_lane_release_checks.result }}" \
            "qa_lab_parity_report_release_checks=${{ needs.qa_lab_parity_report_release_checks.result }}" \
            "qa_lab_runtime_parity_release_checks=${{ needs.qa_lab_runtime_parity_release_checks.result }}" \
+            "runtime_tool_coverage_release_checks=${{ needs.runtime_tool_coverage_release_checks.result }}" \
            "qa_live_matrix_release_checks=${{ needs.qa_live_matrix_release_checks.result }}" \
            "qa_live_telegram_release_checks=${{ needs.qa_live_telegram_release_checks.result }}" \
            "qa_live_discord_release_checks=${{ needs.qa_live_discord_release_checks.result }}" \
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
 - QA-Lab: add live-only harness self-health scenarios for plugin hook crashes, manifest contract errors, and WebChat direct-reply self-message routing. (#80323) Thanks @100yenadmin.
 - QA-Lab: add runtime tool fixture scenarios and coverage reporting for Codex-native workspace tools, OpenClaw dynamic tools, and optional plugin-backed tools. Fixes #80173. Thanks @100yenadmin.
 - QA-Lab: expose runtime tool fixture coverage through `openclaw qa coverage --tools`, with optional suite-summary evaluation for parity gate artifacts. Thanks @100yenadmin.
+- QA-Lab: hard-gate required OpenClaw dynamic runtime-tool drift in the standard Codex-vs-Pi tier with a blocking release-check verifier and publish the tool coverage report artifact. Fixes #80339; refs #80319. Thanks @100yenadmin.
 - QA-Lab: add the personal-agent approval-denial scenario so the benchmark pack verifies denied local reads stop cleanly without tool progress or fixture leaks. (#83150) Thanks @iFiras-Max1.

 ### Fixes
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -203,7 +203,7 @@ Docker release-path soak; `full` forces soak on.

 The umbrella records the dispatched child run ids, and the final `Verify full validation` job re-checks current child run conclusions and appends slowest-job tables for each child run. If a child workflow is rerun and turns green, rerun only the parent verifier job to refresh the umbrella result and timing summary.

-For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. For one failed cross-OS lane, combine `rerun_group=cross-os` with `cross_os_suite_filter`, for example `windows/packaged-upgrade`; long cross-OS commands emit heartbeat lines and packaged-upgrade summaries include per-phase timings. QA release-check lanes are advisory, so QA-only failures warn but do not block the release-check verifier.
+For recovery, both `Full Release Validation` and `OpenClaw Release Checks` accept `rerun_group`. Use `all` for a release candidate, `ci` for only the normal full CI child, `plugin-prerelease` for only the plugin prerelease child, `release-checks` for every release child, or a narrower group: `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, `qa-live`, or `npm-telegram` on the umbrella. This keeps a failed release box rerun bounded after a focused fix. For one failed cross-OS lane, combine `rerun_group=cross-os` with `cross_os_suite_filter`, for example `windows/packaged-upgrade`; long cross-OS commands emit heartbeat lines and packaged-upgrade summaries include per-phase timings. QA release-check lanes are advisory, so other QA-only failures warn but do not block the release-check verifier; the exception is the standard runtime tool coverage gate, which blocks the verifier when required OpenClaw dynamic tools drift or disappear from the standard tier summary.

 `OpenClaw Release Checks` uses the trusted workflow ref to resolve the selected ref once into a `release-package-under-test` tarball, then passes that artifact to cross-OS checks and Package Acceptance, plus the live/E2E release-path Docker workflow when soak coverage runs. That keeps the package bytes consistent across release boxes and avoids repacking the same candidate in multiple child jobs.

--- a/docs/reference/RELEASING.md
+++ b/docs/reference/RELEASING.md
@@ -442,8 +442,10 @@ Focused `npm-telegram` reruns require `release_package_spec` or
 `npm_telegram_package_spec`; full/all runs with `release_profile=full` use the
 release-checks package artifact. Focused
 cross-OS reruns can add `cross_os_suite_filter=windows/packaged-upgrade` or
-another OS/suite filter. QA release-check failures are advisory; a QA-only
-failure does not block release validation.
+another OS/suite filter. QA release-check failures are advisory except the
+standard runtime tool coverage gate, which blocks release validation when
+required OpenClaw dynamic tools drift or disappear from the standard tier
+summary.

 ### Vitest

--- a/docs/reference/full-release-validation.md
+++ b/docs/reference/full-release-validation.md
@@ -166,9 +166,10 @@ summaries include per-phase timings for packaged upgrade lanes, and long-running
 commands print heartbeat lines so a stuck Windows update is visible before the
 job timeout.

-QA release-check lanes are advisory. A QA-only failure is reported as a warning
-and does not block the release-check verifier; rerun `rerun_group=qa`,
-`qa-parity`, or `qa-live` when you need fresh QA evidence.
+QA release-check lanes are advisory except the standard runtime tool coverage
+gate. Required OpenClaw dynamic tool drift in the standard tier blocks the
+release-check verifier; other QA-only failures are reported as warnings. Rerun
+`rerun_group=qa`, `qa-parity`, or `qa-live` when you need fresh QA evidence.

 ## Evidence to keep

--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -979,6 +979,64 @@ describe("qa cli runtime", () => {
    expectWriteContains(stdoutWrite, "codex-native-workspace");
  });

+  it("exits nonzero when tool coverage summary has required drift", async () => {
+    const priorExitCode = process.exitCode; // captured so the finally block can put it back
+    const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-tool-coverage-"));
+    try {
+      await fs.writeFile(
+        path.join(repoRoot, "runtime-summary.json"),
+        JSON.stringify({
+          scenarios: [
+            {
+              name: "runtime-tool-web-search",
+              status: "fail",
+              runtimeParity: {
+                scenarioId: "runtime-tool-web-search",
+                drift: "tool-call-shape",
+                driftDetails: "Codex emitted no web_search call",
+                cells: {
+                  pi: {
+                    runtime: "pi",
+                    transcriptBytes: "",
+                    toolCalls: [{ tool: "web_search", argsHash: "a", resultHash: "r" }],
+                    finalText: "",
+                    usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
+                    wallClockMs: 1,
+                    bootStateLines: [],
+                  },
+                  codex: {
+                    runtime: "codex",
+                    transcriptBytes: "",
+                    toolCalls: [],
+                    finalText: "",
+                    usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
+                    wallClockMs: 1,
+                    bootStateLines: [],
+                  },
+                },
+              },
+            },
+          ],
+          run: { runtimePair: ["pi", "codex"] },
+        }),
+        "utf8",
+      );
+      process.exitCode = undefined; // clear leaked state so toBe(1) cannot pass vacuously
+      await runQaCoverageReportCommand({
+        repoRoot,
+        tools: true,
+        summary: "runtime-summary.json",
+      });
+
+      expect(process.exitCode).toBe(1);
+      expectWriteContains(stdoutWrite, "- Verdict: fail");
+      expectWriteContains(stdoutWrite, "web-search drift=tool-call-shape");
+    } finally {
+      process.exitCode = priorExitCode; // restore whatever the process had before this test
+      await fs.rm(repoRoot, { recursive: true, force: true });
+    }
+  });
+
  it("resolves character eval paths and passes model refs through", async () => {
    await runQaCharacterEvalCommand({
      repoRoot: "/tmp/openclaw-repo",
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -769,6 +769,9 @@ export async function runQaCoverageReportCommand(opts: {
      ? `${JSON.stringify(report, null, 2)}\n`
      : renderQaToolCoverageMarkdownReport(report);
    outputLabel = "QA tool coverage report";
+    if (summary && !report.pass) {
+      process.exitCode = 1;
+    }
  } else {
    if (opts.summary?.trim()) {
      throw new Error("--summary requires --tools.");
--- a/extensions/qa-lab/src/scenario-catalog.test.ts
+++ b/extensions/qa-lab/src/scenario-catalog.test.ts
@@ -120,6 +120,7 @@ describe("qa scenario catalog", () => {
    const applyPatch = readQaScenarioById("runtime-tool-apply-patch");
    const messageTool = readQaScenarioById("runtime-tool-message-tool");
    const tavilySearch = readQaScenarioById("runtime-tool-tavily-search");
+    const webSearch = readQaScenarioById("runtime-tool-web-search");

    expect(applyPatch.runtimeParityTier).toBe("standard");
    expect(messageTool.runtimeParityTier).toBe("optional");
@@ -140,6 +141,16 @@ describe("qa scenario catalog", () => {
        required: false,
      },
    });
+    expect(readQaScenarioExecutionConfig(webSearch.id)).toMatchObject({
+      toolName: "web_search",
+      toolCoverage: {
+        bucket: "openclaw-dynamic-integration",
+        expectedLayer: "openclaw-dynamic",
+        capabilityLayer: "openclaw-dynamic-direct",
+        required: true,
+      },
+    });
+    expect(readQaScenarioExecutionConfig(webSearch.id)).not.toHaveProperty("knownHarnessGap");
  });

  it("loads the Codex Pi-shaped Read vocabulary live parity canary", () => {
--- a/extensions/qa-lab/src/tool-coverage-report.test.ts
+++ b/extensions/qa-lab/src/tool-coverage-report.test.ts
@@ -223,6 +223,83 @@ describe("qa tool coverage report", () => {
    );
  });

+  it("fails untracked required OpenClaw dynamic tool drift", () => {
+    const report = buildQaToolCoverageReport({
+      scenarios: [
+        makeScenario("tool-web-search", "web-search", {
+          toolName: "web_search",
+          toolCoverage: {
+            bucket: "openclaw-dynamic-integration",
+            expectedLayer: "openclaw-dynamic",
+            capabilityLayer: "openclaw-dynamic-direct",
+            required: true,
+          },
+        }),
+      ],
+      summary: {
+        scenarios: [
+          {
+            name: "tool web_search",
+            status: "fail",
+            runtimeParity: {
+              scenarioId: "tool-web-search",
+              drift: "tool-call-shape",
+              driftDetails: "Codex emitted no web_search call",
+              cells: {
+                pi: {
+                  runtime: "pi",
+                  transcriptBytes: "",
+                  toolCalls: [{ tool: "web_search", argsHash: "a", resultHash: "r" }],
+                  finalText: "",
+                  usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
+                  wallClockMs: 1,
+                  bootStateLines: [],
+                },
+                codex: {
+                  runtime: "codex",
+                  transcriptBytes: "",
+                  toolCalls: [],
+                  finalText: "",
+                  usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 },
+                  wallClockMs: 1,
+                  bootStateLines: [],
+                },
+              },
+            },
+          },
+        ],
+      },
+      generatedAt: "2026-05-10T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.failures).toEqual([
+      "web-search drift=tool-call-shape (Codex emitted no web_search call)",
+    ]);
+  });
+
+  it("fails untracked required tools missing from an evaluated summary", () => {
+    const report = buildQaToolCoverageReport({
+      scenarios: [
+        makeScenario("tool-web-search", "web-search", {
+          toolCoverage: {
+            bucket: "openclaw-dynamic-integration",
+            expectedLayer: "openclaw-dynamic",
+            capabilityLayer: "openclaw-dynamic-direct",
+            required: true,
+          },
+        }),
+      ],
+      summary: {
+        scenarios: [],
+      },
+      generatedAt: "2026-05-10T00:00:00.000Z",
+    });
+
+    expect(report.pass).toBe(false);
+    expect(report.failures).toEqual(["web-search drift=not-run"]);
+  });
+
  it("rejects unknown runtime tool coverage buckets", () => {
    expect(() =>
      buildQaToolCoverageReport({
@@ -301,5 +378,13 @@ describe("qa tool coverage report", () => {
          "#80173 Tavily tools are listed in the phase matrix but are not exposed by the current default tool surface.",
      }),
    );
+    expect(report.rows.find((row) => row.tool === "web-search")).toEqual(
+      expect.objectContaining({
+        bucket: "openclaw-dynamic-integration",
+        capabilityLayer: "openclaw-dynamic-direct",
+        required: true,
+      }),
+    );
+    expect(report.rows.find((row) => row.tool === "web-search")?.tracking).toBeUndefined();
  });
 });
--- a/extensions/qa-lab/src/tool-coverage-report.ts
+++ b/extensions/qa-lab/src/tool-coverage-report.ts
@@ -71,7 +71,7 @@ type ToolFixtureGroup = {
  scenarios: QaSeedScenarioWithSource[];
 };

-const PASSING_DRIFTS: ReadonlySet<QaToolCoverageDrift> = new Set(["none", "text-only", "not-run"]);
+const PASSING_DRIFTS: ReadonlySet<QaToolCoverageDrift> = new Set(["none", "text-only"]);

 function isRecord(value: unknown): value is Record<string, unknown> {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
@@ -173,6 +173,10 @@ function mergeScenarioResults(
  return failingResult;
 }

+function isPassingToolCoverageDrift(drift: QaToolCoverageDrift, evaluated: boolean) {
+  return (drift === "not-run" && !evaluated) || PASSING_DRIFTS.has(drift); // an unevaluated report tolerates "not-run"
+}
+
 function buildRow(params: {
  group: ToolFixtureGroup;
  results: ReadonlyMap<string, RuntimeParityResult>;
@@ -222,7 +226,9 @@ export function buildQaToolCoverageReport(params: {
  const evaluated = Boolean(params.summary);
  const failures = evaluated
    ? rows
-        .filter((row) => row.required && !row.tracking && !PASSING_DRIFTS.has(row.drift))
+        .filter(
+          (row) => row.required && !row.tracking && !isPassingToolCoverageDrift(row.drift, true),
+        )
        .map((row) => `${row.tool} drift=${row.drift}${row.details ? ` (${row.details})` : ""}`)
    : [];
  return {
@@ -237,7 +243,9 @@ export function buildQaToolCoverageReport(params: {
    dynamicIntegrationTools: rows.filter((row) => row.bucket === "openclaw-dynamic-integration")
      .length,
    optionalTools: rows.filter((row) => row.bucket === "optional-profile-or-plugin").length,
-    passingTools: evaluated ? rows.filter((row) => PASSING_DRIFTS.has(row.drift)).length : 0,
+    passingTools: evaluated
+      ? rows.filter((row) => isPassingToolCoverageDrift(row.drift, true)).length
+      : 0,
    failingTools: failures.length,
    rows,
    pass: failures.length === 0,
--- a/qa/scenarios/index.md
+++ b/qa/scenarios/index.md
@@ -28,7 +28,10 @@ Coverage tracking:
 Runtime parity tiers:

 - `standard`: required Codex-vs-Pi mock gate coverage for first-hour depth and
-  default runtime-tool fixtures; selected with
+  default runtime-tool fixtures. OpenClaw dynamic integration tools in this
+  tier are hard-gated by `openclaw qa coverage --tools --summary`; Codex-native
+  workspace rows remain separately tracked until native/live behavior is the
+  asserted surface. Selected with
  `openclaw qa suite --runtime-pair pi,codex --runtime-parity-tier standard`
 - `optional`: profile-, plugin-, or external-service-dependent runtime-tool
  fixtures that stay out of the default release gate
--- a/qa/scenarios/runtime/tools/image-generate.md
+++ b/qa/scenarios/runtime/tools/image-generate.md
@@ -13,6 +13,7 @@ successCriteria:
  - Effective tools expose image_generate after QA image-generation config is applied.
  - The mock provider plans exactly one happy-path image_generate call.
  - The mock provider plans one denied-input failure-path image_generate call.
+  - Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
 docsRefs:
  - docs/tools/image-generation.md
 codeRefs:
@@ -29,15 +30,12 @@ execution:
      actualTool: image_generate
      bucket: openclaw-dynamic-integration
      expectedLayer: openclaw-dynamic
+      capabilityLayer: openclaw-dynamic-direct
      required: true
-      tracking: "#80319"
      codexDefaultImpact: P4
      qaImpact: P1
-      action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
-      reason: image_generate is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
-    knownHarnessGap:
-      issue: "#80319"
-      reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
+      action: hard gate in the standard direct-loading tier
+      reason: image_generate is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
    promptSnippet: "target=image_generate"
    failurePromptSnippet: "failure target=image_generate"
 ```
--- a/qa/scenarios/runtime/tools/session-status.md
+++ b/qa/scenarios/runtime/tools/session-status.md
@@ -13,6 +13,7 @@ successCriteria:
  - Effective tools expose session_status.
  - The mock provider plans exactly one happy-path session_status call.
  - The mock provider plans one denied-input failure-path session_status call.
+  - Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
 docsRefs:
  - qa/scenarios/index.md
 codeRefs:
@@ -28,15 +29,12 @@ execution:
      actualTool: session_status
      bucket: openclaw-dynamic-integration
      expectedLayer: openclaw-dynamic
+      capabilityLayer: openclaw-dynamic-direct
      required: true
-      tracking: "#80319"
      codexDefaultImpact: P4
      qaImpact: P1
-      action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
-      reason: session_status is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
-    knownHarnessGap:
-      issue: "#80319"
-      reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
+      action: hard gate in the standard direct-loading tier
+      reason: session_status is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
    promptSnippet: "target=session_status"
    failurePromptSnippet: "failure target=session_status"
 ```
--- a/qa/scenarios/runtime/tools/sessions-spawn.md
+++ b/qa/scenarios/runtime/tools/sessions-spawn.md
@@ -13,6 +13,7 @@ successCriteria:
  - Effective tools expose sessions_spawn.
  - The mock provider plans exactly one happy-path sessions_spawn call.
  - The mock provider plans one denied-input failure-path sessions_spawn call.
+  - Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
 docsRefs:
  - qa/scenarios/index.md
 codeRefs:
@@ -28,15 +29,12 @@ execution:
      actualTool: sessions_spawn
      bucket: openclaw-dynamic-integration
      expectedLayer: openclaw-dynamic
+      capabilityLayer: openclaw-dynamic-direct
      required: true
-      tracking: "#80319"
      codexDefaultImpact: P4
      qaImpact: P1
-      action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
-      reason: sessions_spawn is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
-    knownHarnessGap:
-      issue: "#80319"
-      reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
+      action: hard gate in the standard direct-loading tier
+      reason: sessions_spawn is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
    promptSnippet: "target=sessions_spawn"
    failurePromptSnippet: "failure target=sessions_spawn"
 ```
--- a/qa/scenarios/runtime/tools/web-fetch.md
+++ b/qa/scenarios/runtime/tools/web-fetch.md
@@ -13,6 +13,7 @@ successCriteria:
  - Effective tools expose web_fetch.
  - The mock provider plans exactly one happy-path web_fetch call.
  - The mock provider plans one denied-input failure-path web_fetch call.
+  - Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
 docsRefs:
  - qa/scenarios/index.md
 codeRefs:
@@ -28,15 +29,12 @@ execution:
      actualTool: web_fetch
      bucket: openclaw-dynamic-integration
      expectedLayer: openclaw-dynamic
+      capabilityLayer: openclaw-dynamic-direct
      required: true
-      tracking: "#80319"
      codexDefaultImpact: P4
      qaImpact: P1
-      action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
-      reason: web_fetch is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
-    knownHarnessGap:
-      issue: "#80319"
-      reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
+      action: hard gate in the standard direct-loading tier
+      reason: web_fetch is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
    promptSnippet: "target=web_fetch"
    failurePromptSnippet: "failure target=web_fetch"
 ```
--- a/qa/scenarios/runtime/tools/web-search.md
+++ b/qa/scenarios/runtime/tools/web-search.md
@@ -13,6 +13,7 @@ successCriteria:
  - Effective tools expose web_search.
  - The mock provider plans exactly one happy-path web_search call.
  - The mock provider plans one denied-input failure-path web_search call.
+  - Runtime parity coverage hard-fails call/result drift in the standard direct-loading gate.
 docsRefs:
  - qa/scenarios/index.md
 codeRefs:
@@ -28,15 +29,12 @@ execution:
      actualTool: web_search
      bucket: openclaw-dynamic-integration
      expectedLayer: openclaw-dynamic
+      capabilityLayer: openclaw-dynamic-direct
      required: true
-      tracking: "#80319"
      codexDefaultImpact: P4
      qaImpact: P1
-      action: teach fixture/mock planner Codex searchable OpenClaw dynamic tool behavior
-      reason: web_search is an OpenClaw integration tool; QA mock provider does not yet model Codex searchable/deferred dynamic tool declarations for this fixture.
-    knownHarnessGap:
-      issue: "#80319"
-      reason: QA mock provider does not yet model Codex searchable/deferred OpenClaw dynamic tool declarations for this fixture.
+      action: hard gate in the standard direct-loading tier
+      reason: web_search is an OpenClaw integration tool and must stay visible and callable under Pi and Codex direct runtime parity.
    promptSnippet: "target=web_search"
    failurePromptSnippet: "failure target=web_search"
 ```
--- a/scripts/qa-coverage-report.ts
+++ b/scripts/qa-coverage-report.ts
@@ -4,6 +4,8 @@ type Options = {
  json?: boolean;
  output?: string;
  repoRoot?: string;
+  summary?: string;
+  tools?: boolean;
 };

 function takeValue(args: string[], index: number, flag: string): string {
@@ -27,6 +29,8 @@ Options:
  --json                Print machine-readable JSON
  --output <path>       Write the report to a file
  --repo-root <path>    Repository root to target
+  --summary <path>      Runtime qa-suite-summary.json to overlay on --tools coverage
+  --tools               Print runtime tool fixture coverage instead of scenario coverage
  -h, --help            Display help
 `);
        process.exit(0);
@@ -41,6 +45,13 @@ Options:
        opts.repoRoot = takeValue(args, index, arg);
        index += 1;
        break;
+      case "--summary":
+        opts.summary = takeValue(args, index, arg);
+        index += 1;
+        break;
+      case "--tools":
+        opts.tools = true;
+        break;
      default:
        throw new Error(`Unknown qa coverage option: ${arg}`);
    }
@@ -53,4 +64,6 @@ await runQaCoverageReportCommand({
  ...(opts.json ? { json: true } : {}),
  ...(opts.output ? { output: opts.output } : {}),
  ...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}),
+  ...(opts.summary ? { summary: opts.summary } : {}),
+  ...(opts.tools ? { tools: true } : {}),
 });
--- a/src/infra/run-node.test.ts
+++ b/src/infra/run-node.test.ts
@@ -1093,7 +1093,14 @@ describe("run-node script", () => {

      const exitCode = await runNodeMain({
        cwd: tmp,
-        args: ["qa", "coverage", "--json"],
+        args: [
+          "qa",
+          "coverage",
+          "--json",
+          "--tools",
+          "--summary",
+          ".artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
+        ],
        env: {
          ...process.env,
          OPENCLAW_RUNNER_LOG: "0",
@@ -1111,6 +1118,9 @@ describe("run-node script", () => {
          "tsx",
          path.join(tmp, "scripts", "qa-coverage-report.ts"),
          "--json",
+          "--tools",
+          "--summary",
+          ".artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
        ],
      ]);
    });
--- a/test/scripts/plugin-prerelease-test-plan.test.ts
+++ b/test/scripts/plugin-prerelease-test-plan.test.ts
@@ -490,6 +490,45 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => {
    }
  });

+  it("keeps runtime tool coverage blocking in release checks", () => {
+    // The raw text is kept next to the parsed form for the literal argument check at the end.
+    const workflowText = readFileSync(".github/workflows/openclaw-release-checks.yml", "utf8");
+    const workflow = parse(workflowText);
+    const gateJobId = "runtime_tool_coverage_release_checks";
+    const gateJob = workflow.jobs[gateJobId];
+    const gateStepName = "Enforce standard runtime tool coverage";
+    const requiredRunFragments = [
+      "pnpm openclaw qa coverage",
+      "--summary .artifacts/qa-e2e/runtime-parity-standard/qa-suite-summary.json",
+    ];
+
+    // A job marked continue-on-error could fail without failing the workflow.
+    expect(gateJob["continue-on-error"]).toBeUndefined();
+    expect(gateJob.needs).toEqual([
+      "resolve_target",
+      "qa_lab_runtime_parity_release_checks",
+    ]);
+
+    // Each fragment is matched on its own against a step with the gate's name.
+    for (const fragment of requiredRunFragments) {
+      expect(gateJob.steps).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            name: gateStepName,
+            run: expect.stringContaining(fragment),
+          }),
+        ]),
+      );
+    }
+
+    // The summary job has to depend on the gate and list the gate's result among the
+    // `name=result` arguments that appear in the workflow source.
+    expect(workflow.jobs.summary.needs).toContain(gateJobId);
+    expect(workflowText).toContain(
+      '"runtime_tool_coverage_release_checks=${{ needs.runtime_tool_coverage_release_checks.result }}"',
+    );
+  });
+
  it("keeps the live-ish availability check redacted", () => {
    const output = execFileSync(
      process.execPath,