From 57e4994caf6d3f60c2721d411fb3486ebf793100 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 29 Apr 2026 19:53:29 +0100
Subject: [PATCH] ci: speed up release validation

---
 .agents/skills/openclaw-testing/SKILL.md      |  12 +-
 .github/workflows/full-release-validation.yml | 102 ++++----------
 .../openclaw-live-and-e2e-checks-reusable.yml | 100 +++++++++-----
 .github/workflows/openclaw-release-checks.yml |  15 ++-
 scripts/qa-parity-report.ts                   |  85 ++++++++++++
 scripts/run-node.mjs                          |  34 +++++
 scripts/test-projects.test-support.mjs        |   1 +
 src/agents/model-catalog.test.ts              |  20 +++
 src/agents/model-catalog.ts                   |   2 +-
 src/agents/model-selection-shared.ts          |  23 +++-
 src/agents/model-selection.test.ts            |  45 +++++++
 .../chat.directive-tags.test.ts               |  30 +++++
 src/gateway/server.reload.test.ts             | 127 ++----------------
 src/infra/run-node.test.ts                    |  50 +++++++
 test/gateway.multi.e2e.test.ts                |  57 --------
 .../package-acceptance-workflow.test.ts       |  16 ++-
 16 files changed, 427 insertions(+), 292 deletions(-)
 create mode 100644 scripts/qa-parity-report.ts

diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md
index 5f479af673d..b674037e202 100644
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -160,8 +160,8 @@ PRs, main pushes, and ad hoc broad CI checks do not spend Docker/package time or
 all-plugin runtime time on release-only product coverage.
 
 If a full run is already active on a newer `origin/main`, prefer watching that
-run over dispatching a duplicate. If you accidentally dispatch a stale duplicate,
-cancel it and monitor the current run.
+run over dispatching a duplicate. Do not cancel release, release-check, or child
+workflow runs unless Peter explicitly asks for cancellation.
 
 The child-dispatch jobs record the child run ids. The final
 `Verify full validation` job re-queries those child runs and is the canonical
@@ -174,9 +174,11 @@ Supported umbrella groups are `all`, `ci`, `plugin-prerelease`,
 `release-checks`, `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`,
 `qa-parity`, `qa-live`, and `npm-telegram`. Use the narrowest group that covers
 the failed box. After a targeted release-check fix, do not restart the full
-umbrella by habit: dispatch the matching `rerun_group`, cancel older duplicate
-runs for the same target/group, and rerun only the parent verifier/evidence step
-after the child is green unless the release evidence is stale.
+umbrella by habit: dispatch the matching `rerun_group` and rerun only the parent
+verifier/evidence step after the child is green unless the release evidence is
+stale. For a single failed live/E2E shard, use
+`-f rerun_group=live-e2e -f live_suite_filter=<suite_id>` so the Blacksmith
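+workflow only spends setup and queue time on that suite. `<suite_id>` must equal the workflow's `suite_id` exactly, because jobs and steps whose id does not match the filter are skipped.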
 
 ### Release Evidence
 
diff --git a/.github/workflows/full-release-validation.yml b/.github/workflows/full-release-validation.yml
index 49b76948b5f..fe4ab872f7f 100644
--- a/.github/workflows/full-release-validation.yml
+++ b/.github/workflows/full-release-validation.yml
@@ -53,6 +53,11 @@ on:
           - qa-parity
           - qa-live
           - npm-telegram
+      live_suite_filter:
+        description: Optional exact live suite id for focused live/E2E reruns; blank runs all selected live suites
+        required: false
+        default: ""
+        type: string
       npm_telegram_package_spec:
         description: Optional published package spec for the post-publish Telegram E2E lane
         required: false
@@ -83,7 +88,7 @@ permissions:
 
 concurrency:
   group: full-release-validation-${{ inputs.ref }}-${{ inputs.rerun_group }}
-  cancel-in-progress: true
+  cancel-in-progress: false
 
 env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
@@ -123,6 +128,7 @@ jobs:
           NPM_TELEGRAM_PACKAGE_SPEC: ${{ inputs.npm_telegram_package_spec }}
           EVIDENCE_PACKAGE_SPEC: ${{ inputs.evidence_package_spec }}
           RERUN_GROUP: ${{ inputs.rerun_group }}
+          LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }}
         run: |
           {
             echo "## Full release validation"
@@ -131,6 +137,9 @@ jobs:
             echo "- Target SHA: \`${TARGET_SHA}\`"
             echo "- Child workflow ref: \`${CHILD_WORKFLOW_REF}\`"
             echo "- Rerun group: \`${RERUN_GROUP}\`"
+            if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then
+              echo "- Live suite filter: \`${LIVE_SUITE_FILTER}\`"
+            fi
             if [[ "$RERUN_GROUP" == "all" || "$RERUN_GROUP" == "ci" ]]; then
               echo "- Normal CI: \`CI\` with \`target_ref=${TARGET_SHA}\`"
             else
@@ -213,19 +222,6 @@ jobs:
             echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
             echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
-            cleanup_child_run() {
-              local exit_code=$?
-              trap - EXIT INT TERM
-              local child_status
-              child_status="$(gh run view "$run_id" --json status --jq '.status' 2>/dev/null || true)"
-              if [[ "$child_status" != "completed" ]]; then
-                echo "Cancelling child ${workflow} run ${run_id} after parent exit (${exit_code})."
-                gh run cancel "$run_id" || gh api -X POST "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/force-cancel" || true
-              fi
-              return "$exit_code"
-            }
-            trap cleanup_child_run EXIT INT TERM
-
             while true; do
               status="$(gh run view "$run_id" --json status --jq '.status')"
               if [[ "$status" == "completed" ]]; then
@@ -252,23 +248,6 @@ jobs:
             echo "- Target SHA: \`${TARGET_SHA}\`"
           } >> "$GITHUB_STEP_SUMMARY"
 
-          cancel_same_sha_push_ci() {
-            local run_ids run_id
-            run_ids="$(
-              gh run list --workflow ci.yml --limit 100 --json databaseId,event,headSha,status \
-                --jq 'map(select(.event == "push" and .headSha == env.TARGET_SHA and (.status == "queued" or .status == "in_progress" or .status == "waiting" or .status == "pending"))) | .[].databaseId'
-            )"
-            if [[ -z "${run_ids// }" ]]; then
-              return 0
-            fi
-            while IFS= read -r run_id; do
-              [[ -n "${run_id// }" ]] || continue
-              echo "Cancelling same-SHA push CI run ${run_id}; Full Release Validation dispatches the full manual CI child for ${TARGET_SHA}."
-              gh run cancel "$run_id" || gh api -X POST "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/force-cancel" || true
-            done <<< "$run_ids"
-          }
-
-          cancel_same_sha_push_ci
           dispatch_and_wait ci.yml -f target_ref="$TARGET_SHA" -f include_android=true
 
   plugin_prerelease:
@@ -328,19 +307,6 @@ jobs:
             echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
             echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
-            cleanup_child_run() {
-              local exit_code=$?
-              trap - EXIT INT TERM
-              local child_status
-              child_status="$(gh run view "$run_id" --json status --jq '.status' 2>/dev/null || true)"
-              if [[ "$child_status" != "completed" ]]; then
-                echo "Cancelling child ${workflow} run ${run_id} after parent exit (${exit_code})."
-                gh run cancel "$run_id" || gh api -X POST "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/force-cancel" || true
-              fi
-              return "$exit_code"
-            }
-            trap cleanup_child_run EXIT INT TERM
-
             while true; do
               status="$(gh run view "$run_id" --json status --jq '.status')"
               if [[ "$status" == "completed" ]]; then
@@ -391,6 +357,7 @@ jobs:
           MODE: ${{ inputs.mode }}
           RELEASE_PROFILE: ${{ inputs.release_profile }}
           RERUN_GROUP: ${{ inputs.rerun_group }}
+          LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }}
         run: |
           set -euo pipefail
 
@@ -430,19 +397,6 @@ jobs:
             echo "Dispatched ${workflow}: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
             echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
-            cleanup_child_run() {
-              local exit_code=$?
-              trap - EXIT INT TERM
-              local child_status
-              child_status="$(gh run view "$run_id" --json status --jq '.status' 2>/dev/null || true)"
-              if [[ "$child_status" != "completed" ]]; then
-                echo "Cancelling child ${workflow} run ${run_id} after parent exit (${exit_code})."
-                gh run cancel "$run_id" || gh api -X POST "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/force-cancel" || true
-              fi
-              return "$exit_code"
-            }
-            trap cleanup_child_run EXIT INT TERM
-
             while true; do
               status="$(gh run view "$run_id" --json status --jq '.status')"
               if [[ "$status" == "completed" ]]; then
@@ -471,6 +425,9 @@ jobs:
             echo "- Cross-OS mode: \`${MODE}\`"
             echo "- Release profile: \`${RELEASE_PROFILE}\`"
             echo "- Rerun group: \`${RERUN_GROUP}\`"
+            if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then
+              echo "- Live suite filter: \`${LIVE_SUITE_FILTER}\`"
+            fi
           } >> "$GITHUB_STEP_SUMMARY"
 
           child_rerun_group="$RERUN_GROUP"
@@ -478,13 +435,19 @@ jobs:
             child_rerun_group=all
           fi
 
-          dispatch_and_wait openclaw-release-checks.yml \
-            -f ref="$TARGET_SHA" \
-            -f expected_sha="$TARGET_SHA" \
-            -f provider="$PROVIDER" \
-            -f mode="$MODE" \
-            -f release_profile="$RELEASE_PROFILE" \
+          args=(
+            -f ref="$TARGET_SHA"
+            -f expected_sha="$TARGET_SHA"
+            -f provider="$PROVIDER"
+            -f mode="$MODE"
+            -f release_profile="$RELEASE_PROFILE"
             -f rerun_group="$child_rerun_group"
+          )
+          if [[ -n "${LIVE_SUITE_FILTER// }" ]]; then
+            args+=(-f live_suite_filter="$LIVE_SUITE_FILTER")
+          fi
+
+          dispatch_and_wait openclaw-release-checks.yml "${args[@]}"
 
   npm_telegram:
     name: Run post-publish Telegram E2E
@@ -538,19 +501,6 @@ jobs:
           echo "Dispatched npm-telegram-beta-e2e.yml: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${run_id}"
           echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
 
-          cleanup_child_run() {
-            local exit_code=$?
-            trap - EXIT INT TERM
-            local child_status
-            child_status="$(gh run view "$run_id" --json status --jq '.status' 2>/dev/null || true)"
-            if [[ "$child_status" != "completed" ]]; then
-              echo "Cancelling npm-telegram-beta-e2e.yml child run ${run_id} after parent exit (${exit_code})."
-              gh run cancel "$run_id" || gh api -X POST "repos/${GITHUB_REPOSITORY}/actions/runs/${run_id}/force-cancel" || true
-            fi
-            return "$exit_code"
-          }
-          trap cleanup_child_run EXIT INT TERM
-
           while true; do
             status="$(gh run view "$run_id" --json status --jq '.status')"
             if [[ "$status" == "completed" ]]; then
diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
index bf85f14a580..dd6335c5f84 100644
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -63,6 +63,11 @@ on:
         required: false
         default: ""
         type: string
+      live_suite_filter:
+        description: Optional exact live suite id to run for focused failed-shard recovery; blank runs all selected suites
+        required: false
+        default: ""
+        type: string
       release_test_profile:
         description: Release coverage profile for live/Docker/provider breadth
         required: false
@@ -133,6 +138,11 @@ on:
         required: false
         default: ""
         type: string
+      live_suite_filter:
+        description: Optional exact live suite id to run for focused failed-shard recovery; blank runs all selected suites
+        required: false
+        default: ""
+        type: string
       release_test_profile:
         description: Release coverage profile for live/Docker/provider breadth
         required: false
@@ -296,7 +306,7 @@ jobs:
 
   validate_release_live_cache:
     needs: validate_selected_ref
-    if: inputs.include_live_suites && !inputs.live_models_only
+    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'live-cache')
     runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 60
     env:
@@ -335,7 +345,7 @@ jobs:
 
   validate_repo_e2e:
     needs: validate_selected_ref
-    if: inputs.include_repo_e2e
+    if: inputs.include_repo_e2e && inputs.live_suite_filter == ''
     runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: 90
     env:
@@ -362,7 +372,7 @@ jobs:
 
   validate_special_e2e:
     needs: validate_selected_ref
-    if: inputs.include_repo_e2e || (inputs.include_live_suites && !inputs.live_models_only)
+    if: (inputs.include_repo_e2e || (inputs.include_live_suites && !inputs.live_models_only)) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'openshell-e2e' || inputs.live_suite_filter == 'openai-ws-stream-live-e2e')
     runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: ${{ matrix.timeout_minutes }}
     strategy:
@@ -401,11 +411,15 @@ jobs:
 
       - name: Build dist for special E2E
         if: |
-          (inputs.include_repo_e2e && matrix.requires_repo_e2e) ||
-          (inputs.include_live_suites && matrix.requires_live_suites)
+          (
+            (inputs.include_repo_e2e && matrix.requires_repo_e2e) ||
+            (inputs.include_live_suites && matrix.requires_live_suites)
+          ) &&
+          (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: pnpm build
 
       - name: Configure suite-specific env
+        if: inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id
         shell: bash
         run: |
           set -euo pipefail
@@ -417,6 +431,7 @@ jobs:
           esac
 
       - name: Validate suite credentials
+        if: inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id
         shell: bash
         run: |
           set -euo pipefail
@@ -431,8 +446,11 @@ jobs:
 
       - name: Run ${{ matrix.label }}
         if: |
-          (inputs.include_repo_e2e && matrix.requires_repo_e2e) ||
-          (inputs.include_live_suites && matrix.requires_live_suites)
+          (
+            (inputs.include_repo_e2e && matrix.requires_repo_e2e) ||
+            (inputs.include_live_suites && matrix.requires_live_suites)
+          ) &&
+          (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: ${{ matrix.command }}
 
   validate_docker_e2e:
@@ -1278,7 +1296,7 @@ jobs:
 
   prepare_live_test_image:
     needs: validate_selected_ref
-    if: inputs.include_live_suites
+    if: inputs.include_live_suites && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-') || startsWith(inputs.live_suite_filter, 'docker-live-models'))
     runs-on: blacksmith-32vcpu-ubuntu-2404
     timeout-minutes: 60
     permissions:
@@ -1351,7 +1369,7 @@ jobs:
   validate_live_models_docker:
     name: Docker live models (${{ matrix.provider_label }})
     needs: [validate_selected_ref, prepare_live_test_image]
-    if: inputs.include_live_suites && inputs.live_model_providers == ''
+    if: inputs.include_live_suites && inputs.live_model_providers == '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models')
     runs-on: blacksmith-32vcpu-ubuntu-2404
     timeout-minutes: 75
     strategy:
@@ -1501,7 +1519,7 @@ jobs:
   validate_live_models_docker_targeted:
     name: Docker live models (selected providers)
     needs: [validate_selected_ref, prepare_live_test_image]
-    if: inputs.include_live_suites && inputs.live_model_providers != ''
+    if: inputs.include_live_suites && inputs.live_model_providers != '' && (inputs.live_suite_filter == '' || inputs.live_suite_filter == 'docker-live-models')
     runs-on: blacksmith-32vcpu-ubuntu-2404
     timeout-minutes: 75
     env:
@@ -1674,7 +1692,7 @@ jobs:
 
   validate_live_provider_suites:
     needs: validate_selected_ref
-    if: inputs.include_live_suites && !inputs.live_models_only
+    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || (startsWith(inputs.live_suite_filter, 'native-live-') && !startsWith(inputs.live_suite_filter, 'native-live-extensions-media') && inputs.live_suite_filter != 'native-live-extensions-a-k'))
     runs-on: blacksmith-8vcpu-ubuntu-2404
     timeout-minutes: ${{ matrix.timeout_minutes }}
     strategy:
@@ -1782,6 +1800,7 @@ jobs:
             command: node .release-harness/scripts/test-live-shard.mjs native-live-extensions-moonshot
             timeout_minutes: 60
             profile_env_only: false
+            advisory: true
             profiles: full
           - suite_id: native-live-extensions-openai
             label: Native live OpenAI plugin
@@ -1852,14 +1871,14 @@ jobs:
       OPENCLAW_VITEST_MAX_WORKERS: "2"
     steps:
       - name: Checkout selected ref
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
           fetch-depth: 1
 
       - name: Checkout trusted live shard harness
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ github.sha }}
@@ -1867,7 +1886,7 @@ jobs:
           path: .release-harness
 
       - name: Setup Node environment
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: ./.github/actions/setup-node-env
         with:
           node-version: ${{ env.NODE_VERSION }}
@@ -1875,11 +1894,11 @@ jobs:
           install-bun: "true"
 
       - name: Hydrate live auth/profile inputs
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: bash scripts/ci-hydrate-live-auth.sh
 
       - name: Configure suite-specific env
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         shell: bash
         run: |
           set -euo pipefail
@@ -1932,15 +1951,28 @@ jobs:
           esac
 
       - name: Run ${{ matrix.label }}
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         env:
           OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
-        run: bash .release-harness/scripts/ci-live-command-retry.sh
+          OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }} # when "true", the script below reports a failed suite as a warning and exits 0
+        run: |
+          set +e
+          bash .release-harness/scripts/ci-live-command-retry.sh
+          status=$?
+          set -e
+          if [[ "$status" -eq 0 ]]; then
+            exit 0
+          fi
+          if [[ "${OPENCLAW_LIVE_SUITE_ADVISORY:-}" == "true" ]]; then
+            echo "::warning::Advisory live suite failed with exit code ${status}: ${{ matrix.suite_id }}"
+            exit 0
+          fi
+          exit "$status"
 
   validate_live_docker_provider_suites:
     name: Docker live suites (${{ matrix.label }})
     needs: [validate_selected_ref, prepare_live_test_image]
-    if: inputs.include_live_suites && !inputs.live_models_only
+    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'live-'))
     runs-on: blacksmith-32vcpu-ubuntu-2404
     timeout-minutes: ${{ matrix.timeout_minutes }}
     strategy:
@@ -2024,14 +2056,14 @@ jobs:
       OPENCLAW_VITEST_MAX_WORKERS: "2"
     steps:
       - name: Checkout selected ref
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
           fetch-depth: 1
 
       - name: Checkout trusted live shard harness
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ github.sha }}
@@ -2039,7 +2071,7 @@ jobs:
           path: .release-harness
 
       - name: Setup Node environment
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: ./.github/actions/setup-node-env
         with:
           node-version: ${{ env.NODE_VERSION }}
@@ -2047,11 +2079,11 @@ jobs:
           install-bun: "true"
 
       - name: Hydrate live auth/profile inputs
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: bash scripts/ci-hydrate-live-auth.sh
 
       - name: Log in to GHCR
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
         with:
           registry: ghcr.io
@@ -2059,7 +2091,7 @@ jobs:
           password: ${{ github.token }}
 
       - name: Configure suite-specific env
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         shell: bash
         run: |
           set -euo pipefail
@@ -2093,7 +2125,7 @@ jobs:
           esac
 
       - name: Run ${{ matrix.label }}
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         env:
           OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
         run: bash .release-harness/scripts/ci-live-command-retry.sh
@@ -2101,7 +2133,7 @@ jobs:
   validate_live_media_provider_suites:
     name: Live media suites (${{ matrix.label }})
     needs: validate_selected_ref
-    if: inputs.include_live_suites && !inputs.live_models_only
+    if: inputs.include_live_suites && !inputs.live_models_only && (inputs.live_suite_filter == '' || startsWith(inputs.live_suite_filter, 'native-live-extensions-media') || inputs.live_suite_filter == 'native-live-extensions-a-k')
     runs-on: blacksmith-8vcpu-ubuntu-2404
     container:
       image: ghcr.io/openclaw/openclaw-live-media-runner:ubuntu-24.04
@@ -2194,14 +2226,14 @@ jobs:
       OPENCLAW_VITEST_MAX_WORKERS: "2"
     steps:
       - name: Checkout selected ref
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
           fetch-depth: 1
 
       - name: Checkout trusted live shard harness
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: actions/checkout@v6
         with:
           ref: ${{ github.sha }}
@@ -2209,7 +2241,7 @@ jobs:
           path: .release-harness
 
       - name: Verify preinstalled live media dependencies
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         shell: bash
         run: |
           set -euo pipefail
@@ -2217,7 +2249,7 @@ jobs:
           ffprobe -version | head -1
 
       - name: Setup Node environment
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         uses: ./.github/actions/setup-node-env
         with:
           node-version: ${{ env.NODE_VERSION }}
@@ -2225,11 +2257,11 @@ jobs:
           install-bun: "true"
 
       - name: Hydrate live auth/profile inputs
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: bash scripts/ci-hydrate-live-auth.sh
 
       - name: Configure suite-specific env
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         shell: bash
         run: |
           set -euo pipefail
@@ -2238,5 +2270,5 @@ jobs:
           fi
 
       - name: Run ${{ matrix.label }}
-        if: contains(matrix.profiles, inputs.release_test_profile)
+        if: contains(matrix.profiles, inputs.release_test_profile) && (inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id)
         run: ${{ matrix.command }}
diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml
index 1896c0e301e..87c27cdd6af 100644
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -53,10 +53,15 @@ on:
           - qa
           - qa-parity
           - qa-live
+      live_suite_filter:
+        description: Optional exact live suite id for focused live/E2E reruns; blank runs all selected live suites
+        required: false
+        default: ""
+        type: string
 
 concurrency:
   group: openclaw-release-checks-${{ inputs.expected_sha || inputs.ref }}-${{ inputs.rerun_group }}
-  cancel-in-progress: true
+  cancel-in-progress: false
 
 env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
@@ -77,6 +82,7 @@ jobs:
       mode: ${{ steps.inputs.outputs.mode }}
       release_profile: ${{ steps.inputs.outputs.release_profile }}
       rerun_group: ${{ steps.inputs.outputs.rerun_group }}
+      live_suite_filter: ${{ steps.inputs.outputs.live_suite_filter }}
     steps:
       - name: Require main or release workflow ref for release checks
         env:
@@ -192,6 +198,7 @@ jobs:
           RELEASE_MODE_INPUT: ${{ inputs.mode }}
           RELEASE_PROFILE_INPUT: ${{ inputs.release_profile }}
           RELEASE_RERUN_GROUP_INPUT: ${{ inputs.rerun_group }}
+          RELEASE_LIVE_SUITE_FILTER_INPUT: ${{ inputs.live_suite_filter }}
         run: |
           set -euo pipefail
           {
@@ -200,6 +207,7 @@ jobs:
             printf 'mode=%s\n' "$RELEASE_MODE_INPUT"
             printf 'release_profile=%s\n' "$RELEASE_PROFILE_INPUT"
             printf 'rerun_group=%s\n' "$RELEASE_RERUN_GROUP_INPUT"
+            printf 'live_suite_filter=%s\n' "$RELEASE_LIVE_SUITE_FILTER_INPUT"
           } >> "$GITHUB_OUTPUT"
 
       - name: Summarize validated ref
@@ -211,6 +219,7 @@ jobs:
           RELEASE_MODE: ${{ inputs.mode }}
           RELEASE_PROFILE: ${{ inputs.release_profile }}
           RELEASE_RERUN_GROUP: ${{ inputs.rerun_group }}
+          RELEASE_LIVE_SUITE_FILTER: ${{ inputs.live_suite_filter }}
         run: |
           {
             echo "## Release checks"
@@ -222,6 +231,9 @@ jobs:
             echo "- Cross-OS mode: \`${RELEASE_MODE}\`"
             echo "- Release profile: \`${RELEASE_PROFILE}\`"
             echo "- Rerun group: \`${RELEASE_RERUN_GROUP}\`"
+            if [[ -n "${RELEASE_LIVE_SUITE_FILTER// }" ]]; then
+              echo "- Live suite filter: \`${RELEASE_LIVE_SUITE_FILTER}\`"
+            fi
             echo "- This run will execute cross-OS release validation, install smoke, QA Lab parity, Matrix, and Telegram lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan."
           } >> "$GITHUB_STEP_SUMMARY"
 
@@ -342,6 +354,7 @@ jobs:
       include_openwebui: false
       include_live_suites: true
       release_test_profile: ${{ needs.resolve_target.outputs.release_profile }}
+      live_suite_filter: ${{ needs.resolve_target.outputs.live_suite_filter }}
     secrets: &live_e2e_release_secrets
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
diff --git a/scripts/qa-parity-report.ts b/scripts/qa-parity-report.ts
new file mode 100644
index 00000000000..bd84f6380da
--- /dev/null
+++ b/scripts/qa-parity-report.ts
@@ -0,0 +1,85 @@
+import { runQaParityReportCommand } from "../extensions/qa-lab/src/cli.runtime.ts";
+
+type Options = {
+  baselineLabel?: string; // --baseline-label
+  baselineSummary?: string; // --baseline-summary (the entry script throws when it is missing)
+  candidateLabel?: string; // --candidate-label
+  candidateSummary?: string; // --candidate-summary (the entry script throws when it is missing)
+  outputDir?: string; // --output-dir
+  repoRoot?: string; // --repo-root
+};
+
+function takeValue(args: string[], index: number, flag: string): string {
+  const candidate = args[index + 1]; // token immediately after the flag, if any
+  if (candidate && !candidate.startsWith("-")) {
+    return candidate;
+  }
+  throw new Error(`${flag} requires a value.`);
+}
+
+function parseArgs(args: string[]): Options {
+  const opts: Options = {}; // a key is set only for flags that were actually passed
+  for (let index = 0; index < args.length; index += 1) {
+    const arg = args[index];
+    switch (arg) {
+      case "--help":
+      case "-h":
+        process.stdout.write(`Usage: openclaw qa parity-report [options]
+
+Options:
+  --candidate-summary <path>  Candidate qa-suite-summary.json path
+  --baseline-summary <path>   Baseline qa-suite-summary.json path
+  --candidate-label <label>   Candidate display label
+  --baseline-label <label>    Baseline display label
+  --repo-root <path>          Repository root to target
+  --output-dir <path>         Artifact directory for the parity report
+  -h, --help                  Display help
+`);
+        process.exit(0); // ends the process, so control never falls into the next case
+      case "--baseline-label":
+        opts.baselineLabel = takeValue(args, index, arg);
+        index += 1; // step over the value that takeValue just consumed
+        break;
+      case "--baseline-summary":
+        opts.baselineSummary = takeValue(args, index, arg);
+        index += 1;
+        break;
+      case "--candidate-label":
+        opts.candidateLabel = takeValue(args, index, arg);
+        index += 1;
+        break;
+      case "--candidate-summary":
+        opts.candidateSummary = takeValue(args, index, arg);
+        index += 1;
+        break;
+      case "--output-dir":
+        opts.outputDir = takeValue(args, index, arg);
+        index += 1;
+        break;
+      case "--repo-root":
+        opts.repoRoot = takeValue(args, index, arg);
+        index += 1;
+        break;
+      default:
+        throw new Error(`Unknown qa parity-report option: ${arg}`); // any other token is rejected, including bare positionals
+    }
+  }
+  return opts;
+}
+
+const opts = parseArgs(process.argv.slice(2));
+const { baselineSummary, candidateSummary } = opts; // the two mandatory inputs
+if (!candidateSummary) {
+  throw new Error("--candidate-summary is required.");
+}
+if (!baselineSummary) {
+  throw new Error("--baseline-summary is required.");
+}
+// Optional flags are forwarded only when set; unset ones are omitted, not passed as undefined.
+await runQaParityReportCommand({
+  baselineSummary,
+  candidateSummary,
+  ...(opts.baselineLabel ? { baselineLabel: opts.baselineLabel } : {}),
+  ...(opts.candidateLabel ? { candidateLabel: opts.candidateLabel } : {}),
+  ...(opts.outputDir ? { outputDir: opts.outputDir } : {}),
+  ...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}),
+});
diff --git a/scripts/run-node.mjs b/scripts/run-node.mjs
index b70d38e860a..bc8fa28a362 100644
--- a/scripts/run-node.mjs
+++ b/scripts/run-node.mjs
@@ -809,6 +809,34 @@ const shouldUseExistingDistForGatewayClient = (deps, buildRequirement) =>
   deps.env.OPENCLAW_FORCE_BUILD !== "1" &&
   statMtime(deps.distEntry, deps.fs) != null;
 
+const isQaParityReportCommand = ([scope, verb]) => scope === "qa" && verb === "parity-report";
+
+const shouldRunQaParityReportFromSource = ({ args, cwd, env, fs }, { reason }) =>
+  reason === "missing_private_qa_dist" &&
+  isQaParityReportCommand(args) &&
+  env.OPENCLAW_FORCE_BUILD !== "1" &&
+  statMtime(path.join(cwd, "extensions", "qa-lab", "src", "cli.runtime.ts"), fs) != null;
+
+const runQaParityReportFromSource = async (deps) => {
+  // Execute scripts/qa-parity-report.ts through tsx, forwarding every argument
+  // that follows the leading "qa parity-report" pair.
+  const entryArgs = ["--import", "tsx", path.join(deps.cwd, "scripts", "qa-parity-report.ts")];
+  const child = deps.spawn(deps.execPath, [...entryArgs, ...deps.args.slice(2)], {
+    cwd: deps.cwd,
+    env: deps.env,
+    // Child stdout/stderr are piped only when an output tee is configured.
+    stdio: deps.outputTee ? ["inherit", "pipe", "pipe"] : "inherit",
+  });
+  pipeSpawnedOutput(child, deps);
+  const outcome = await waitForSpawnedProcess(child, deps);
+  const interruptedExitCode = getInterruptedSpawnExitCode(outcome);
+  if (interruptedExitCode === null) {
+    // Not interrupted: a missing exit code is reported as failure (1).
+    return outcome.exitCode ?? 1;
+  }
+  return interruptedExitCode;
+};
+
 export async function runNodeMain(params = {}) {
   const deps = {
     spawn: params.spawn ?? spawn,
@@ -847,9 +875,15 @@ export async function runNodeMain(params = {}) {
       deps,
       buildRequirement,
     );
+    const useQaParityReportSource = shouldRunQaParityReportFromSource(deps, buildRequirement);
     if (useExistingGatewayClientDist) {
       buildRequirement = { shouldBuild: false, reason: "gateway_client_existing_dist" };
     }
+    if (useQaParityReportSource) {
+      logRunner("Running QA parity report from source without rebuilding private QA dist.", deps);
+      exitCode = await runQaParityReportFromSource(deps);
+      return await closeRunNodeOutputTee(deps, exitCode);
+    }
     if (!buildRequirement.shouldBuild) {
       if (!useExistingGatewayClientDist && !shouldSkipCleanWatchRuntimeSync(deps)) {
         const runtimePostBuildRequirement = resolveRuntimePostBuildRequirement(deps);
diff --git a/scripts/test-projects.test-support.mjs b/scripts/test-projects.test-support.mjs
index 0730ccc19bd..ab73decf15c 100644
--- a/scripts/test-projects.test-support.mjs
+++ b/scripts/test-projects.test-support.mjs
@@ -247,6 +247,7 @@ const TOOLING_SOURCE_TEST_TARGETS = new Map([
     ],
   ],
   ["scripts/run-oxlint.mjs", ["test/scripts/run-oxlint.test.ts"]],
+  ["scripts/run-node.mjs", ["src/infra/run-node.test.ts"]],
   ["scripts/ci-run-timings.mjs", ["test/scripts/ci-run-timings.test.ts"]],
   ["scripts/test-extension-batch.mjs", ["test/scripts/test-extension.test.ts"]],
   ["scripts/lib/extension-test-plan.mjs", ["test/scripts/test-extension.test.ts"]],
diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts
index 60df3fd4ef7..4dbf81e4376 100644
--- a/src/agents/model-catalog.test.ts
+++ b/src/agents/model-catalog.test.ts
@@ -125,6 +125,26 @@ describe("loadModelCatalog", () => {
     }
   });
 
+  it("reloads dynamic registry entries after clearing the cache", async () => {
+    const models = [{ id: "existing", name: "Existing", provider: "ollama" }];
+    mockPiDiscoveryModels(models);
+
+    const first = await loadModelCatalog({ config: {} as OpenClawConfig });
+    expect(first).toContainEqual({ id: "existing", name: "Existing", provider: "ollama" });
+
+    models.push({ id: "glm-5.1:cloud", name: "GLM 5.1 Cloud", provider: "ollama" }); // a model that shows up only after the first load
+    resetModelCatalogCacheForTest();
+    mockPiDiscoveryModels(models); // re-installed after the reset; presumably required because the test reset restores the default SDK import
+
+    const second = await loadModelCatalog({ config: {} as OpenClawConfig });
+    expect(second).toContainEqual({ id: "existing", name: "Existing", provider: "ollama" });
+    expect(second).toContainEqual({
+      id: "glm-5.1:cloud",
+      name: "GLM 5.1 Cloud",
+      provider: "ollama",
+    });
+  });
+
   it("returns partial results on discovery errors", async () => {
     setLoggerOverride({ level: "silent", consoleLevel: "warn" });
     try {
diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts
index d256255f695..fe576e58fe8 100644
--- a/src/agents/model-catalog.ts
+++ b/src/agents/model-catalog.ts
@@ -62,11 +62,11 @@ function loadModelSuppression() {
 export function resetModelCatalogCache() {
   modelCatalogPromise = null;
   hasLoggedModelCatalogError = false;
-  importPiSdk = defaultImportPiSdk;
 }
 
 export function resetModelCatalogCacheForTest() {
   resetModelCatalogCache();
+  importPiSdk = defaultImportPiSdk;
 }
 
 // Test-only escape hatch: allow mocking the dynamic import to simulate transient failures.
diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts
index aa89107a164..139a68e87d3 100644
--- a/src/agents/model-selection-shared.ts
+++ b/src/agents/model-selection-shared.ts
@@ -51,6 +51,23 @@ function sanitizeModelWarningValue(value: string): string {
   return sanitizeForLog(stripped.slice(0, controlBoundary));
 }
 
+function mergeModelCatalogEntries(params: {
+  primary: readonly ModelCatalogEntry[];
+  secondary: readonly ModelCatalogEntry[];
+}): ModelCatalogEntry[] {
+  // Primary entries are kept verbatim; a secondary entry is appended only when
+  // its provider/id key has not been seen yet, so primary wins on conflicts.
+  const keyOf = (entry: ModelCatalogEntry) => modelKey(entry.provider, entry.id);
+  const knownKeys = new Set(params.primary.map(keyOf));
+  const extras = params.secondary.filter((entry) => {
+    const key = keyOf(entry);
+    const isNew = !knownKeys.has(key);
+    knownKeys.add(key);
+    return isNew;
+  });
+  return [...params.primary, ...extras];
+}
+
 export function inferUniqueProviderFromConfiguredModels(params: {
   cfg: OpenClawConfig;
   model: string;
@@ -565,7 +582,11 @@ export function buildAllowedModelSetWithFallbacks(params: {
     cfg: params.cfg,
     defaultProvider: params.defaultProvider,
   });
-  const catalog = params.catalog.map((entry) => applyModelCatalogMetadata({ entry, metadata }));
+  const configuredCatalog = buildConfiguredModelCatalog({ cfg: params.cfg });
+  const catalog = mergeModelCatalogEntries({
+    primary: params.catalog,
+    secondary: configuredCatalog,
+  }).map((entry) => applyModelCatalogMetadata({ entry, metadata }));
   const rawAllowlist = (() => {
     const modelMap = params.cfg.agents?.defaults?.models ?? {};
     return Object.keys(modelMap);
diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts
index a21a8a31e3d..669606de74f 100644
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -662,6 +662,51 @@ describe("model-selection", () => {
       ]);
     });
 
+    it("keeps configured provider models visible when the catalog is otherwise allow-any", () => {
+      const cfg: OpenClawConfig = {
+        agents: {
+          defaults: {
+            model: { primary: "ollama/existing" }, // no agents.defaults.models map is set in this config
+          },
+        },
+        models: {
+          providers: {
+            ollama: {
+              baseUrl: "http://127.0.0.1:11434",
+              api: "ollama",
+              apiKey: "ollama-local",
+              models: [
+                {
+                  id: "glm-5.1:cloud", // declared only in provider config, absent from the catalog passed below
+                  name: "GLM 5.1 Cloud",
+                  contextWindow: 131_072,
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig;
+
+      const result = buildAllowedModelSet({
+        cfg,
+        catalog: [{ provider: "ollama", id: "existing", name: "Existing" }],
+        defaultProvider: "ollama",
+        defaultModel: "existing",
+      });
+
+      expect(result.allowAny).toBe(true);
+      expect(result.allowedCatalog).toEqual([
+        { provider: "ollama", id: "existing", name: "Existing" }, // catalog entry comes first
+        {
+          provider: "ollama",
+          id: "glm-5.1:cloud", // followed by the config-only provider model
+          name: "GLM 5.1 Cloud",
+          contextWindow: 131_072,
+        },
+      ]);
+      expect(result.allowedKeys.has("ollama/glm-5.1:cloud")).toBe(true);
+    });
+
     it("matches allowlisted catalog entries with normalized provider and model ids", () => {
       const cfg: OpenClawConfig = {
         agents: {
diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts
index 24304155484..95ab7d1360f 100644
--- a/src/gateway/server-methods/chat.directive-tags.test.ts
+++ b/src/gateway/server-methods/chat.directive-tags.test.ts
@@ -998,6 +998,36 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
     );
   });
 
+  it("chat.send broadcasts final replies for telegram-shaped session keys", async () => {
+    createTranscriptFixture("openclaw-chat-send-telegram-final-");
+    mockState.finalText = "telegram ok";
+    const respond = vi.fn();
+    const context = createChatContext();
+    const sessionKey = "agent:main:telegram:direct:123456"; // the telegram-shaped key named in the test title
+
+    const payload = await runNonStreamingChatSend({
+      context,
+      respond,
+      idempotencyKey: "idem-telegram-final",
+      sessionKey,
+    });
+
+    expect(payload).toEqual(
+      expect.objectContaining({
+        runId: "idem-telegram-final", // the run id is expected to equal the idempotency key
+        sessionKey,
+        state: "final",
+        message: expect.any(Object),
+      }),
+    );
+    expect(extractFirstTextBlock(payload)).toBe("telegram ok");
+    expect(context.nodeSendToSession).toHaveBeenCalledWith(
+      sessionKey,
+      "chat",
+      expect.objectContaining({ sessionKey, state: "final" }), // the final event must also be sent to the session itself
+    );
+  });
+
   it("chat.send keeps explicit delivery routes for channel-scoped sessions", async () => {
     createTranscriptFixture("openclaw-chat-send-origin-routing-");
     mockState.finalText = "ok";
diff --git a/src/gateway/server.reload.test.ts b/src/gateway/server.reload.test.ts
index 659184d3511..a75f502e91d 100644
--- a/src/gateway/server.reload.test.ts
+++ b/src/gateway/server.reload.test.ts
@@ -2,13 +2,7 @@ import fs from "node:fs/promises";
 import path from "node:path";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { WebSocket } from "ws";
-import {
-  __setModelCatalogImportForTest,
-  resetModelCatalogCacheForTest,
-} from "../agents/model-catalog.js";
-import { buildModelsProviderData } from "../auto-reply/reply/commands-models.js";
 import { resolveMainSessionKeyFromConfig } from "../config/sessions.js";
-import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { drainSystemEvents } from "../infra/system-events.js";
 import { withEnvAsync } from "../test-utils/env.js";
 import {
@@ -25,7 +19,7 @@ import {
   rpcReq,
   startServerWithClient,
   testState,
-  withGatewayServer,
+  withGatewayServer as withMinimalGatewayServer,
 } from "./test-helpers.js";
 
 const hoisted = vi.hoisted(() => {
@@ -165,11 +159,13 @@ const hoisted = vi.hoisted(() => {
     reloaderStop,
     getOnHotReload: () => onHotReload,
     getOnRestart: () => onRestart,
+    resetReloadCallbacks: () => {
+      onHotReload = null;
+      onRestart = null;
+    },
   };
 });
 
-type PiDiscoveryRuntimeModule = typeof import("../agents/pi-model-discovery-runtime.js");
-
 vi.mock("../cron/service.js", () => ({
   CronService: hoisted.CronService,
 }));
@@ -310,6 +306,7 @@ describe("gateway hot reload", () => {
     hoisted.resetModelCatalogCache.mockReset();
     hoisted.disposeAllSessionMcpRuntimes.mockReset();
     hoisted.disposeAllSessionMcpRuntimes.mockResolvedValue(undefined);
+    hoisted.resetReloadCallbacks();
   });
 
   afterEach(() => {
@@ -430,10 +427,10 @@ describe("gateway hot reload", () => {
   }
 
   async function withNonMinimalGatewayServer(
-    fn: Parameters<typeof withGatewayServer>[0],
-  ): ReturnType<typeof withGatewayServer> {
+    fn: Parameters<typeof withMinimalGatewayServer>[0],
+  ): ReturnType<typeof withMinimalGatewayServer> {
     return await withEnvAsync({ OPENCLAW_TEST_MINIMAL_GATEWAY: undefined }, async () =>
-      withGatewayServer(fn),
+      withMinimalGatewayServer(fn),
     );
   }
 
@@ -819,7 +816,7 @@ describe("gateway hot reload", () => {
   });
 
   it("clears the model catalog cache on model-related hot reloads", async () => {
-    await withGatewayServer(async () => {
+    await withNonMinimalGatewayServer(async () => {
       const onHotReload = hoisted.getOnHotReload();
       expect(onHotReload).toBeTypeOf("function");
 
@@ -852,7 +849,7 @@ describe("gateway hot reload", () => {
   });
 
   it("disposes cached MCP runtimes on MCP config hot reloads", async () => {
-    await withGatewayServer(async () => {
+    await withNonMinimalGatewayServer(async () => {
       const onHotReload = hoisted.getOnHotReload();
       expect(onHotReload).toBeTypeOf("function");
 
@@ -882,108 +879,6 @@ describe("gateway hot reload", () => {
     });
   });
 
-  it("makes newly available catalog models visible in-process after hot reload", async () => {
-    type TestRegistryEntry = { provider: string; id: string; name: string };
-    let registryEntries: TestRegistryEntry[] = [
-      { provider: "ollama", id: "existing", name: "Existing" },
-    ];
-    __setModelCatalogImportForTest(
-      async () =>
-        ({
-          discoverAuthStorage: () => ({}),
-          ModelRegistry: class {
-            getAll() {
-              return registryEntries;
-            }
-          },
-        }) as unknown as PiDiscoveryRuntimeModule,
-    );
-    resetModelCatalogCacheForTest();
-
-    try {
-      await withGatewayServer(async () => {
-        const onHotReload = hoisted.getOnHotReload();
-        expect(onHotReload).toBeTypeOf("function");
-
-        const baseConfig: OpenClawConfig = {
-          agents: {
-            defaults: {
-              model: {
-                primary: "ollama/existing",
-              },
-            },
-          },
-          models: {
-            providers: {
-              ollama: {
-                baseUrl: "http://127.0.0.1:11434",
-                api: "ollama",
-                apiKey: "ollama-local",
-                models: [
-                  {
-                    id: "existing",
-                    name: "Existing",
-                    reasoning: false,
-                    input: ["text"],
-                    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-                    contextWindow: 131_072,
-                    maxTokens: 4096,
-                  },
-                ],
-              },
-            },
-          },
-        };
-
-        const before = await buildModelsProviderData(baseConfig);
-        expect([...(before.byProvider.get("ollama") ?? new Set()).values()]).toEqual(["existing"]);
-
-        registryEntries = [
-          ...registryEntries,
-          { provider: "ollama", id: "glm-5.1:cloud", name: "GLM 5.1 Cloud" },
-        ];
-
-        const nextConfig = structuredClone(baseConfig);
-        await onHotReload?.(
-          {
-            changedPaths: ["models.providers.ollama.models"],
-            restartGateway: false,
-            restartReasons: [],
-            hotReasons: ["models.providers.ollama.models"],
-            reloadHooks: false,
-            restartGmailWatcher: false,
-            restartCron: false,
-            restartHeartbeat: false,
-            restartChannels: new Set(),
-            noopPaths: [],
-          },
-          nextConfig,
-        );
-
-        __setModelCatalogImportForTest(
-          async () =>
-            ({
-              discoverAuthStorage: () => ({}),
-              ModelRegistry: class {
-                getAll() {
-                  return registryEntries;
-                }
-              },
-            }) as unknown as PiDiscoveryRuntimeModule,
-        );
-        const after = await buildModelsProviderData(nextConfig);
-        expect([...(after.byProvider.get("ollama") ?? new Set()).values()]).toEqual([
-          "existing",
-          "glm-5.1:cloud",
-        ]);
-        expect(hoisted.resetModelCatalogCache).toHaveBeenCalledTimes(1);
-      });
-    } finally {
-      __setModelCatalogImportForTest();
-      resetModelCatalogCacheForTest();
-    }
-  });
-
   it("serves secrets.reload immediately after startup without race failures", async () => {
     await writeEnvRefConfig();
     process.env.OPENAI_API_KEY = "sk-startup"; // pragma: allowlist secret
diff --git a/src/infra/run-node.test.ts b/src/infra/run-node.test.ts
index d7fa4266209..8c8b7dec693 100644
--- a/src/infra/run-node.test.ts
+++ b/src/infra/run-node.test.ts
@@ -697,6 +697,56 @@ describe("run-node script", () => {
     });
   });
 
+  it("runs QA parity report from source without rebuilding private QA dist", async () => {
+    await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
+      await setupTrackedProject(tmp, {
+        files: {
+          "extensions/qa-lab/src/cli.runtime.ts": "export {};\n", // the source file run-node.mjs checks for before taking the from-source path
+        },
+        buildPaths: [DIST_ENTRY, BUILD_STAMP],
+      });
+
+      const spawnCalls: string[][] = [];
+      const spawn = (cmd: string, args: string[]) => {
+        spawnCalls.push([cmd, ...args]); // record every spawn so the exact command line can be asserted
+        return createExitedProcess(0); // presumably a stub child that has already exited with code 0
+      };
+
+      const exitCode = await runNodeMain({
+        cwd: tmp,
+        args: [
+          "qa",
+          "parity-report",
+          "--candidate-summary",
+          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          "--baseline-summary",
+          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+        ],
+        env: {
+          ...process.env,
+          OPENCLAW_RUNNER_LOG: "0",
+        },
+        spawn,
+        execPath: process.execPath,
+        platform: process.platform,
+      });
+
+      expect(exitCode).toBe(0);
+      expect(spawnCalls).toEqual([
+        [
+          process.execPath, // exactly one spawn is expected: node running the script through tsx
+          "--import",
+          "tsx",
+          path.join(tmp, "scripts", "qa-parity-report.ts"),
+          "--candidate-summary", // the leading "qa parity-report" pair is dropped; the remaining args pass through unchanged
+          ".artifacts/qa-e2e/gpt54/qa-suite-summary.json",
+          "--baseline-summary",
+          ".artifacts/qa-e2e/opus46/qa-suite-summary.json",
+        ],
+      ]);
+    });
+  });
+
   it("skips runtime postbuild restaging when the runtime stamp is current", async () => {
     await withTempDir({ prefix: "openclaw-run-node-" }, async (tmp) => {
       await setupTrackedProject(tmp, {
diff --git a/test/gateway.multi.e2e.test.ts b/test/gateway.multi.e2e.test.ts
index 85bbe977a5f..a772f1cf746 100644
--- a/test/gateway.multi.e2e.test.ts
+++ b/test/gateway.multi.e2e.test.ts
@@ -1,17 +1,11 @@
-import { randomUUID } from "node:crypto";
 import { afterAll, describe, expect, it } from "vitest";
 import { GatewayClient } from "../src/gateway/client.js";
-import { connectGatewayClient } from "../src/gateway/test-helpers.e2e.js";
-import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../src/utils/message-channel.js";
 import {
-  type ChatEventPayload,
   type GatewayInstance,
   connectNode,
-  extractFirstTextBlock,
   postJson,
   spawnGatewayInstance,
   stopGatewayInstance,
-  waitForChatFinalEvent,
   waitForNodeStatus,
 } from "./helpers/gateway-e2e-harness.js";
 
@@ -20,15 +14,11 @@ const E2E_TIMEOUT_MS = 120_000;
 describe("gateway multi-instance e2e", () => {
   const instances: GatewayInstance[] = [];
   const nodeClients: GatewayClient[] = [];
-  const chatClients: GatewayClient[] = [];
 
   afterAll(async () => {
     for (const client of nodeClients) {
       client.stop();
     }
-    for (const client of chatClients) {
-      client.stop();
-    }
     for (const inst of instances) {
       await stopGatewayInstance(inst);
     }
@@ -76,51 +66,4 @@ describe("gateway multi-instance e2e", () => {
       ]);
     },
   );
-
-  it(
-    "delivers final chat event for telegram-shaped session keys",
-    { timeout: E2E_TIMEOUT_MS },
-    async () => {
-      const gw = await spawnGatewayInstance("chat-telegram-fixture");
-      instances.push(gw);
-
-      const chatEvents: ChatEventPayload[] = [];
-      const chatClient = await connectGatewayClient({
-        url: `ws://127.0.0.1:${gw.port}`,
-        token: gw.gatewayToken,
-        clientName: GATEWAY_CLIENT_NAMES.CLI,
-        clientDisplayName: "chat-e2e-cli",
-        clientVersion: "1.0.0",
-        platform: "test",
-        mode: GATEWAY_CLIENT_MODES.CLI,
-        onEvent: (evt) => {
-          if (evt.event === "chat" && evt.payload && typeof evt.payload === "object") {
-            chatEvents.push(evt.payload as ChatEventPayload);
-          }
-        },
-      });
-      chatClients.push(chatClient);
-
-      const sessionKey = "agent:main:telegram:direct:123456";
-      const idempotencyKey = `idem-${randomUUID()}`;
-      const sendRes = await chatClient.request("chat.send", {
-        sessionKey,
-        message: "/whoami",
-        idempotencyKey,
-      });
-      expect(sendRes.status).toBe("started");
-      const runId = sendRes.runId;
-      expect(typeof runId).toBe("string");
-
-      const finalEvent = await waitForChatFinalEvent({
-        events: chatEvents,
-        runId: String(runId),
-        sessionKey,
-        timeoutMs: 90_000,
-      });
-      const finalText = extractFirstTextBlock(finalEvent.message);
-      expect(typeof finalText).toBe("string");
-      expect(finalText?.length).toBeGreaterThan(0);
-    },
-  );
 });
diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts
index 99a51b12fa6..8b69a763683 100644
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -131,6 +131,10 @@ describe("package artifact reuse", () => {
       "command: node .release-harness/scripts/test-live-shard.mjs native-live-src-agents",
     );
     expect(workflow).toContain("OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}");
+    expect(workflow).toContain("live_suite_filter:");
+    expect(workflow).toContain(
+      "inputs.live_suite_filter == '' || inputs.live_suite_filter == matrix.suite_id",
+    );
     expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.5");
     expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_AUTH=api-key");
     expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_USE_CI_SAFE_CODEX_CONFIG=1");
@@ -157,6 +161,9 @@ describe("package artifact reuse", () => {
     expect(workflow).toContain("suite_id: native-live-extensions-a-k");
     expect(workflow).toContain("suite_id: native-live-extensions-l-n");
     expect(workflow).toContain("suite_id: native-live-extensions-moonshot");
+    expect(workflow).toMatch(/suite_id: native-live-extensions-moonshot[\s\S]*?advisory: true/u);
+    expect(workflow).toContain("OPENCLAW_LIVE_SUITE_ADVISORY: ${{ matrix.advisory }}");
+    expect(workflow).toContain("Advisory live suite failed with exit code");
     expect(workflow).toContain("suite_id: native-live-extensions-openai");
     expect(workflow).toContain("suite_id: native-live-extensions-o-z-other");
     expect(workflow).toContain("validate_live_media_provider_suites:");
@@ -299,6 +306,10 @@ describe("package artifact reuse", () => {
       "OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}",
     );
     expect(workflow).toContain("rerun_group:");
+    expect(workflow).toContain("live_suite_filter:");
+    expect(workflow).toContain(
+      "live_suite_filter: ${{ needs.resolve_target.outputs.live_suite_filter }}",
+    );
     expect(workflow).toContain("- live-e2e");
     expect(workflow).toContain("- qa-live");
   });
@@ -347,8 +358,11 @@ describe("package artifact reuse", () => {
     expect(workflow).toContain('-f harness_ref="$TARGET_SHA"');
     expect(workflow).toContain("child_rerun_group=all");
     expect(workflow).toContain('-f rerun_group="$child_rerun_group"');
+    expect(workflow).toContain('args+=(-f live_suite_filter="$LIVE_SUITE_FILTER")');
+    expect(workflow).toContain("cancel-in-progress: false");
+    expect(workflow).not.toContain("gh run cancel");
+    expect(workflow).not.toContain("force-cancel");
     expect(workflow).toContain("NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}");
-    expect(workflow.match(/trap - EXIT INT TERM/g)?.length ?? 0).toBeGreaterThanOrEqual(6);
     expect(workflow).not.toContain("workflow_ref:");
     expect(workflow).not.toContain("inputs.workflow_ref");
   });