diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index 441e6b589f3..321f46383f3 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -135,8 +135,10 @@ cancel it and monitor the current run. `OpenClaw Release Checks` (`openclaw-release-checks.yml`) is the release child workflow. It is broader than normal CI but narrower than the umbrella because it -does not dispatch the separate full normal CI child. Use it when release-path -validation is needed without rerunning the entire umbrella. +does not dispatch the separate full normal CI child. It runs Package Acceptance +with `telegram_mode=mock-openai`, so the release package tarball also goes +through Telegram package QA. Use it when release-path validation is needed +without rerunning the entire umbrella. ```bash gh workflow run openclaw-release-checks.yml \ @@ -248,7 +250,8 @@ gh workflow run package-acceptance.yml --ref main \ -f source=npm \ -f workflow_ref=main \ -f package_spec=openclaw@beta \ - -f suite_profile=product + -f suite_profile=product \ + -f telegram_mode=mock-openai ``` Npm candidate selection: @@ -315,7 +318,7 @@ gh workflow run package-acceptance.yml --ref main \ -f source=ref \ -f package_ref= \ -f suite_profile=package \ - -f telegram_mode=none + -f telegram_mode=mock-openai ``` Use `telegram_mode=mock-openai` or `telegram_mode=live-frontier` when the same @@ -323,7 +326,8 @@ resolved `package-under-test` tarball should also run through the Telegram QA workflow in the `qa-live-shared` environment. The standalone Telegram workflow still accepts a published npm spec for post-publish checks, but Package Acceptance passes the resolved artifact for `source=npm`, `ref`, `url`, and -`artifact`. +`artifact`. Use `telegram_mode=none` only when intentionally skipping Telegram +credentialed package proof for a focused rerun. 
Docker E2E images never copy repo sources as the app under test: the bare image is a Node/Git runner, and the functional image installs the same prebuilt npm diff --git a/.github/workflows/npm-telegram-beta-e2e.yml b/.github/workflows/npm-telegram-beta-e2e.yml index b9baf3e81fe..3e11e882b59 100644 --- a/.github/workflows/npm-telegram-beta-e2e.yml +++ b/.github/workflows/npm-telegram-beta-e2e.yml @@ -77,7 +77,7 @@ env: PNPM_VERSION: "10.33.0" jobs: - run_npm_telegram_beta_e2e: + run_package_telegram_e2e: name: Run package Telegram E2E runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 60 diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 1cbb282553d..4839cbc497a 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -228,7 +228,11 @@ jobs: source: ref package_ref: ${{ needs.resolve_target.outputs.ref }} suite_profile: package - telegram_mode: none + telegram_mode: mock-openai + secrets: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }} + OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }} qa_lab_parity_release_checks: name: Run QA Lab parity gate diff --git a/.github/workflows/package-acceptance.yml b/.github/workflows/package-acceptance.yml index 53cc8ea5fbe..1db8a986d38 100644 --- a/.github/workflows/package-acceptance.yml +++ b/.github/workflows/package-acceptance.yml @@ -471,7 +471,7 @@ jobs: OPENCLAW_GEMINI_SETTINGS_JSON: ${{ secrets.OPENCLAW_GEMINI_SETTINGS_JSON }} FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} - npm_telegram: + package_telegram: name: Telegram package acceptance needs: resolve_package if: needs.resolve_package.outputs.telegram_enabled == 'true' @@ -488,7 +488,7 @@ jobs: summary: name: Verify package acceptance - needs: [resolve_package, docker_acceptance, npm_telegram] + needs: [resolve_package, docker_acceptance, 
package_telegram] if: always() runs-on: ubuntu-24.04 timeout-minutes: 5 @@ -496,7 +496,7 @@ jobs: - name: Verify package acceptance results env: DOCKER_RESULT: ${{ needs.docker_acceptance.result }} - NPM_TELEGRAM_RESULT: ${{ needs.npm_telegram.result }} + PACKAGE_TELEGRAM_RESULT: ${{ needs.package_telegram.result }} RESOLVE_RESULT: ${{ needs.resolve_package.result }} shell: bash run: | @@ -505,7 +505,7 @@ jobs: for item in \ "resolve_package=${RESOLVE_RESULT}" \ "docker_acceptance=${DOCKER_RESULT}" \ - "npm_telegram=${NPM_TELEGRAM_RESULT}" + "package_telegram=${PACKAGE_TELEGRAM_RESULT}" do name="${item%%=*}" result="${item#*=}" diff --git a/docs/ci.md b/docs/ci.md index b8e6f9590c9..c3df14b047e 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -47,9 +47,10 @@ The workflow has four jobs: that artifact, validates the tarball inventory, prepares package-digest Docker images when needed, and runs the selected Docker lanes against that package instead of packing the workflow checkout. -3. `npm_telegram` optionally calls `NPM Telegram Beta E2E`. It runs only when - `telegram_mode` is not `none`, and only for `source=npm`, because that lane - installs a published package spec. +3. `package_telegram` optionally calls `NPM Telegram Beta E2E`. It runs when + `telegram_mode` is not `none` and installs the same `package-under-test` + artifact when Package Acceptance resolved one; standalone Telegram dispatch + can still install a published npm spec. 4. `summary` fails the workflow if package resolution, Docker acceptance, or the optional Telegram lane failed. @@ -83,11 +84,13 @@ Profiles map to Docker coverage: - `custom`: exact `docker_lanes`; required when `suite_profile=custom` Release checks call Package Acceptance with `source=ref`, -`package_ref=`, `workflow_ref=`, and -`suite_profile=package`. That profile is the GitHub-native replacement for most -Parallels package/update validation. 
Cross-OS release checks still cover -OS-specific onboarding, installer, and platform behavior; package/update -product validation should start with Package Acceptance. +`package_ref=`, `workflow_ref=`, +`suite_profile=package`, and `telegram_mode=mock-openai`. That profile is the +GitHub-native replacement for most Parallels package/update validation, with +Telegram proving the same package artifact through the QA live transport. +Cross-OS release checks still cover OS-specific onboarding, installer, and +platform behavior; package/update product validation should start with Package +Acceptance. Examples: @@ -98,7 +101,8 @@ gh workflow run package-acceptance.yml \ -f workflow_ref=main \ -f source=npm \ -f package_spec=openclaw@beta \ - -f suite_profile=product + -f suite_profile=product \ + -f telegram_mode=mock-openai # Pack and validate a release branch with the current harness. gh workflow run package-acceptance.yml \ @@ -106,7 +110,8 @@ gh workflow run package-acceptance.yml \ -f workflow_ref=main \ -f source=ref \ -f package_ref=release/YYYY.M.D \ - -f suite_profile=package + -f suite_profile=package \ + -f telegram_mode=mock-openai # Validate a tarball URL. SHA-256 is mandatory for source=url. gh workflow run package-acceptance.yml \ diff --git a/docs/help/testing.md b/docs/help/testing.md index 160b128baf4..4284c7938af 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -167,7 +167,8 @@ runs the same lanes before release approval. gh workflow run package-acceptance.yml --ref main \ -f source=npm \ -f package_spec=openclaw@beta \ - -f suite_profile=product + -f suite_profile=product \ + -f telegram_mode=mock-openai ``` - Exact tarball URL proof requires a digest: @@ -647,7 +648,7 @@ These Docker runners split into two buckets: `OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=90000`. Override those env vars when you explicitly want the larger exhaustive scan. 
- `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. If a single lane is heavier than the active caps, the scheduler can still start it when the pool is empty and then keeps it running alone until capacity is available again. Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. -- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" 
It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. Release validation runs the `package` profile for the target ref. +- `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. Release validation runs the `package` profile for the target ref with Telegram package QA enabled. 
- Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:npm-onboard-channel-agent`, `test:docker:update-channel-switch`, `test:docker:session-runtime-context`, `test:docker:agents-delete-shared-workspace`, `test:docker:gateway-network`, `test:docker:browser-cdp-snapshot`, `test:docker:mcp-channels`, `test:docker:pi-bundle-mcp-tools`, `test:docker:cron-mcp-cleanup`, `test:docker:plugins`, `test:docker:plugin-update`, and `test:docker:config-reload` boot one or more real containers and verify higher-level integration paths. The live-model Docker runners also bind-mount only the needed CLI auth homes (or all supported ones when the run is not narrowed), then copy them into the container home before the run so external-CLI OAuth can refresh tokens without mutating the host auth store: diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index 20b5171a4a3..390fc59f5a7 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -82,10 +82,11 @@ the maintainer-only release runbook. preflight artifact via `preflight_run_id`; stable macOS release readiness also requires the packaged `.zip`, `.dmg`, `.dSYM.zip`, and updated `appcast.xml` on `main`. -11. After publish, run the npm post-publish verifier, optional published-npm - Telegram E2E, dist-tag promotion when needed, GitHub release/prerelease - notes from the complete matching `CHANGELOG.md` section, and the release - announcement steps. +11. After publish, run the npm post-publish verifier, optional standalone + published-npm Telegram E2E when you need post-publish channel proof, + dist-tag promotion when needed, GitHub release/prerelease notes from the + complete matching `CHANGELOG.md` section, and the release announcement + steps. ## Release preflight @@ -112,8 +113,9 @@ the maintainer-only release runbook. SHA-256; or `source=artifact` for a tarball uploaded by another GitHub Actions run. 
The workflow resolves the candidate to `package-under-test`, reuses the Docker E2E release scheduler against that - tarball, and can optionally run Telegram QA against the same tarball. - Example: `gh workflow run package-acceptance.yml --ref main -f workflow_ref=main -f source=npm -f package_spec=openclaw@beta -f suite_profile=product` + tarball, and can run Telegram QA against the same tarball with + `telegram_mode=mock-openai` or `telegram_mode=live-frontier`. + Example: `gh workflow run package-acceptance.yml --ref main -f workflow_ref=main -f source=npm -f package_spec=openclaw@beta -f suite_profile=product -f telegram_mode=mock-openai` Common profiles: - `smoke`: install/channel/agent, gateway network, and config reload lanes - `package`: package/update/plugin lanes without OpenWebUI @@ -235,13 +237,13 @@ gh workflow run full-release-validation.yml \ The workflow resolves the target ref, dispatches manual `CI` with `target_ref=`, dispatches `OpenClaw Release Checks`, and -optionally dispatches post-publish Telegram E2E when +optionally dispatches standalone post-publish Telegram E2E when `npm_telegram_package_spec` is set. `OpenClaw Release Checks` then fans out install smoke, cross-OS release checks, live/E2E Docker release-path coverage, -Package Acceptance, QA Lab parity, live Matrix, and live Telegram. A full run is -only acceptable when the `Full Release Validation` summary shows `normal_ci` and -`release_checks` as successful, and any optional `npm_telegram` child is either -successful or intentionally skipped. +Package Acceptance with Telegram package QA, QA Lab parity, live Matrix, and +live Telegram. A full run is only acceptable when the `Full Release Validation` +summary shows `normal_ci` and `release_checks` as successful, and any optional +`npm_telegram` child is either successful or intentionally skipped. 
Use these variants depending on release stage: @@ -363,12 +365,13 @@ Supported candidate sources: - `source=artifact`: reuse a `.tgz` uploaded by another GitHub Actions run `OpenClaw Release Checks` runs Package Acceptance with `source=ref`, -`package_ref=`, and `suite_profile=package`. That profile covers -install, update, and plugin package contracts and is the GitHub-native -replacement for most of the package/update coverage that previously required -Parallels. Cross-OS release checks still matter for OS-specific onboarding, -installer, and platform behavior, but package/update product validation should -prefer Package Acceptance. +`package_ref=`, `suite_profile=package`, and +`telegram_mode=mock-openai`. That profile covers install, update, and plugin +package contracts, and the Telegram mode adds Telegram package QA against the +same resolved tarball. Together they are the GitHub-native replacement for most +of the package/update coverage that previously required Parallels. Cross-OS +release checks still matter for OS-specific onboarding, installer, and platform +behavior, but package/update product validation should prefer Package Acceptance.
Use broader Package Acceptance profiles when the release question is about an actual installable package: diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 18b062ef892..8afe59fbee7 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -77,6 +77,7 @@ describe("package artifact reuse", () => { expect(workflow).toContain("OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ"); expect(workflow).toContain("provider_mode:"); expect(workflow).toContain("provider_mode must be mock-openai or live-frontier"); + expect(workflow).toContain("run_package_telegram_e2e:"); }); it("includes package acceptance in release checks", () => { @@ -86,5 +87,22 @@ describe("package artifact reuse", () => { expect(workflow).toContain("uses: ./.github/workflows/package-acceptance.yml"); expect(workflow).toContain("package_ref: ${{ needs.resolve_target.outputs.ref }}"); expect(workflow).toContain("suite_profile: package"); + expect(workflow).toContain("telegram_mode: mock-openai"); + expect(workflow).toContain( + "OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }}", + ); + expect(workflow).toContain( + "OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}", + ); + }); + + it("names package acceptance Telegram as artifact-backed package validation", () => { + const workflow = readFileSync(PACKAGE_ACCEPTANCE_WORKFLOW, "utf8"); + + expect(workflow).toContain("package_telegram:"); + expect(workflow).toContain("needs: [resolve_package, docker_acceptance, package_telegram]"); + expect(workflow).toContain("PACKAGE_TELEGRAM_RESULT:"); + expect(workflow).toContain("package_telegram=${PACKAGE_TELEGRAM_RESULT}"); + expect(workflow).not.toContain("npm_telegram:"); }); });