diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index cba803168e3..36020eecea6 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -311,9 +311,12 @@ gh workflow run package-acceptance.yml --ref main \ -f telegram_mode=none ``` -Use `telegram_mode=mock-openai` or `telegram_mode=live-frontier` only with -`source=npm`; that path reuses the published npm Telegram E2E workflow and the -`qa-live-shared` environment. +Use `telegram_mode=mock-openai` or `telegram_mode=live-frontier` when the same +resolved `package-under-test` tarball should also run through the Telegram QA +workflow in the `qa-live-shared` environment. The standalone Telegram workflow +still accepts a published npm spec for post-publish checks, but Package +Acceptance passes the resolved artifact for `source=npm`, `ref`, `url`, and +`artifact`. Docker E2E images never copy repo sources as the app under test: the bare image is a Node/Git runner, and the functional image installs the same prebuilt npm diff --git a/.github/workflows/npm-telegram-beta-e2e.yml b/.github/workflows/npm-telegram-beta-e2e.yml index 960abc15c81..b9baf3e81fe 100644 --- a/.github/workflows/npm-telegram-beta-e2e.yml +++ b/.github/workflows/npm-telegram-beta-e2e.yml @@ -4,10 +4,20 @@ on: workflow_dispatch: inputs: package_spec: - description: Published OpenClaw package spec to test + description: Published OpenClaw package spec to test when no artifact is supplied required: true default: openclaw@beta type: string + package_label: + description: Optional display label for an artifact-backed package candidate + required: false + default: "" + type: string + package_artifact_name: + description: Advanced package-under-test artifact name; leave blank for registry install + required: false + default: "" + type: string provider_mode: description: QA provider mode required: true @@ -23,9 +33,19 @@ on: workflow_call: inputs: package_spec: - 
description: Published OpenClaw package spec to test + description: Published OpenClaw package spec to test when no artifact is supplied required: true type: string + package_artifact_name: + description: Optional package-under-test artifact from the current workflow run + required: false + default: "" + type: string + package_label: + description: Optional display label for an artifact-backed package candidate + required: false + default: "" + type: string provider_mode: description: QA provider mode required: false @@ -58,7 +78,7 @@ env: jobs: run_npm_telegram_beta_e2e: - name: Run published npm Telegram E2E + name: Run package Telegram E2E runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 60 environment: qa-live-shared @@ -101,6 +121,7 @@ jobs: - name: Validate inputs and secrets env: PACKAGE_SPEC: ${{ inputs.package_spec }} + PACKAGE_ARTIFACT_NAME: ${{ inputs.package_artifact_name || '' }} PROVIDER_MODE: ${{ inputs.provider_mode }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }} @@ -109,9 +130,11 @@ jobs: run: | set -euo pipefail - if [[ ! "${PACKAGE_SPEC}" =~ ^openclaw@(beta|latest|[0-9]{4}\.[1-9][0-9]*\.[1-9][0-9]*(-[1-9][0-9]*|-beta\.[1-9][0-9]*)?)$ ]]; then - echo "package_spec must be openclaw@beta, openclaw@latest, or an exact OpenClaw release version; got: ${PACKAGE_SPEC}" >&2 - exit 1 + if [[ -z "${PACKAGE_ARTIFACT_NAME// }" ]]; then + if [[ ! 
"${PACKAGE_SPEC}" =~ ^openclaw@(beta|latest|[0-9]{4}\.[1-9][0-9]*\.[1-9][0-9]*(-[1-9][0-9]*|-beta\.[1-9][0-9]*)?)$ ]]; then + echo "package_spec must be openclaw@beta, openclaw@latest, or an exact OpenClaw release version; got: ${PACKAGE_SPEC}" >&2 + exit 1 + fi fi case "${PROVIDER_MODE}" in mock-openai | live-frontier) ;; @@ -135,7 +158,14 @@ jobs: require_var OPENAI_API_KEY fi - - name: Run npm Telegram beta E2E + - name: Download package-under-test artifact + if: inputs.package_artifact_name != '' + uses: actions/download-artifact@v8 + with: + name: ${{ inputs.package_artifact_name }} + path: .artifacts/telegram-package-under-test + + - name: Run package Telegram E2E id: run_lane shell: bash env: @@ -143,6 +173,7 @@ jobs: OPENCLAW_SKIP_DOCKER_BUILD: "1" OPENCLAW_DOCKER_E2E_IMAGE: openclaw-docker-e2e:local OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC: ${{ inputs.package_spec }} + OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL: ${{ inputs.package_label }} OPENCLAW_NPM_TELEGRAM_PROVIDER_MODE: ${{ inputs.provider_mode }} OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE: convex OPENCLAW_NPM_TELEGRAM_CREDENTIAL_ROLE: ci @@ -151,6 +182,7 @@ jobs: OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" OPENCLAW_QA_TELEGRAM_CAPTURE_CONTENT: "1" INPUT_SCENARIO: ${{ inputs.scenario }} + PACKAGE_ARTIFACT_NAME: ${{ inputs.package_artifact_name || '' }} run: | set -euo pipefail @@ -158,6 +190,20 @@ jobs: echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT" export OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR="${output_dir}" + if [[ -n "${PACKAGE_ARTIFACT_NAME// }" ]]; then + mapfile -t package_tgzs < <(find .artifacts/telegram-package-under-test -type f -name "*.tgz" | sort) + if [[ "${#package_tgzs[@]}" -ne 1 ]]; then + echo "package artifact ${PACKAGE_ARTIFACT_NAME} must contain exactly one .tgz; found ${#package_tgzs[@]}" >&2 + exit 1 + fi + export OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ="${package_tgzs[0]}" + if [[ -z "${OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL// }" ]]; then + export OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL="$(basename 
"${package_tgzs[0]}")" + fi + elif [[ -z "${OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL// }" ]]; then + export OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL="${OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC}" + fi + if [[ -n "${INPUT_SCENARIO// }" ]]; then export OPENCLAW_NPM_TELEGRAM_SCENARIOS="${INPUT_SCENARIO}" fi diff --git a/.github/workflows/package-acceptance.yml b/.github/workflows/package-acceptance.yml index bfa3f1807ed..53cc8ea5fbe 100644 --- a/.github/workflows/package-acceptance.yml +++ b/.github/workflows/package-acceptance.yml @@ -65,7 +65,7 @@ on: default: "" type: string telegram_mode: - description: Optional published-npm Telegram QA lane + description: Optional Telegram QA lane for the resolved package candidate required: true default: none type: choice @@ -125,7 +125,7 @@ on: default: "" type: string telegram_mode: - description: Optional published-npm Telegram QA lane + description: Optional Telegram QA lane for the resolved package candidate required: false default: none type: string @@ -366,10 +366,6 @@ jobs: telegram_enabled=false if [[ "$TELEGRAM_MODE" != "none" ]]; then - if [[ "$SOURCE" != "npm" ]]; then - echo "telegram_mode requires source=npm because the Telegram workflow installs a published package spec." 
>&2 - exit 1 - fi telegram_enabled=true fi @@ -476,12 +472,14 @@ jobs: FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} npm_telegram: - name: Published npm Telegram acceptance + name: Telegram package acceptance needs: resolve_package if: needs.resolve_package.outputs.telegram_enabled == 'true' uses: ./.github/workflows/npm-telegram-beta-e2e.yml with: package_spec: ${{ inputs.package_spec }} + package_artifact_name: ${{ needs.resolve_package.outputs.package_artifact_name }} + package_label: openclaw@${{ needs.resolve_package.outputs.package_version }} provider_mode: ${{ needs.resolve_package.outputs.telegram_mode }} secrets: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/docs/ci.md b/docs/ci.md index bdf13ad82f1..b8e6f9590c9 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -23,8 +23,117 @@ published npm spec, a trusted `package_ref` built with the selected from another GitHub Actions run, uploads it as `package-under-test`, then reuses the Docker release/E2E scheduler with that tarball instead of repacking the workflow checkout. Profiles cover smoke, package, product, full, and custom -Docker lane selections. The optional Telegram lane is published-npm only and -reuses the `NPM Telegram Beta E2E` workflow. +Docker lane selections. The optional Telegram lane reuses the +`package-under-test` artifact in the `NPM Telegram Beta E2E` workflow, with the +published npm spec path kept for standalone dispatches. + +## Package Acceptance + +Use `Package Acceptance` when the question is "does this installable OpenClaw +package work as a product?" It is different from normal CI: normal CI validates +the source tree, while package acceptance validates a single tarball through the +same Docker E2E harness users exercise after install or update. + +The workflow has four jobs: + +1. 
`resolve_package` checks out `workflow_ref`, resolves one package candidate, + writes `.artifacts/docker-e2e-package/openclaw-current.tgz`, writes + `.artifacts/docker-e2e-package/package-candidate.json`, uploads both as the + `package-under-test` artifact, and prints the source, workflow ref, package + ref, version, SHA-256, and profile in the GitHub step summary. +2. `docker_acceptance` calls + `openclaw-live-and-e2e-checks-reusable.yml` with `ref=workflow_ref` and + `package_artifact_name=package-under-test`. The reusable workflow downloads + that artifact, validates the tarball inventory, prepares package-digest + Docker images when needed, and runs the selected Docker lanes against that + package instead of packing the workflow checkout. +3. `npm_telegram` optionally calls `NPM Telegram Beta E2E`. It runs only when + `telegram_mode` is not `none`; it passes the resolved `package-under-test` + artifact so the Telegram lane tests the same tarball for any source, while + standalone dispatches of that workflow can still install a published spec. +4. `summary` fails the workflow if package resolution, Docker acceptance, or + the optional Telegram lane failed. + +Candidate sources: + +- `source=npm`: accepts only `openclaw@beta`, `openclaw@latest`, or an exact + OpenClaw release version such as `openclaw@2026.4.27-beta.2`. Use this for + published beta/stable acceptance. +- `source=ref`: packs a trusted `package_ref` branch, tag, or full commit SHA. + The resolver fetches OpenClaw branches/tags, verifies the selected commit is + reachable from repository branch history or a release tag, installs deps in a + detached worktree, and packs it with `scripts/package-openclaw-for-docker.mjs`. +- `source=url`: downloads an HTTPS `.tgz`; `package_sha256` is required. +- `source=artifact`: downloads one `.tgz` from `artifact_run_id` and + `artifact_name`; `package_sha256` is optional but should be supplied for + externally shared artifacts. + +Keep `workflow_ref` and `package_ref` separate. `workflow_ref` is the trusted +workflow/harness code that runs the test. 
`package_ref` is the source commit +that gets packed when `source=ref`. This lets the current test harness validate +older trusted source commits without running old workflow logic. + +Profiles map to Docker coverage: + +- `smoke`: `npm-onboard-channel-agent`, `gateway-network`, `config-reload` +- `package`: `install-e2e`, `npm-onboard-channel-agent`, `doctor-switch`, + `update-channel-switch`, `bundled-channel-deps`, `plugins`, `plugin-update` +- `product`: `package` plus `mcp-channels`, `cron-mcp-cleanup`, + `openai-web-search-minimal`, `openwebui` +- `full`: full Docker release-path chunks with OpenWebUI +- `custom`: exact `docker_lanes`; required when `suite_profile=custom` + +Release checks call Package Acceptance with `source=ref`, +`package_ref=<release-branch-or-sha>`, `workflow_ref=<trusted-harness-ref>`, and +`suite_profile=package`. That profile is the GitHub-native replacement for most +Parallels package/update validation. Cross-OS release checks still cover +OS-specific onboarding, installer, and platform behavior; package/update +product validation should start with Package Acceptance. + +Examples: + +```bash +# Validate the current beta package with product-level coverage. +gh workflow run package-acceptance.yml \ + --ref main \ + -f workflow_ref=main \ + -f source=npm \ + -f package_spec=openclaw@beta \ + -f suite_profile=product + +# Pack and validate a release branch with the current harness. +gh workflow run package-acceptance.yml \ + --ref main \ + -f workflow_ref=main \ + -f source=ref \ + -f package_ref=release/YYYY.M.D \ + -f suite_profile=package + +# Validate a tarball URL. SHA-256 is mandatory for source=url. +gh workflow run package-acceptance.yml \ + --ref main \ + -f workflow_ref=main \ + -f source=url \ + -f package_url=https://example.com/openclaw-current.tgz \ + -f package_sha256=<64-char-sha256> \ + -f suite_profile=smoke + +# Reuse a tarball uploaded by another Actions run. 
+gh workflow run package-acceptance.yml \ + --ref main \ + -f workflow_ref=main \ + -f source=artifact \ + -f artifact_run_id=<run-id> \ + -f artifact_name=package-under-test \ + -f suite_profile=custom \ + -f docker_lanes='install-e2e plugin-update' +``` + +When debugging a failed package acceptance run, start at the `resolve_package` +summary to confirm the package source, version, and SHA-256. Then inspect the +`docker_acceptance` child run and its Docker artifacts: +`.artifacts/docker-tests/**/summary.json`, `failures.json`, lane logs, phase +timings, and rerun commands. Prefer rerunning the failed package profile or +exact Docker lanes instead of rerunning full release validation.
-The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. 
Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. 
It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs` or downloads a caller-provided package artifact, validates the tarball inventory, builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images when the plan needs package-installed lanes, and reuses those images when the same package digest has already been prepared. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current harness logic can validate older trusted source commits without checking out old workflow code. Release checks run the `package` acceptance profile for the target ref; that profile covers package/update/plugin contracts and is the default GitHub-native replacement for most Parallels package/update coverage. The release-path Docker suite runs as at most three chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-integrations`). Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the three chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares or downloads the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. 
Use `pnpm test:docker:rerun ` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings ` for slow-lane and phase critical-path summaries. When Open WebUI is requested with the release-path suite, it runs inside the plugins/integrations chunk instead of reserving a fourth Docker worker; Open WebUI keeps a standalone job only for openwebui-only dispatches. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. +The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. 
The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image, packs OpenClaw once as an npm tarball, and builds two shared `scripts/e2e/Dockerfile` images: a bare Node/Git runner for installer/update/plugin-dependency lanes and a functional image that installs the same tarball into `/app` for normal functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`, planner logic lives in `scripts/lib/docker-e2e-plan.mjs`, and the runner only executes the selected plan. The scheduler selects the image per lane with `OPENCLAW_DOCKER_E2E_BARE_IMAGE` and `OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`, then runs lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. A single lane heavier than the effective caps can still start from an empty pool, then runs alone until it releases capacity. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. 
It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=` runs exact scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow asks `scripts/test-docker-all.mjs --plan-json` which package, image kind, live image, lane, and credential coverage is required, then `scripts/docker-e2e.mjs` converts that plan into GitHub outputs and summaries. It either packs OpenClaw through `scripts/package-openclaw-for-docker.mjs` or downloads a caller-provided package artifact, validates the tarball inventory, builds and pushes package-digest-tagged bare/functional GHCR Docker E2E images when the plan needs package-installed lanes, and reuses those images when the same package digest has already been prepared. The `Package Acceptance` workflow is the high-level package gate: it resolves a candidate from npm, a trusted `package_ref`, an HTTPS tarball plus SHA-256, or a prior workflow artifact, then passes that single `package-under-test` artifact into the reusable Docker E2E workflow. It keeps `workflow_ref` separate from `package_ref` so current acceptance logic can validate older trusted commits without checking out old workflow code. Release checks run the `package` acceptance profile for the target ref; that profile covers package/update/plugin contracts and is the default GitHub-native replacement for most Parallels package/update coverage. 
The release-path Docker suite runs as at most three chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls only the image kind it needs and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-integrations`). Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, `failures.json`, phase timings, scheduler plan JSON, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared images instead of the three chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job and prepares or downloads the package artifact for that run; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. Use `pnpm test:docker:rerun ` to download Docker artifacts from a GitHub run and print combined/per-lane targeted rerun commands; use `pnpm test:docker:timings ` for slow-lane and phase critical-path summaries. When Open WebUI is requested with the release-path suite, it runs inside the plugins/integrations chunk instead of reserving a fourth Docker worker; Open WebUI keeps a standalone job only for openwebui-only dispatches. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by `scripts/check-changed.mjs`. 
That local check gate is stricter about architecture boundaries than the broad CI platform scope: core production changes run core prod and core test typecheck plus core lint/guards, core test-only changes run only core test typecheck plus core lint, extension production changes run extension prod and extension test typecheck plus extension lint, and extension test-only changes run extension test typecheck plus extension lint. Public Plugin SDK or plugin-contract changes expand to extension typecheck because extensions depend on those core contracts, but Vitest extension sweeps are explicit test work. Release metadata-only version bumps run targeted version/config/root-dependency checks. Unknown root/config changes fail safe to all check lanes. diff --git a/docs/help/testing.md b/docs/help/testing.md index 5822ea4e05f..160b128baf4 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -136,10 +136,13 @@ runs the same lanes before release approval. then seeds an affected broken session JSONL and verifies `openclaw doctor --fix` rewrites it to the active branch with a backup. - `pnpm test:docker:npm-telegram-live` - - Installs a published OpenClaw package in Docker, runs installed-package + - Installs an OpenClaw package candidate in Docker, runs installed-package onboarding, configures Telegram through the installed CLI, then reuses the live Telegram QA lane with that installed package as the SUT Gateway. - - Defaults to `OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC=openclaw@beta`. + - Defaults to `OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC=openclaw@beta`; set + `OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ=/path/to/openclaw-current.tgz` or + `OPENCLAW_CURRENT_PACKAGE_TGZ` to test a resolved local tarball instead of + installing from the registry. - Uses the same Telegram env credentials or Convex credential source as `pnpm openclaw qa telegram`. 
For CI/release automation, set `OPENCLAW_NPM_TELEGRAM_CREDENTIAL_SOURCE=convex` plus @@ -156,8 +159,8 @@ runs the same lanes before release approval. HTTPS tarball URL plus SHA-256, or tarball artifact from another run, uploads the normalized `openclaw-current.tgz` as `package-under-test`, then runs the existing Docker E2E scheduler with smoke, package, product, full, or custom - lane profiles. Published npm candidates can additionally run the Telegram QA - workflow. + lane profiles. Set `telegram_mode=mock-openai` or `live-frontier` to run the + Telegram QA workflow against the same `package-under-test` artifact. - Latest beta product proof: ```bash @@ -643,7 +646,7 @@ These Docker runners split into two buckets: `OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=45000`, and `OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=90000`. Override those env vars when you explicitly want the larger exhaustive scan. -- `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. 
Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. +- `test:docker:all` builds the live Docker image once via `test:docker:live-build`, packs OpenClaw once as an npm tarball through `scripts/package-openclaw-for-docker.mjs`, then builds/reuses two `scripts/e2e/Dockerfile` images. The bare image is only the Node/Git runner for install/update/plugin-dependency lanes; those lanes mount the prebuilt tarball. The functional image installs the same tarball into `/app` for built-app functionality lanes. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. The aggregate uses a weighted local scheduler: `OPENCLAW_DOCKER_ALL_PARALLELISM` controls process slots, while resource caps keep heavy live, npm-install, and multi-service lanes from all starting at once. If a single lane is heavier than the active caps, the scheduler can still start it when the pool is empty and then keeps it running alone until capacity is available again. 
Defaults are 10 slots, `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; tune `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` only when the Docker host has more headroom. The runner performs a Docker preflight by default, removes stale OpenClaw E2E containers, prints status every 30 seconds, stores successful lane timings in `.artifacts/docker-tests/lane-timings.json`, and uses those timings to start longer lanes first on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the weighted lane manifest without building or running Docker, or `node scripts/test-docker-all.mjs --plan-json` to print the CI plan for selected lanes, package/image needs, and credentials. - `Package Acceptance` is the GitHub-native package gate for "does this installable tarball work as a product?" It resolves one candidate package from `source=npm`, `source=ref`, `source=url`, or `source=artifact`, uploads it as `package-under-test`, then runs the reusable Docker E2E lanes against that exact tarball instead of repacking the selected ref. `workflow_ref` selects the trusted workflow/harness scripts, while `package_ref` selects the source commit/branch/tag to pack when `source=ref`; this lets current acceptance logic validate older trusted commits. Profiles are ordered by breadth: `smoke` is quick install/channel/agent plus gateway/config, `package` is the package/update/plugin contract and the default native replacement for most Parallels package/update coverage, `product` adds MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI, and `full` runs the release-path Docker chunks with OpenWebUI. Release validation runs the `package` profile for the target ref. 
- Container smoke runners: `test:docker:openwebui`, `test:docker:onboard`, `test:docker:npm-onboard-channel-agent`, `test:docker:update-channel-switch`, `test:docker:session-runtime-context`, `test:docker:agents-delete-shared-workspace`, `test:docker:gateway-network`, `test:docker:browser-cdp-snapshot`, `test:docker:mcp-channels`, `test:docker:pi-bundle-mcp-tools`, `test:docker:cron-mcp-cleanup`, `test:docker:plugins`, `test:docker:plugin-update`, and `test:docker:config-reload` boot one or more real containers and verify higher-level integration paths. diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index dd8bf68a4c6..20b5171a4a3 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -66,9 +66,9 @@ the maintainer-only release runbook. 6. Run `OpenClaw NPM Release` with `preflight_only=true`. Before a tag exists, a full 40-character release-branch SHA is allowed for validation-only preflight. Save the successful `preflight_run_id`. -7. Run `Full Release Validation` for the release branch, tag, or full commit - SHA. This is the umbrella run for the four big release test boxes: Vitest, - Docker, QA Lab, and Package. +7. Kick off all pre-release tests with `Full Release Validation` for the + release branch, tag, or full commit SHA. This is the one manual entrypoint + for the four big release test boxes: Vitest, Docker, QA Lab, and Package. 8. If validation fails, fix on the release branch and rerun the smallest failed file, lane, workflow job, package profile, provider, or model allowlist that proves the fix. Rerun the full umbrella only when the changed surface makes @@ -96,15 +96,14 @@ the maintainer-only release runbook. 
- Run `pnpm build && pnpm ui:build` before `pnpm release:check` so the expected `dist/*` release artifacts and Control UI bundle exist for the pack validation step -- Run the manual `Full Release Validation` workflow before release approval - when you need the whole release validation suite from one entrypoint. It - accepts a branch, tag, or full commit SHA, dispatches manual `CI`, and - dispatches `OpenClaw Release Checks` for install smoke, package acceptance, - Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and - Telegram lanes. - Provide `npm_telegram_package_spec` only after a package has been published - and the post-publish Telegram E2E should run too. - Example: `gh workflow run full-release-validation.yml --ref main -f ref=release/YYYY.M.D` +- Run the manual `Full Release Validation` workflow before release approval to + kick off all pre-release test boxes from one entrypoint. It accepts a branch, + tag, or full commit SHA, dispatches manual `CI`, and dispatches + `OpenClaw Release Checks` for install smoke, package acceptance, Docker + release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram + lanes. Provide `npm_telegram_package_spec` only after a package has been + published and the post-publish Telegram E2E should run too. Example: + `gh workflow run full-release-validation.yml --ref main -f ref=release/YYYY.M.D` - Run the manual `Package Acceptance` workflow when you want side-channel proof for a package candidate while release work continues. Use `source=npm` for `openclaw@beta`, `openclaw@latest`, or an exact release version; `source=ref` @@ -113,7 +112,7 @@ the maintainer-only release runbook. SHA-256; or `source=artifact` for a tarball uploaded by another GitHub Actions run. The workflow resolves the candidate to `package-under-test`, reuses the Docker E2E release scheduler against that - tarball, and can optionally run published-npm Telegram QA. 
+ tarball, and can optionally run Telegram QA against the same tarball. Example: `gh workflow run package-acceptance.yml --ref main -f workflow_ref=main -f source=npm -f package_spec=openclaw@beta -f suite_profile=product` Common profiles: - `smoke`: install/channel/agent, gateway network, and config reload lanes @@ -221,8 +220,9 @@ Validation` or from the `main`/release workflow ref so workflow logic and ## Release test boxes -`Full Release Validation` is the manual umbrella that operators use when they -want all release validation from one entrypoint: +`Full Release Validation` is how operators kick off all pre-release tests from +one entrypoint. Run it from the trusted `main` workflow ref and pass the release +branch, tag, or full commit SHA as `ref`: ```bash gh workflow run full-release-validation.yml \ @@ -236,9 +236,48 @@ gh workflow run full-release-validation.yml \ The workflow resolves the target ref, dispatches manual `CI` with `target_ref=`, dispatches `OpenClaw Release Checks`, and optionally dispatches post-publish Telegram E2E when -`npm_telegram_package_spec` is set. A full run is only acceptable when both -child workflows succeed or an intentionally skipped optional child is recorded -in the summary. +`npm_telegram_package_spec` is set. `OpenClaw Release Checks` then fans out +install smoke, cross-OS release checks, live/E2E Docker release-path coverage, +Package Acceptance, QA Lab parity, live Matrix, and live Telegram. A full run is +only acceptable when the `Full Release Validation` summary shows `normal_ci` and +`release_checks` as successful, and any optional `npm_telegram` child is either +successful or intentionally skipped. + +Use these variants depending on release stage: + +```bash +# Validate an unpublished release candidate branch. +gh workflow run full-release-validation.yml \ + --ref main \ + -f ref=release/YYYY.M.D \ + -f workflow_ref=main \ + -f provider=openai \ + -f mode=both + +# Validate an exact pushed commit. 
+gh workflow run full-release-validation.yml \ + --ref main \ + -f ref=<40-char-sha> \ + -f workflow_ref=main \ + -f provider=openai \ + -f mode=both + +# After publishing a beta, add published-package Telegram E2E. +gh workflow run full-release-validation.yml \ + --ref main \ + -f ref=release/YYYY.M.D \ + -f workflow_ref=main \ + -f provider=openai \ + -f mode=both \ + -f npm_telegram_package_spec=openclaw@YYYY.M.D-beta.N \ + -f npm_telegram_provider_mode=mock-openai +``` + +Do not use the full umbrella as the first rerun after a focused fix. If one box +fails, use the failed child workflow, job, Docker lane, package profile, model +provider, or QA lane for the next proof. Run the full umbrella again only when +the fix changed shared release orchestration or made earlier all-box evidence +stale. ### Vitest @@ -354,10 +393,10 @@ Common package profiles: - `full`: Docker release-path chunks with OpenWebUI - `custom`: exact `docker_lanes` list for focused reruns -For post-publish beta proof, use `source=npm` with the exact beta package or -`openclaw@beta`. Enable `telegram_mode=mock-openai` or -`telegram_mode=live-frontier` only for published npm packages, because that -path reuses the published-npm Telegram E2E workflow. +For package-candidate Telegram proof, enable `telegram_mode=mock-openai` or +`telegram_mode=live-frontier` on Package Acceptance. The workflow passes the +resolved `package-under-test` tarball into the Telegram lane; the standalone +Telegram workflow still accepts a published npm spec for post-publish checks. ## NPM workflow inputs diff --git a/docs/reference/test.md b/docs/reference/test.md index c375b83338c..a3ea86aa76b 100644 --- a/docs/reference/test.md +++ b/docs/reference/test.md @@ -34,7 +34,7 @@ title: "Tests" - Gateway integration: opt-in via `OPENCLAW_TEST_INCLUDE_GATEWAY=1 pnpm test` or `pnpm test:gateway`. - `pnpm test:e2e`: Runs gateway end-to-end smoke tests (multi-instance WS/HTTP/node pairing). 
Defaults to `threads` + `isolate: false` with adaptive workers in `vitest.e2e.config.ts`; tune with `OPENCLAW_E2E_WORKERS=` and set `OPENCLAW_E2E_VERBOSE=1` for verbose logs. - `pnpm test:live`: Runs provider live tests (minimax/zai). Requires API keys and `LIVE=1` (or provider-specific `*_LIVE_TEST=1`) to unskip. -- `pnpm test:docker:all`: Builds the shared live-test image, packs OpenClaw once as an npm tarball, builds/reuses a bare Node/Git runner image plus a functional image that installs that tarball into `/app`, then runs Docker smoke lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1` through a weighted scheduler. The bare image (`OPENCLAW_DOCKER_E2E_BARE_IMAGE`) is used for installer/update/plugin-dependency lanes; those lanes mount the prebuilt tarball instead of using copied repo sources. The functional image (`OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`) is used for normal built-app functionality lanes. `scripts/package-openclaw-for-docker.mjs` is the single local/CI package packer and validates the tarball plus `dist/postinstall-inventory.json` before Docker consumes it. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. `node scripts/test-docker-all.mjs --plan-json` emits the scheduler-owned CI plan for selected lanes, image kinds, package/live-image needs, and credential checks without building or running Docker. `OPENCLAW_DOCKER_ALL_PARALLELISM=` controls process slots and defaults to 10; `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM=` controls the provider-sensitive tail pool and defaults to 10. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; provider caps default to one heavy lane per provider via `OPENCLAW_DOCKER_ALL_LIVE_CLAUDE_LIMIT=4`, `OPENCLAW_DOCKER_ALL_LIVE_CODEX_LIMIT=4`, and `OPENCLAW_DOCKER_ALL_LIVE_GEMINI_LIMIT=4`. 
Use `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` for larger hosts. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=`. The runner preflights Docker by default, cleans stale OpenClaw E2E containers, emits active-lane status every 30 seconds, shares provider CLI tool caches between compatible lanes, retries transient live-provider failures once by default (`OPENCLAW_DOCKER_ALL_LIVE_RETRIES=`), and stores lane timings in `.artifacts/docker-tests/lane-timings.json` for longest-first ordering on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the lane manifest without running Docker, `OPENCLAW_DOCKER_ALL_STATUS_INTERVAL_MS=` to tune status output, or `OPENCLAW_DOCKER_ALL_TIMINGS=0` to disable timing reuse. Use `OPENCLAW_DOCKER_ALL_LIVE_MODE=skip` for deterministic/local lanes only or `OPENCLAW_DOCKER_ALL_LIVE_MODE=only` for live-provider lanes only; package aliases are `pnpm test:docker:local:all` and `pnpm test:docker:live:all`. Live-only mode merges main and tail live lanes into one longest-first pool so provider buckets can pack Claude, Codex, and Gemini work together. The runner stops scheduling new pooled lanes after the first failure unless `OPENCLAW_DOCKER_ALL_FAIL_FAST=0` is set, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. CLI backend Docker setup commands have their own timeout via `OPENCLAW_LIVE_CLI_BACKEND_SETUP_TIMEOUT_SECONDS` (default 180). Per-lane logs, `summary.json`, `failures.json`, and phase timings are written under `.artifacts/docker-tests//`; use `pnpm test:docker:timings ` to inspect slow lanes and `pnpm test:docker:rerun ` to print cheap targeted rerun commands. 
+- `pnpm test:docker:all`: Builds the shared live-test image, packs OpenClaw once as an npm tarball, builds/reuses a bare Node/Git runner image plus a functional image that installs that tarball into `/app`, then runs Docker smoke lanes with `OPENCLAW_SKIP_DOCKER_BUILD=1` through a weighted scheduler. The bare image (`OPENCLAW_DOCKER_E2E_BARE_IMAGE`) is used for installer/update/plugin-dependency lanes; those lanes mount the prebuilt tarball instead of using copied repo sources. The functional image (`OPENCLAW_DOCKER_E2E_FUNCTIONAL_IMAGE`) is used for normal built-app functionality lanes. `scripts/package-openclaw-for-docker.mjs` is the single local/CI package packer and validates the tarball plus `dist/postinstall-inventory.json` before Docker consumes it. Docker lane definitions live in `scripts/lib/docker-e2e-scenarios.mjs`; planner logic lives in `scripts/lib/docker-e2e-plan.mjs`; `scripts/test-docker-all.mjs` executes the selected plan. `node scripts/test-docker-all.mjs --plan-json` emits the scheduler-owned CI plan for selected lanes, image kinds, package/live-image needs, and credential checks without building or running Docker. `OPENCLAW_DOCKER_ALL_PARALLELISM=` controls process slots and defaults to 10; `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM=` controls the provider-sensitive tail pool and defaults to 10. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=9`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=10`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7`; provider caps default to one heavy lane per provider via `OPENCLAW_DOCKER_ALL_LIVE_CLAUDE_LIMIT=4`, `OPENCLAW_DOCKER_ALL_LIVE_CODEX_LIMIT=4`, and `OPENCLAW_DOCKER_ALL_LIVE_GEMINI_LIMIT=4`. Use `OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT` or `OPENCLAW_DOCKER_ALL_DOCKER_LIMIT` for larger hosts. If one lane exceeds the effective weight or resource cap on a low-parallelism host, it can still start from an empty pool and will run alone until it releases capacity. 
Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=`. The runner preflights Docker by default, cleans stale OpenClaw E2E containers, emits active-lane status every 30 seconds, shares provider CLI tool caches between compatible lanes, retries transient live-provider failures once by default (`OPENCLAW_DOCKER_ALL_LIVE_RETRIES=`), and stores lane timings in `.artifacts/docker-tests/lane-timings.json` for longest-first ordering on later runs. Use `OPENCLAW_DOCKER_ALL_DRY_RUN=1` to print the lane manifest without running Docker, `OPENCLAW_DOCKER_ALL_STATUS_INTERVAL_MS=` to tune status output, or `OPENCLAW_DOCKER_ALL_TIMINGS=0` to disable timing reuse. Use `OPENCLAW_DOCKER_ALL_LIVE_MODE=skip` for deterministic/local lanes only or `OPENCLAW_DOCKER_ALL_LIVE_MODE=only` for live-provider lanes only; package aliases are `pnpm test:docker:local:all` and `pnpm test:docker:live:all`. Live-only mode merges main and tail live lanes into one longest-first pool so provider buckets can pack Claude, Codex, and Gemini work together. The runner stops scheduling new pooled lanes after the first failure unless `OPENCLAW_DOCKER_ALL_FAIL_FAST=0` is set, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. CLI backend Docker setup commands have their own timeout via `OPENCLAW_LIVE_CLI_BACKEND_SETUP_TIMEOUT_SECONDS` (default 180). Per-lane logs, `summary.json`, `failures.json`, and phase timings are written under `.artifacts/docker-tests//`; use `pnpm test:docker:timings ` to inspect slow lanes and `pnpm test:docker:rerun ` to print cheap targeted rerun commands. 
- `pnpm test:docker:browser-cdp-snapshot`: Builds a Chromium-backed source E2E container, starts raw CDP plus an isolated Gateway, runs `browser doctor --deep`, and verifies CDP role snapshots include link URLs, cursor-promoted clickables, iframe refs, and frame metadata. - CLI backend live Docker probes can be run as focused lanes, for example `pnpm test:docker:live-cli-backend:codex`, `pnpm test:docker:live-cli-backend:codex:resume`, or `pnpm test:docker:live-cli-backend:codex:mcp`. Claude and Gemini have matching `:resume` and `:mcp` aliases. - `pnpm test:docker:openwebui`: Starts Dockerized OpenClaw + Open WebUI, signs in through Open WebUI, checks `/api/models`, then runs a real proxied chat through `/api/chat/completions`. Requires a usable live model key (for example OpenAI in `~/.profile`), pulls an external Open WebUI image, and is not expected to be CI-stable like the normal unit/e2e suites. diff --git a/scripts/check-openclaw-package-tarball.mjs b/scripts/check-openclaw-package-tarball.mjs index bdf62b00ded..7a54fd9dff6 100644 --- a/scripts/check-openclaw-package-tarball.mjs +++ b/scripts/check-openclaw-package-tarball.mjs @@ -37,6 +37,36 @@ const entries = list.stdout const normalized = entries.map((entry) => entry.replace(/^package\//u, "")); const entrySet = new Set(normalized); const errors = []; +const warnings = []; + +const LEGACY_OMITTED_PRIVATE_QA_INVENTORY_PREFIXES = [ + "dist/extensions/qa-channel/", + "dist/extensions/qa-lab/", + "dist/extensions/qa-matrix/", + "dist/plugin-sdk/extensions/qa-channel/", + "dist/plugin-sdk/extensions/qa-lab/", +]; +const LEGACY_OMITTED_PRIVATE_QA_INVENTORY_FILES = new Set([ + "dist/plugin-sdk/qa-channel.d.ts", + "dist/plugin-sdk/qa-channel.js", + "dist/plugin-sdk/qa-channel-protocol.d.ts", + "dist/plugin-sdk/qa-channel-protocol.js", + "dist/plugin-sdk/qa-lab.d.ts", + "dist/plugin-sdk/qa-lab.js", + "dist/plugin-sdk/qa-runtime.d.ts", + "dist/plugin-sdk/qa-runtime.js", + 
"dist/plugin-sdk/src/plugin-sdk/qa-channel.d.ts", + "dist/plugin-sdk/src/plugin-sdk/qa-channel-protocol.d.ts", + "dist/plugin-sdk/src/plugin-sdk/qa-lab.d.ts", + "dist/plugin-sdk/src/plugin-sdk/qa-runtime.d.ts", +]); + +function isLegacyOmittedPrivateQaInventoryEntry(relativePath) { + return ( + LEGACY_OMITTED_PRIVATE_QA_INVENTORY_FILES.has(relativePath) || + LEGACY_OMITTED_PRIVATE_QA_INVENTORY_PREFIXES.some((prefix) => relativePath.startsWith(prefix)) + ); +} function readTarEntry(entryPath) { const candidates = [entryPath, `package/${entryPath}`]; @@ -76,6 +106,12 @@ if (entrySet.has("dist/postinstall-inventory.json")) { for (const inventoryEntry of inventory) { const normalizedEntry = inventoryEntry.replace(/\\/gu, "/"); if (!entrySet.has(normalizedEntry)) { + if (isLegacyOmittedPrivateQaInventoryEntry(normalizedEntry)) { + warnings.push( + `legacy inventory references omitted private QA tar entry ${normalizedEntry}`, + ); + continue; + } errors.push(`inventory references missing tar entry ${normalizedEntry}`); } } @@ -93,4 +129,7 @@ if (errors.length > 0) { fail(`OpenClaw package tarball integrity failed:\n${errors.join("\n")}`); } +for (const warning of warnings) { + console.warn(`OpenClaw package tarball integrity warning: ${warning}`); +} console.log("OpenClaw package tarball integrity passed."); diff --git a/scripts/e2e/npm-telegram-live-docker.sh b/scripts/e2e/npm-telegram-live-docker.sh index 5cb4335973c..6d432f7422b 100755 --- a/scripts/e2e/npm-telegram-live-docker.sh +++ b/scripts/e2e/npm-telegram-live-docker.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Installs a published OpenClaw npm package in Docker, performs Telegram +# Installs an OpenClaw package candidate in Docker, performs Telegram # onboarding/doctor recovery, then runs the Telegram QA live harness. 
set -euo pipefail @@ -9,6 +9,8 @@ source "$ROOT_DIR/scripts/lib/docker-e2e-image.sh" IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-npm-telegram-live-e2e" OPENCLAW_NPM_TELEGRAM_LIVE_E2E_IMAGE)" DOCKER_TARGET="${OPENCLAW_NPM_TELEGRAM_DOCKER_TARGET:-build}" PACKAGE_SPEC="${OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC:-openclaw@beta}" +PACKAGE_TGZ="${OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ:-${OPENCLAW_CURRENT_PACKAGE_TGZ:-}}" +PACKAGE_LABEL="${OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL:-}" OUTPUT_DIR="${OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR:-.artifacts/qa-e2e/npm-telegram-live}" resolve_credential_source() { @@ -46,7 +48,45 @@ validate_openclaw_package_spec() { exit 1 } -validate_openclaw_package_spec "$PACKAGE_SPEC" +resolve_package_tgz() { + local candidate="$1" + if [ -z "$candidate" ]; then + return 0 + fi + if [ ! -f "$candidate" ]; then + echo "OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ must point to an existing .tgz file; got: $candidate" >&2 + exit 1 + fi + case "$candidate" in + *.tgz) ;; + *) + echo "OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ must point to a .tgz file; got: $candidate" >&2 + exit 1 + ;; + esac + local dir + local base + dir="$(cd "$(dirname "$candidate")" && pwd)" + base="$(basename "$candidate")" + printf "%s/%s" "$dir" "$base" +} + +package_mount_args=() +package_install_source="$PACKAGE_SPEC" +resolved_package_tgz="$(resolve_package_tgz "$PACKAGE_TGZ")" +if [ -n "$resolved_package_tgz" ]; then + package_install_source="/package-under-test/$(basename "$resolved_package_tgz")" + package_mount_args=(-v "$resolved_package_tgz:$package_install_source:ro") +else + validate_openclaw_package_spec "$PACKAGE_SPEC" +fi +if [ -z "$PACKAGE_LABEL" ]; then + if [ -n "$resolved_package_tgz" ]; then + PACKAGE_LABEL="$(basename "$resolved_package_tgz")" + else + PACKAGE_LABEL="$PACKAGE_SPEC" + fi +fi docker_e2e_build_or_reuse "$IMAGE_NAME" npm-telegram-live "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" "$DOCKER_TARGET" docker_e2e_harness_mount_args @@ -64,6 +104,7 @@ fi docker_env=( -e 
COREPACK_ENABLE_DOWNLOAD_PROMPT=0 -e OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC="$PACKAGE_SPEC" + -e OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL="$PACKAGE_LABEL" -e OPENCLAW_NPM_TELEGRAM_OUTPUT_DIR="$OUTPUT_DIR" -e OPENCLAW_NPM_TELEGRAM_FAST="${OPENCLAW_NPM_TELEGRAM_FAST:-1}" ) @@ -124,10 +165,12 @@ run_logged() { >"$run_log" } -echo "Running published npm Telegram live Docker E2E ($PACKAGE_SPEC)..." +echo "Running package Telegram live Docker E2E ($PACKAGE_LABEL)..." run_logged docker run --rm \ -e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 \ - -e OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC="$PACKAGE_SPEC" \ + -e OPENCLAW_NPM_TELEGRAM_INSTALL_SOURCE="$package_install_source" \ + -e OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL="$PACKAGE_LABEL" \ + "${package_mount_args[@]}" \ -v "$npm_prefix_host:/npm-global" \ -i "$IMAGE_NAME" bash -s <<'EOF' set -euo pipefail @@ -136,15 +179,16 @@ export HOME="$(mktemp -d "/tmp/openclaw-npm-telegram-install.XXXXXX")" export NPM_CONFIG_PREFIX="/npm-global" export PATH="$NPM_CONFIG_PREFIX/bin:$PATH" -package_spec="${OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC:?missing OPENCLAW_NPM_TELEGRAM_PACKAGE_SPEC}" -echo "Installing ${package_spec}..." -npm install -g "$package_spec" --no-fund --no-audit +install_source="${OPENCLAW_NPM_TELEGRAM_INSTALL_SOURCE:?missing OPENCLAW_NPM_TELEGRAM_INSTALL_SOURCE}" +package_label="${OPENCLAW_NPM_TELEGRAM_PACKAGE_LABEL:-$install_source}" +echo "Installing ${package_label} from ${install_source}..." +npm install -g "$install_source" --no-fund --no-audit command -v openclaw openclaw --version EOF -# Mount only test harness/plugin QA sources; the SUT itself is the npm install. +# Mount only test harness/plugin QA sources; the SUT itself is the installed package candidate. 
run_logged docker run --rm \ "${docker_env[@]}" \ -v "$ROOT_DIR/.artifacts:/app/.artifacts" \ @@ -161,7 +205,7 @@ export OPENCLAW_NPM_TELEGRAM_REPO_ROOT="/app" dump_hotpath_logs() { local status="$1" - echo "installed npm onboarding recovery hot path failed with exit code $status" >&2 + echo "installed-package onboarding recovery hot path failed with exit code $status" >&2 for file in \ /tmp/openclaw-npm-telegram-onboard.json \ /tmp/openclaw-npm-telegram-channel-add.log \ @@ -178,11 +222,11 @@ trap 'status=$?; dump_hotpath_logs "$status"; exit "$status"' ERR command -v openclaw openclaw --version # The mounted QA harness imports openclaw/plugin-sdk; point that package import -# at the installed npm package without copying source into the test image. +# at the installed package without copying source into the test image. mkdir -p /app/node_modules ln -sfn /npm-global/lib/node_modules/openclaw /app/node_modules/openclaw -echo "Running installed npm onboarding recovery hot path..." +echo "Running installed-package onboarding recovery hot path..." OPENAI_API_KEY="${OPENAI_API_KEY:-sk-openclaw-npm-telegram-hotpath}" openclaw onboard --non-interactive --accept-risk \ --mode local \ --auth-choice openai-api-key \ @@ -210,4 +254,4 @@ trap - ERR tsx scripts/e2e/npm-telegram-live-runner.ts EOF -echo "published npm Telegram live Docker E2E passed ($PACKAGE_SPEC)" +echo "package Telegram live Docker E2E passed ($PACKAGE_LABEL)" diff --git a/scripts/e2e/npm-telegram-live-runner.ts b/scripts/e2e/npm-telegram-live-runner.ts index ad5500968fa..367a10b2602 100644 --- a/scripts/e2e/npm-telegram-live-runner.ts +++ b/scripts/e2e/npm-telegram-live-runner.ts @@ -1,6 +1,6 @@ #!/usr/bin/env -S node --import tsx -// Telegram npm-live Docker harness. -// Runs QA live transport code against the published package installed in Docker. +// Telegram package Docker harness. +// Runs QA live transport code against the package candidate installed in Docker. 
import fs from "node:fs/promises"; import path from "node:path"; @@ -78,9 +78,9 @@ async function main() { credentialRole: resolveCredentialRole(process.env), }); - process.stdout.write(`NPM Telegram QA report: ${result.reportPath}\n`); - process.stdout.write(`NPM Telegram QA summary: ${result.summaryPath}\n`); - process.stdout.write(`NPM Telegram QA observed messages: ${result.observedMessagesPath}\n`); + process.stdout.write(`Package Telegram QA report: ${result.reportPath}\n`); + process.stdout.write(`Package Telegram QA summary: ${result.summaryPath}\n`); + process.stdout.write(`Package Telegram QA observed messages: ${result.observedMessagesPath}\n`); if ( !parseBoolean(process.env.OPENCLAW_NPM_TELEGRAM_ALLOW_FAILURES) && result.scenarios.some((scenario) => scenario.status === "fail") @@ -101,7 +101,7 @@ async function formatRunnerErrorMessage(error: unknown) { if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) { main().catch(async (error) => { process.stderr.write( - `npm telegram live e2e failed: ${await formatRunnerErrorMessage(error)}\n`, + `package telegram live e2e failed: ${await formatRunnerErrorMessage(error)}\n`, ); process.exitCode = 1; }); diff --git a/scripts/e2e/openai-web-search-minimal-docker.sh b/scripts/e2e/openai-web-search-minimal-docker.sh index f5c8c02cacf..1ffb61df73c 100755 --- a/scripts/e2e/openai-web-search-minimal-docker.sh +++ b/scripts/e2e/openai-web-search-minimal-docker.sh @@ -359,8 +359,9 @@ node "$entry" gateway health \ --json >/dev/null cat >/tmp/openclaw-openai-web-search-minimal-client.mjs <<'NODE' -import { pathToFileURL } from "node:url"; +import { execFileSync } from "node:child_process"; +const entry = process.env.OPENCLAW_ENTRY; const port = process.env.PORT; const token = process.env.OPENCLAW_GATEWAY_TOKEN; const mode = process.argv[2]; @@ -371,47 +372,59 @@ const message = : "Return exactly OPENCLAW_SCHEMA_E2E_OK."; const id = mode === "reject" ? 
"schema-reject" : "schema-success"; -if (!port || !token) throw new Error("missing PORT/OPENCLAW_GATEWAY_TOKEN"); -const callGatewayUrl = new URL("dist/gateway/call.js", pathToFileURL(`${process.cwd()}/`)); -const { callGateway } = await import(callGatewayUrl.href); +if (!entry || !port || !token) throw new Error("missing OPENCLAW_ENTRY/PORT/OPENCLAW_GATEWAY_TOKEN"); -async function runAgent() { +const gatewayArgs = [ + entry, + "gateway", + "call", + "--url", + `ws://127.0.0.1:${port}`, + "--token", + token, + "--timeout", + "240000", + "--expect-final", + "--json", +]; + +function gatewayAgent(params) { try { - return await callGateway({ - method: "agent", - params: { - sessionKey, - message, - thinking: "minimal", - deliver: false, - timeout: 180, - idempotencyKey: id, - }, - expectFinal: true, - url: `ws://127.0.0.1:${port}`, - token, - timeoutMs: 240000, - }); + return { + ok: true, + value: JSON.parse(execFileSync("node", [...gatewayArgs, "agent", "--params", JSON.stringify(params)], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + })), + }; } catch (error) { - if (mode === "reject") { - console.error(String(error)); - process.exit(0); - } - throw error; + const stderr = typeof error?.stderr === "string" ? error.stderr : ""; + const stdout = typeof error?.stdout === "string" ? error.stdout : ""; + const combined = [String(error), stderr.trim(), stdout.trim()].filter(Boolean).join("\n"); + return { ok: false, error: new Error(combined) }; } } -const result = await runAgent(); +const result = gatewayAgent({ + sessionKey, + message, + thinking: "minimal", + deliver: false, + timeout: 180, + idempotencyKey: id, +}); + if (mode === "reject") { - console.error(JSON.stringify(result)); + console.error(result.ok ? 
JSON.stringify(result.value) : String(result.error)); process.exit(0); } -if (result?.status !== "ok") { - throw new Error(`agent run did not complete successfully: ${JSON.stringify(result)}`); +if (!result.ok) throw result.error; +if (result.value?.status !== "ok") { + throw new Error(`agent run did not complete successfully: ${JSON.stringify(result.value)}`); } NODE -PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs success >/tmp/openclaw-openai-web-search-minimal-client-success.log 2>&1 +OPENCLAW_ENTRY="$entry" PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs success >/tmp/openclaw-openai-web-search-minimal-client-success.log 2>&1 node - "$MOCK_REQUEST_LOG" <<'NODE' const fs = require("node:fs"); @@ -435,7 +448,7 @@ if (success.body.reasoning?.effort === "minimal") { } NODE -PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs reject >/tmp/openclaw-openai-web-search-minimal-client-reject.log 2>&1 +OPENCLAW_ENTRY="$entry" PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs reject >/tmp/openclaw-openai-web-search-minimal-client-reject.log 2>&1 for _ in $(seq 1 80); do if grep -Fq "$RAW_SCHEMA_ERROR" "$GATEWAY_LOG"; then diff --git a/scripts/test-docker-all.mjs b/scripts/test-docker-all.mjs index fb3dcafe23e..3a6c7b6abac 100644 --- a/scripts/test-docker-all.mjs +++ b/scripts/test-docker-all.mjs @@ -36,10 +36,15 @@ const DEFAULT_STATUS_INTERVAL_MS = 30_000; const DEFAULT_PREFLIGHT_RUN_TIMEOUT_MS = 60_000; const DEFAULT_TIMINGS_FILE = path.join(ROOT_DIR, ".artifacts/docker-tests/lane-timings.json"); const DEFAULT_GITHUB_WORKFLOW = "openclaw-live-and-e2e-checks-reusable.yml"; -const cliArgs = new Set(process.argv.slice(2)); -for (const arg of cliArgs) { - if (arg !== "--plan-json") { - throw new Error(`unknown argument: ${arg}`); +const IS_MAIN = process.argv[1] + ? 
path.resolve(process.argv[1]) === fileURLToPath(import.meta.url) + : false; +const cliArgs = new Set(IS_MAIN ? process.argv.slice(2) : []); +if (IS_MAIN) { + for (const arg of cliArgs) { + if (arg !== "--plan-json") { + throw new Error(`unknown argument: ${arg}`); + } } } @@ -82,6 +87,12 @@ function resourceLimitEnvName(resource) { return `OPENCLAW_DOCKER_ALL_${resource.toUpperCase().replace(/[^A-Z0-9]+/g, "_")}_LIMIT`; } +export function describeDockerSchedulerLimits(parallelism, options) { + return `parallelism=${parallelism} weightLimit=${options.weightLimit} resources=${resourceLimitsSummary( + options.resourceLimits, + )}`; +} + function parseResourceLimit(env, resource, parallelism, fallback) { const envName = resourceLimitEnvName(resource); return parsePositiveInt(env[envName], Math.min(parallelism, fallback), envName); @@ -103,6 +114,26 @@ function parseSchedulerOptions(env, parallelism) { }; } +export function canStartSchedulerLane(candidate, active, parallelism, options) { + const weight = laneWeight(candidate); + if (active.count >= parallelism) { + return false; + } + + const exceedsWeightLimit = active.weight + weight > options.weightLimit; + const exceedsResourceLimit = laneResources(candidate).some((resource) => { + const limit = options.resourceLimits[resource] ?? options.weightLimit; + const current = active.resources.get(resource) ?? 
0; + return current + weight > limit; + }); + + if (!exceedsWeightLimit && !exceedsResourceLimit) { + return true; + } + + return active.count === 0; +} + function timingSeconds(timingStore, poolLane) { const fromStore = timingStore?.lanes?.[poolLane.name]?.durationSeconds; if (typeof fromStore === "number" && Number.isFinite(fromStore) && fromStore > 0) { @@ -746,18 +777,7 @@ async function runLanePool(poolLanes, baseEnv, logDir, parallelism, options) { } function canStartLane(candidate) { - const weight = laneWeight(candidate); - if (active.count >= parallelism || active.weight + weight > options.weightLimit) { - return false; - } - for (const resource of laneResources(candidate)) { - const limit = options.resourceLimits[resource] ?? options.weightLimit; - const current = active.resources.get(resource) ?? 0; - if (current + weight > limit) { - return false; - } - } - return true; + return canStartSchedulerLane(candidate, active, parallelism, options); } function reserve(candidate) { @@ -818,7 +838,12 @@ async function runLanePool(poolLanes, baseEnv, logDir, parallelism, options) { } if (running.size === 0) { const blocked = pending.map(laneSummary).join(", "); - throw new Error(`No Docker lanes fit scheduler limits: ${blocked}`); + throw new Error( + `No Docker lanes fit scheduler limits (${describeDockerSchedulerLimits( + parallelism, + options, + )}): ${blocked}. Tune OPENCLAW_DOCKER_ALL_PARALLELISM, OPENCLAW_DOCKER_ALL_WEIGHT_LIMIT, or OPENCLAW_DOCKER_ALL__LIMIT.`, + ); } const { promise, result } = await Promise.race(running); @@ -1217,7 +1242,9 @@ async function main() { console.log("==> Docker test suite passed"); } -await main().catch((error) => { - console.error(error instanceof Error ? error.message : String(error)); - process.exit(1); -}); +if (IS_MAIN) { + await main().catch((error) => { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); + }); +} diff --git a/src/gateway/gateway-codex-harness.live-helpers.test.ts b/src/gateway/gateway-codex-harness.live-helpers.test.ts index caa5ef46d15..265ad217cf4 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.test.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.test.ts @@ -1,10 +1,29 @@ import { describe, expect, it } from "vitest"; import { EXPECTED_CODEX_MODELS_COMMAND_TEXT, + EXPECTED_CODEX_STATUS_COMMAND_TEXT, isExpectedCodexModelsCommandText, + isExpectedCodexStatusCommandText, } from "./gateway-codex-harness.live-helpers.js"; describe("gateway codex harness live helpers", () => { + it("accepts the current codex status prose from the live harness", () => { + const text = + "OpenClaw is running on `openai/gpt-5.5` with low reasoning/text settings. Context is at `22k/272k` tokens, no compactions, and the current session is `agent:dev:live-codex-harness`."; + + expect( + EXPECTED_CODEX_STATUS_COMMAND_TEXT.some((expectedText) => text.includes(expectedText)), + ).toBe(false); + expect(isExpectedCodexStatusCommandText(text)).toBe(true); + }); + + it("rejects status prose for a different codex session", () => { + const text = + "OpenClaw is running on `openai/gpt-5.5` with low reasoning/text settings. 
Context is at `22k/272k` tokens, no compactions, and the current session is `agent:dev:other`."; + + expect(isExpectedCodexStatusCommandText(text)).toBe(false); + }); + it("accepts the interactive model-selection summary emitted by current codex", () => { const text = [ "`/codex models` opened an interactive model-selection prompt rather than printing a plain list.", diff --git a/src/gateway/gateway-codex-harness.live-helpers.ts b/src/gateway/gateway-codex-harness.live-helpers.ts index cee1176fc9c..eb2600fbc26 100644 --- a/src/gateway/gateway-codex-harness.live-helpers.ts +++ b/src/gateway/gateway-codex-harness.live-helpers.ts @@ -71,6 +71,39 @@ export const EXPECTED_CODEX_MODELS_COMMAND_TEXT = [ "Current OpenClaw session status reports the active model as:", ] as const; +export const EXPECTED_CODEX_STATUS_COMMAND_TEXT = [ + "Codex app-server:", + "Model: `codex/", + "Model: codex/", + "Session: `agent:dev:live-codex-harness`", + "Session: agent:dev:live-codex-harness", + "OpenClaw `", + "OpenClaw status:", + "model `codex/", + "session `agent:dev:live-codex-harness`", + "Model/status card shown above", + "Status shown above.", +] as const; + +export function isExpectedCodexStatusCommandText(text: string): boolean { + const normalized = text.toLowerCase(); + const mentionsOpenClawStatus = + normalized.includes("openclaw is running on") || normalized.includes("openclaw status:"); + const mentionsHarnessSession = + normalized.includes("session: `agent:dev:live-codex-harness`") || + normalized.includes("session: agent:dev:live-codex-harness") || + normalized.includes("session `agent:dev:live-codex-harness`") || + normalized.includes("current session is `agent:dev:live-codex-harness`") || + normalized.includes("current session is agent:dev:live-codex-harness"); + const mentionsModel = + normalized.includes("`openai/") || + normalized.includes(" openai/") || + normalized.includes("`codex/") || + normalized.includes(" codex/"); + + return mentionsOpenClawStatus && 
mentionsHarnessSession && mentionsModel; +} + export function isExpectedCodexModelsCommandText(text: string): boolean { const normalized = text.toLowerCase(); const mentionsCodexModelsCommand = diff --git a/src/gateway/gateway-codex-harness.live.test.ts b/src/gateway/gateway-codex-harness.live.test.ts index 7d4dd944e7c..6d4795f7771 100644 --- a/src/gateway/gateway-codex-harness.live.test.ts +++ b/src/gateway/gateway-codex-harness.live.test.ts @@ -17,7 +17,9 @@ import { } from "./gateway-cli-backend.live-helpers.js"; import { EXPECTED_CODEX_MODELS_COMMAND_TEXT, + EXPECTED_CODEX_STATUS_COMMAND_TEXT, isExpectedCodexModelsCommandText, + isExpectedCodexStatusCommandText, } from "./gateway-codex-harness.live-helpers.js"; import { assertCronJobMatches, @@ -790,19 +792,8 @@ describeLive("gateway live (Codex harness)", () => { client, sessionKey, command: "/codex status", - expectedText: [ - "Codex app-server:", - "Model: `codex/", - "Model: codex/", - "Session: `agent:dev:live-codex-harness`", - "Session: agent:dev:live-codex-harness", - "OpenClaw `", - "OpenClaw status:", - "model `codex/", - "session `agent:dev:live-codex-harness`", - "Model/status card shown above", - "Status shown above.", - ], + expectedText: [...EXPECTED_CODEX_STATUS_COMMAND_TEXT], + isExpectedText: isExpectedCodexStatusCommandText, }); logCodexLiveStep("codex-status-command", { statusText }); diff --git a/test/scripts/check-openclaw-package-tarball.test.ts b/test/scripts/check-openclaw-package-tarball.test.ts new file mode 100644 index 00000000000..5d1e987d010 --- /dev/null +++ b/test/scripts/check-openclaw-package-tarball.test.ts @@ -0,0 +1,70 @@ +import { spawnSync } from "node:child_process"; +import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { describe, expect, it } from "vitest"; + +const CHECK_SCRIPT = "scripts/check-openclaw-package-tarball.mjs"; + +function withTarball( + inventory: 
string[], + files: Record, + testBody: (tarball: string) => void, +) { + const root = mkdtempSync(join(tmpdir(), "openclaw-package-tarball-test-")); + try { + const packageRoot = join(root, "package"); + mkdirSync(join(packageRoot, "dist"), { recursive: true }); + writeFileSync( + join(packageRoot, "package.json"), + JSON.stringify({ name: "openclaw", version: "0.0.0" }), + ); + writeFileSync( + join(packageRoot, "dist", "postinstall-inventory.json"), + JSON.stringify(inventory), + ); + for (const [relativePath, body] of Object.entries(files)) { + const filePath = join(packageRoot, relativePath); + mkdirSync(dirname(filePath), { recursive: true }); + writeFileSync(filePath, body); + } + + const tarball = join(root, "openclaw.tgz"); + const pack = spawnSync("tar", ["-czf", tarball, "-C", root, "package"], { + encoding: "utf8", + }); + expect(pack.status, pack.stderr).toBe(0); + testBody(tarball); + } finally { + rmSync(root, { recursive: true, force: true }); + } +} + +describe("check-openclaw-package-tarball", () => { + it("allows legacy private QA inventory entries omitted from shipped tarballs", () => { + withTarball( + ["dist/index.js", "dist/extensions/qa-channel/runtime-api.js"], + { "dist/index.js": "export {};\n" }, + (tarball) => { + const result = spawnSync("node", [CHECK_SCRIPT, tarball], { encoding: "utf8" }); + + expect(result.status, result.stderr).toBe(0); + expect(result.stderr).toContain("legacy inventory references omitted private QA"); + expect(result.stdout).toContain("OpenClaw package tarball integrity passed."); + }, + ); + }); + + it("still rejects non-legacy missing inventory entries", () => { + withTarball( + ["dist/index.js", "dist/cli.js"], + { "dist/index.js": "export {};\n" }, + (tarball) => { + const result = spawnSync("node", [CHECK_SCRIPT, tarball], { encoding: "utf8" }); + + expect(result.status).not.toBe(0); + expect(result.stderr).toContain("inventory references missing tar entry dist/cli.js"); + }, + ); + }); +}); diff --git 
a/test/scripts/docker-all-scheduler.test.ts b/test/scripts/docker-all-scheduler.test.ts new file mode 100644 index 00000000000..28f0856f1ca --- /dev/null +++ b/test/scripts/docker-all-scheduler.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from "vitest"; +import { + canStartSchedulerLane, + describeDockerSchedulerLimits, +} from "../../scripts/test-docker-all.mjs"; + +const limits = { + resourceLimits: { + docker: 2, + npm: 2, + }, + weightLimit: 2, +}; + +function activePool({ + count = 0, + resources = {}, + weight = 0, +}: { + count?: number; + resources?: Record; + weight?: number; +} = {}) { + return { + count, + resources: new Map(Object.entries(resources)), + weight, + }; +} + +describe("scripts/test-docker-all scheduler", () => { + it("allows an overweight lane to start alone under low parallelism", () => { + expect( + canStartSchedulerLane( + { + name: "install-e2e", + resources: ["npm"], + weight: 4, + }, + activePool(), + 2, + limits, + ), + ).toBe(true); + }); + + it("does not co-schedule another lane while an overweight lane is active", () => { + expect( + canStartSchedulerLane( + { + name: "package-update", + resources: ["npm"], + weight: 1, + }, + activePool({ + count: 1, + resources: { + docker: 4, + npm: 4, + }, + weight: 4, + }), + 2, + limits, + ), + ).toBe(false); + }); + + it("preserves the parallelism count cap", () => { + expect( + canStartSchedulerLane( + { + name: "package-update", + resources: ["npm"], + weight: 1, + }, + activePool({ + count: 2, + resources: { + docker: 1, + npm: 1, + }, + weight: 1, + }), + 2, + limits, + ), + ).toBe(false); + }); + + it("keeps resource and weight limits as co-scheduling limits", () => { + expect( + canStartSchedulerLane( + { + name: "npm-smoke", + resources: ["npm"], + weight: 1, + }, + activePool({ + count: 1, + resources: { + docker: 1, + npm: 1, + }, + weight: 1, + }), + 2, + limits, + ), + ).toBe(true); + + expect( + canStartSchedulerLane( + { + name: "npm-heavy", + resources: ["npm"], 
+ weight: 2, + }, + activePool({ + count: 1, + resources: { + docker: 1, + npm: 1, + }, + weight: 1, + }), + 2, + limits, + ), + ).toBe(false); + }); + + it("describes effective scheduler limits for operator errors", () => { + expect(describeDockerSchedulerLimits(2, limits)).toBe( + "parallelism=2 weightLimit=2 resources=docker=2 npm=2", + ); + }); +}); diff --git a/test/scripts/docker-build-helper.test.ts b/test/scripts/docker-build-helper.test.ts index 6b7eb1a9ad9..7784160df91 100644 --- a/test/scripts/docker-build-helper.test.ts +++ b/test/scripts/docker-build-helper.test.ts @@ -91,8 +91,8 @@ describe("docker build helper", () => { it("keeps OpenAI web search smoke on one gateway agent connection", () => { const runner = readFileSync(OPENAI_WEB_SEARCH_MINIMAL_E2E_PATH, "utf8"); - expect(runner).toContain('new URL("dist/gateway/call.js"'); - expect(runner).toContain("expectFinal: true"); + expect(runner).toContain('"--expect-final"'); + expect(runner).toContain('[...gatewayArgs, "agent", "--params"'); expect(runner).not.toContain('"agent.wait"'); }); }); diff --git a/test/scripts/npm-telegram-live.test.ts b/test/scripts/npm-telegram-live.test.ts index a5912fb7abc..95704c85681 100644 --- a/test/scripts/npm-telegram-live.test.ts +++ b/test/scripts/npm-telegram-live.test.ts @@ -7,7 +7,7 @@ import { __testing } from "../../scripts/e2e/npm-telegram-live-runner.ts"; const TEST_DIR = path.dirname(fileURLToPath(import.meta.url)); const DOCKER_SCRIPT_PATH = path.resolve(TEST_DIR, "../../scripts/e2e/npm-telegram-live-docker.sh"); -describe("npm Telegram live Docker E2E", () => { +describe("package Telegram live Docker E2E", () => { it("supports npm-specific Convex credential aliases", () => { const script = readFileSync(DOCKER_SCRIPT_PATH, "utf8"); @@ -28,18 +28,33 @@ describe("npm Telegram live Docker E2E", () => { expect(script).toContain('printf "convex"'); }); - it("installs the npm package before forwarding runtime secrets", () => { + it("installs the package 
candidate before forwarding runtime secrets", () => { const script = readFileSync(DOCKER_SCRIPT_PATH, "utf8"); - const installRunStart = script.indexOf('echo "Running published npm Telegram live Docker E2E'); + const installRunStart = script.indexOf('echo "Running package Telegram live Docker E2E'); const installRunEnd = script.indexOf('run_logged docker run --rm \\\n "${docker_env[@]}"'); const installRun = script.slice(installRunStart, installRunEnd); - expect(installRun).toContain('npm install -g "$package_spec" --no-fund --no-audit'); + expect(installRun).toContain('npm install -g "$install_source" --no-fund --no-audit'); + expect(installRun).toContain('"${package_mount_args[@]}"'); expect(installRun).not.toContain('"${docker_env[@]}"'); expect(script).toContain('if [ -z "$credential_role" ] && [ -n "${CI:-}" ]'); expect(script).toContain('credential_role="ci"'); }); + it("can install a resolved package tarball instead of a registry spec", () => { + const script = readFileSync(DOCKER_SCRIPT_PATH, "utf8"); + + expect(script).toContain("OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ"); + expect(script).toContain("OPENCLAW_CURRENT_PACKAGE_TGZ"); + expect(script).toContain( + 'package_mount_args=(-v "$resolved_package_tgz:$package_install_source:ro")', + ); + expect(script).toContain('validate_openclaw_package_spec "$PACKAGE_SPEC"'); + expect(script.indexOf('if [ -n "$resolved_package_tgz" ]; then')).toBeLessThan( + script.indexOf('validate_openclaw_package_spec "$PACKAGE_SPEC"'), + ); + }); + it("lets npm-specific credential aliases override shared QA env", () => { expect( __testing.resolveCredentialSource({ diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index bca77db6009..18b062ef892 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -34,15 +34,21 @@ describe("package acceptance workflow", () => { ); }); - it("offers bounded product profiles 
and keeps Telegram published-npm only", () => { + it("offers bounded product profiles and can run Telegram against the resolved artifact", () => { const workflow = readFileSync(PACKAGE_ACCEPTANCE_WORKFLOW, "utf8"); expect(workflow).toContain("suite_profile:"); expect(workflow).toContain("npm-onboard-channel-agent gateway-network config-reload"); expect(workflow).toContain("install-e2e npm-onboard-channel-agent doctor-switch"); expect(workflow).toContain("include_release_path_suites=true"); - expect(workflow).toContain("telegram_mode requires source=npm"); + expect(workflow).not.toContain("telegram_mode requires source=npm"); expect(workflow).toContain("uses: ./.github/workflows/npm-telegram-beta-e2e.yml"); + expect(workflow).toContain( + "package_artifact_name: ${{ needs.resolve_package.outputs.package_artifact_name }}", + ); + expect(workflow).toContain( + "package_label: openclaw@${{ needs.resolve_package.outputs.package_version }}", + ); }); }); @@ -62,10 +68,13 @@ describe("package artifact reuse", () => { expect(action).toContain("name: ${{ inputs.package-artifact-name }}"); }); - it("allows the npm Telegram lane to run from reusable package acceptance", () => { + it("allows the Telegram lane to run from reusable package acceptance artifacts", () => { const workflow = readFileSync(NPM_TELEGRAM_WORKFLOW, "utf8"); expect(workflow).toContain("workflow_call:"); + expect(workflow).toContain("package_artifact_name:"); + expect(workflow).toContain("Download package-under-test artifact"); + expect(workflow).toContain("OPENCLAW_NPM_TELEGRAM_PACKAGE_TGZ"); expect(workflow).toContain("provider_mode:"); expect(workflow).toContain("provider_mode must be mock-openai or live-frontier"); });