From 6a4c866b6a8b09360f1f51aa99aac9e682b0fef1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 29 Apr 2026 07:36:51 +0100 Subject: [PATCH] ci: speed up broad validation setup --- .github/actions/setup-node-env/action.yml | 2 ++ .github/workflows/ci.yml | 29 ++++++++++++++----- docs/ci.md | 16 +++++----- scripts/lib/extension-test-plan.mjs | 2 +- .../plugin-prerelease-test-plan.test.ts | 6 ++++ 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/.github/actions/setup-node-env/action.yml b/.github/actions/setup-node-env/action.yml index 61e51e8c7a3..fda90859e56 100644 --- a/.github/actions/setup-node-env/action.yml +++ b/.github/actions/setup-node-env/action.yml @@ -90,9 +90,11 @@ runs: install_args=( install + --prefer-offline --ignore-scripts=false --config.engine-strict=false --config.enable-pre-post-scripts=true + --config.side-effects-cache=true ) if [ -n "$LOCKFILE_FLAG" ]; then install_args+=("$LOCKFILE_FLAG") diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee14cc55e50..3020616421c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,6 +13,11 @@ on: required: false default: false type: boolean + include_android: + description: Run Android lanes for this manual CI dispatch. + required: false + default: false + type: boolean push: branches: [main] paths-ignore: @@ -127,7 +132,7 @@ jobs: OPENCLAW_CI_DOCS_CHANGED: ${{ github.event_name == 'workflow_dispatch' && 'true' || steps.docs_scope.outputs.docs_changed }} OPENCLAW_CI_RUN_NODE: ${{ github.event_name == 'workflow_dispatch' && 'true' || steps.changed_scope.outputs.run_node || 'false' }} OPENCLAW_CI_RUN_MACOS: ${{ github.event_name == 'workflow_dispatch' && 'true' || steps.changed_scope.outputs.run_macos || 'false' }} - OPENCLAW_CI_RUN_ANDROID: ${{ github.event_name == 'workflow_dispatch' && 'true' || steps.changed_scope.outputs.run_android || 'false' }} + OPENCLAW_CI_RUN_ANDROID: ${{ github.event_name == 'workflow_dispatch' && (inputs.full_release_validation || inputs.include_android) && 'true' || steps.changed_scope.outputs.run_android || 'false' }} OPENCLAW_CI_RUN_WINDOWS: ${{ github.event_name == 'workflow_dispatch' && 'true' || steps.changed_scope.outputs.run_windows || 'false' }} OPENCLAW_CI_RUN_NODE_FAST_ONLY: ${{ github.event_name == 'workflow_dispatch' && 'false' || steps.changed_scope.outputs.run_node_fast_only || 'false' }} OPENCLAW_CI_RUN_NODE_FAST_PLUGIN_CONTRACTS: ${{ github.event_name == 'workflow_dispatch' && 'false' || steps.changed_scope.outputs.run_node_fast_plugin_contracts || 'false' }} @@ -237,7 +242,7 @@ jobs: }).map((shard) => ({ check_name: shard.checkName, extensions_csv: shard.extensionIds.join(","), - runner: isCanonicalRepository && [0, 3, 4].includes(shard.index) + runner: isCanonicalRepository && [0, 1, 2, 3].includes(shard.index) ? "blacksmith-8vcpu-ubuntu-2404" : isCanonicalRepository ? "blacksmith-4vcpu-ubuntu-2404" @@ -2217,6 +2222,14 @@ jobs: apps/android/**/gradle-wrapper.properties apps/android/gradle/libs.versions.toml + - name: Cache Android SDK + uses: actions/cache@v5 + with: + path: ~/.android-sdk + key: ${{ runner.os }}-android-sdk-v1-cmdline-12266719-platform-36-build-tools-36.0.0 + restore-keys: | + ${{ runner.os }}-android-sdk-v1- + - name: Setup Android SDK cmdline-tools run: | set -euo pipefail @@ -2225,11 +2238,13 @@ jobs: ARCHIVE="commandlinetools-linux-${CMDLINE_TOOLS_VERSION}_latest.zip" URL="https://dl.google.com/android/repository/${ARCHIVE}" - mkdir -p "$ANDROID_SDK_ROOT/cmdline-tools" - curl -fsSL "$URL" -o "/tmp/${ARCHIVE}" - rm -rf "$ANDROID_SDK_ROOT/cmdline-tools/latest" - unzip -q "/tmp/${ARCHIVE}" -d "$ANDROID_SDK_ROOT/cmdline-tools" - mv "$ANDROID_SDK_ROOT/cmdline-tools/cmdline-tools" "$ANDROID_SDK_ROOT/cmdline-tools/latest" + if [ ! -x "$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager" ]; then + mkdir -p "$ANDROID_SDK_ROOT/cmdline-tools" + curl -fsSL "$URL" -o "/tmp/${ARCHIVE}" + rm -rf "$ANDROID_SDK_ROOT/cmdline-tools/latest" + unzip -q "/tmp/${ARCHIVE}" -d "$ANDROID_SDK_ROOT/cmdline-tools" + mv "$ANDROID_SDK_ROOT/cmdline-tools/cmdline-tools" "$ANDROID_SDK_ROOT/cmdline-tools/latest" + fi echo "ANDROID_SDK_ROOT=$ANDROID_SDK_ROOT" >> "$GITHUB_ENV" echo "ANDROID_HOME=$ANDROID_SDK_ROOT" >> "$GITHUB_ENV" diff --git a/docs/ci.md b/docs/ci.md index d6aee4172f4..5b3a688cc41 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -6,7 +6,7 @@ read_when: - You are debugging failing GitHub Actions checks --- -The CI runs on every push to `main` and every pull request. It uses smart scoping to skip expensive jobs when only unrelated areas changed. Manual `workflow_dispatch` runs intentionally bypass smart scoping and fan out the full normal CI graph for release candidates or broad validation. Release-only plugin prerelease lanes stay off unless `Full Release Validation` dispatches CI with `full_release_validation=true`. +The CI runs on every push to `main` and every pull request. It uses smart scoping to skip expensive jobs when only unrelated areas changed. Manual `workflow_dispatch` runs intentionally bypass smart scoping and fan out the full normal CI graph for release candidates or broad validation, with Android lanes opt-in through `include_android` for standalone manual runs. Release-only plugin prerelease lanes stay off unless `Full Release Validation` dispatches CI with `full_release_validation=true`, which also enables Android. `Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the @@ -360,9 +360,11 @@ gh workflow run duplicate-after-merge.yml \ | `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch | Manual CI dispatches run the same job graph as normal CI but force every -scoped lane on: Linux Node shards, bundled-plugin shards, channel contracts, -Node 22 compatibility, `check`, `check-additional`, build smoke, docs checks, -Python skills, Windows, macOS, Android, and Control UI i18n. The plugin +non-Android scoped lane on: Linux Node shards, bundled-plugin shards, channel +contracts, Node 22 compatibility, `check`, `check-additional`, build smoke, docs +checks, Python skills, Windows, macOS, and Control UI i18n. Standalone manual CI +dispatches run Android only with `include_android=true`; the full release +umbrella enables Android by passing `full_release_validation=true`. The plugin prerelease suite is excluded from standalone manual CI and is enabled only when the full release umbrella passes `full_release_validation=true`. Manual runs use a unique concurrency group so a release-candidate full suite is not cancelled by @@ -372,7 +374,7 @@ using the workflow file from the selected dispatch ref. ```bash gh workflow run ci.yml --ref release/YYYY.M.D -gh workflow run ci.yml --ref main -f target_ref= +gh workflow run ci.yml --ref main -f target_ref= -f include_android=true gh workflow run full-release-validation.yml --ref main -f ref= ``` @@ -416,9 +418,9 @@ copy of the PR. Stop that box and warm a fresh one instead of debugging the product test failure. For intentional large deletion PRs, set `OPENCLAW_TESTBOX_ALLOW_MASS_DELETIONS=1` for that sanity run. -Manual CI dispatches run `checks-node-compat-node22` as broad compatibility coverage. `plugin-prerelease-suite` is more expensive product/package coverage, so it runs only when `Full Release Validation` dispatches CI with `full_release_validation=true`. Normal pull requests, `main` pushes, and standalone manual CI dispatches keep that suite off. +Manual CI dispatches run `checks-node-compat-node22` as broad compatibility coverage. Android is opt-in for standalone manual CI through `include_android=true` and always enabled for `Full Release Validation`. `plugin-prerelease-suite` is more expensive product/package coverage, so it runs only when `Full Release Validation` dispatches CI with `full_release_validation=true`. Normal pull requests, `main` pushes, and standalone manual CI dispatches keep that suite off. -The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across six extension workers, small core unit lanes are paired, auto-reply runs as four balanced workers with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. +The slowest Node test families are split or balanced so each job stays small without over-reserving runners: channel contracts run as three weighted shards, bundled plugin tests balance across eight extension workers, small core unit lanes are paired, auto-reply runs as four balanced workers with the reply subtree split into agent-runner, dispatch, and commands/state-routing shards, and agentic gateway/plugin configs are spread across the existing source-only agentic Node jobs instead of waiting on built artifacts. Broad browser, QA, media, and miscellaneous plugin tests use their dedicated Vitest configs instead of the shared plugin catch-all. Extension shard jobs run up to two plugin config groups at a time with one Vitest worker per group and a larger Node heap so import-heavy plugin batches do not create extra CI jobs. The broad agents lane uses the shared Vitest file-parallel scheduler because it is import/scheduling dominated rather than owned by a single slow test file. `runtime-config` runs with the infra core-runtime shard to keep the shared runtime shard from owning the tail. Include-pattern shards record timing entries using the CI shard name, so `.artifacts/vitest-shard-timings.json` can distinguish a whole config from a filtered shard. `check-additional` keeps package-boundary compile/canary work together and separates runtime topology architecture from gateway watch coverage; the boundary guard shard runs its small independent guards concurrently inside one job. Gateway watch, channel tests, and the core support-boundary shard run concurrently inside `build-artifacts` after `dist/` and `dist-runtime/` are already built, keeping their old check names as lightweight verifier jobs while avoiding two extra Blacksmith workers and a second artifact-consumer queue. Android CI runs both `testPlayDebugUnitTest` and `testThirdPartyDebugUnitTest`, then builds the Play debug APK. The third-party flavor has no separate source set or manifest; its unit-test lane still compiles that flavor with the SMS/call-log BuildConfig flags, while avoiding a duplicate debug APK packaging job on every Android-relevant push. GitHub may mark superseded jobs as `cancelled` when a newer push lands on the same PR or `main` ref. Treat that as CI noise unless the newest run for the same ref is also failing. Aggregate shard checks use `!cancelled() && always()` so they still report normal shard failures but do not queue after the whole workflow has already been superseded. The automatic CI concurrency key is versioned (`CI-v7-*`) so a GitHub-side zombie in an old queue group cannot indefinitely block newer main runs. Manual full-suite runs use `CI-manual-v1-*` and do not cancel in-progress runs. diff --git a/scripts/lib/extension-test-plan.mjs b/scripts/lib/extension-test-plan.mjs index 38770702039..94f410b054d 100644 --- a/scripts/lib/extension-test-plan.mjs +++ b/scripts/lib/extension-test-plan.mjs @@ -28,7 +28,7 @@ import { BUNDLED_PLUGIN_PATH_PREFIX, BUNDLED_PLUGIN_ROOT_DIR } from "./bundled-p import { listAvailableExtensionIds } from "./changed-extensions.mjs"; const repoRoot = path.resolve(import.meta.dirname, "..", ".."); -export const DEFAULT_EXTENSION_TEST_SHARD_COUNT = 6; +export const DEFAULT_EXTENSION_TEST_SHARD_COUNT = 8; const EXTENSION_TEST_COST_MULTIPLIERS = { // CI shard planning uses measured wall time rather than raw file count. // These ratios come from Blacksmith extension batch timings; import-heavy diff --git a/test/scripts/plugin-prerelease-test-plan.test.ts b/test/scripts/plugin-prerelease-test-plan.test.ts index 20061613d44..f98d1ff1d6d 100644 --- a/test/scripts/plugin-prerelease-test-plan.test.ts +++ b/test/scripts/plugin-prerelease-test-plan.test.ts @@ -139,9 +139,15 @@ describe("scripts/lib/plugin-prerelease-test-plan.mjs", () => { default: false, type: "boolean", }); + expect(workflow.on.workflow_dispatch.inputs.include_android).toMatchObject({ + default: false, + type: "boolean", + }); expect(manifestEnv).toMatchObject({ OPENCLAW_CI_FULL_RELEASE_VALIDATION: "${{ github.event_name == 'workflow_dispatch' && inputs.full_release_validation && 'true' || 'false' }}", + OPENCLAW_CI_RUN_ANDROID: + "${{ github.event_name == 'workflow_dispatch' && (inputs.full_release_validation || inputs.include_android) && 'true' || steps.changed_scope.outputs.run_android || 'false' }}", }); expect(manifestScript).toContain("const isFullReleaseValidationCiRun ="); expect(manifestScript).toContain(