diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
index 56b53a09478..7247a81bae3 100644
--- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
+++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml
@@ -1923,7 +1923,9 @@ jobs:
 
       - name: Run ${{ matrix.label }}
         if: contains(matrix.profiles, inputs.release_test_profile)
-        run: ${{ matrix.command }}
+        env:
+          OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
+        run: bash .release-harness/scripts/ci-live-command-retry.sh
 
   validate_live_docker_provider_suites:
     name: Docker live suites (${{ matrix.label }})
@@ -2082,7 +2084,9 @@ jobs:
 
       - name: Run ${{ matrix.label }}
         if: contains(matrix.profiles, inputs.release_test_profile)
-        run: ${{ matrix.command }}
+        env:
+          OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}
+        run: bash .release-harness/scripts/ci-live-command-retry.sh
 
   validate_live_media_provider_suites:
     name: Live media suites (${{ matrix.label }})
diff --git a/scripts/ci-live-command-retry.sh b/scripts/ci-live-command-retry.sh
new file mode 100755
index 00000000000..790187f5e73
--- /dev/null
+++ b/scripts/ci-live-command-retry.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# Run a live-test command and retry it when a failed attempt's output looks like
+# a transient provider/network error.
+#
+# Environment:
+#   OPENCLAW_LIVE_COMMAND                      Command line to run. Falls back to the
+#                                              script arguments joined with spaces.
+#   OPENCLAW_LIVE_COMMAND_ATTEMPTS             Total attempts, positive integer (default 2).
+#   OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS  Seconds to wait before a retry (default 10).
+#   OPENCLAW_LIVE_COMMAND_RETRY_PATTERN        Case-insensitive extended regex matched
+#                                              against the output of a failed attempt.
+#
+# Exit status: 0 on success, 64 on invalid usage or configuration, otherwise the
+# exit status of the last attempt.
+set -euo pipefail
+
+command="${OPENCLAW_LIVE_COMMAND:-}"
+if [[ -z "$command" && "$#" -gt 0 ]]; then
+  command="$*"
+fi
+
+if [[ -z "$command" ]]; then
+  echo "Usage: OPENCLAW_LIVE_COMMAND='<command>' $0" >&2
+  exit 64
+fi
+
+attempts="${OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2}"
+delay_seconds="${OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS:-10}"
+retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|\\b429\\b|\\b529\\b}"
+
+if ! [[ "$attempts" =~ ^[1-9][0-9]*$ ]]; then
+  echo "OPENCLAW_LIVE_COMMAND_ATTEMPTS must be a positive integer, got: $attempts" >&2
+  exit 64
+fi
+
+if ! [[ "$delay_seconds" =~ ^[0-9]+$ ]]; then
+  echo "OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS must be a non-negative integer, got: $delay_seconds" >&2
+  exit 64
+fi
+
+# Each attempt's combined stdout/stderr is captured here so it can be scanned for
+# the retry pattern; the file is removed when the script exits.
+log_file="$(mktemp)"
+cleanup() {
+  rm -f "$log_file"
+}
+trap cleanup EXIT
+
+for attempt in $(seq 1 "$attempts"); do
+  : >"$log_file"
+  # Errexit is suspended so a failing attempt can be inspected instead of
+  # aborting this script.
+  set +e
+  # -e keeps the errexit behavior the command had as a plain workflow "run:" step;
+  # tee streams the output live while recording it for the pattern check.
+  bash -eo pipefail -c "$command" 2>&1 | tee "$log_file"
+  status="${PIPESTATUS[0]}"
+  set -e
+
+  if [[ "$status" -eq 0 ]]; then
+    exit 0
+  fi
+
+  if [[ "$attempt" -ge "$attempts" ]]; then
+    exit "$status"
+  fi
+
+  # Failures without a retryable signature are reported immediately.
+  if ! grep -Eiq "$retry_pattern" "$log_file"; then
+    exit "$status"
+  fi
+
+  echo "Live command failed with a retryable provider/network error; retrying ($attempt/$attempts)..." >&2
+  if [[ "$delay_seconds" -gt 0 ]]; then
+    sleep "$delay_seconds"
+  fi
+done
diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts
index 6c2b9404753..15a3eb73716 100644
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -119,6 +119,7 @@ describe("package artifact reuse", () => {
 
   it("shards broad native live tests instead of one serial live-all job", () => {
     const workflow = readFileSync(LIVE_E2E_WORKFLOW, "utf8");
+    const retryHelper = readFileSync("scripts/ci-live-command-retry.sh", "utf8");
 
     expect(workflow).not.toContain("suite_id: live-all");
     expect(workflow).not.toContain("command: pnpm test:live\n");
@@ -127,6 +128,8 @@ describe("package artifact reuse", () => {
     expect(workflow).toContain(
       "command: node .release-harness/scripts/test-live-shard.mjs native-live-src-agents",
     );
+    expect(workflow).toContain("OPENCLAW_LIVE_COMMAND: ${{ matrix.command }}");
+    expect(workflow).toContain("bash .release-harness/scripts/ci-live-command-retry.sh");
     expect(workflow).toContain("suite_id: native-live-src-gateway-core");
     expect(workflow).toContain("suite_id: native-live-src-gateway-backends");
     expect(workflow).toContain("suite_id: native-live-src-gateway-profiles-deepseek");
@@ -150,6 +153,9 @@ describe("package artifact reuse", () => {
     expect(workflow).toContain("suite_id: native-live-extensions-media-music-minimax");
     expect(workflow).toContain("suite_id: native-live-extensions-media-video");
     expect(workflow).not.toContain("needs_ffmpeg: true");
+    expect(retryHelper).toContain("OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2");
+    expect(retryHelper).toContain("ECONNRESET");
+    expect(retryHelper).toContain("fetch failed");
   });
 
   it("runs Docker live harnesses from trusted helper scripts", () => {