diff --git a/scripts/ci-live-command-retry.sh b/scripts/ci-live-command-retry.sh index fb34192b498..ca2c0a7504e 100755 --- a/scripts/ci-live-command-retry.sh +++ b/scripts/ci-live-command-retry.sh @@ -13,7 +13,9 @@ fi attempts="${OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2}" delay_seconds="${OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS:-10}" +rate_limit_delay_seconds="${OPENCLAW_LIVE_COMMAND_RATE_LIMIT_RETRY_DELAY_SECONDS:-60}" retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|gateway request timeout|model idle timeout|did not produce a response before the model idle timeout|\\b429\\b|\\b529\\b}" +rate_limit_pattern="${OPENCLAW_LIVE_COMMAND_RATE_LIMIT_PATTERN:-Rate limit reached|rate.?limit|tokens per min|requests per min|\\bTPM\\b|\\bRPM\\b}" if ! [[ "$attempts" =~ ^[1-9][0-9]*$ ]]; then echo "OPENCLAW_LIVE_COMMAND_ATTEMPTS must be a positive integer, got: $attempts" >&2 @@ -25,6 +27,11 @@ if ! [[ "$delay_seconds" =~ ^[0-9]+$ ]]; then exit 64 fi +if ! [[ "$rate_limit_delay_seconds" =~ ^[0-9]+$ ]]; then + echo "OPENCLAW_LIVE_COMMAND_RATE_LIMIT_RETRY_DELAY_SECONDS must be a non-negative integer, got: $rate_limit_delay_seconds" >&2 + exit 64 +fi + log_file="$(mktemp)" cleanup() { rm -f "$log_file" @@ -46,12 +53,20 @@ for attempt in $(seq 1 "$attempts"); do exit "$status" fi - if ! grep -Eiq "$retry_pattern" "$log_file"; then + is_rate_limited=0 + if grep -Eiq "$rate_limit_pattern" "$log_file"; then + is_rate_limited=1 + elif ! grep -Eiq "$retry_pattern" "$log_file"; then exit "$status" fi echo "Live command failed with a retryable provider/network error; retrying ($attempt/$attempts)..." >&2 - if [[ "$delay_seconds" -gt 0 ]]; then - sleep "$delay_seconds" + next_delay_seconds="$delay_seconds" + if [[ "$is_rate_limited" -eq 1 ]]; then + next_delay_seconds="$rate_limit_delay_seconds" + echo "Provider rate limit detected; waiting ${next_delay_seconds}s before retry." >&2 + fi + if [[ "$next_delay_seconds" -gt 0 ]]; then + sleep "$next_delay_seconds" fi done diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 280221680db..e83480b7b99 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -733,6 +733,9 @@ describe("package artifact reuse", () => { expect(retryHelper).toContain("fetch failed"); expect(retryHelper).toContain("gateway request timeout"); expect(retryHelper).toContain("model idle timeout"); + expect(retryHelper).toContain("OPENCLAW_LIVE_COMMAND_RATE_LIMIT_RETRY_DELAY_SECONDS:-60"); + expect(retryHelper).toContain("Rate limit reached"); + expect(retryHelper).toContain("tokens per min"); }); it("runs Docker live harnesses from trusted helper scripts", () => {