diff --git a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml index 6d550819d16..d055da80423 100644 --- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml +++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml @@ -166,6 +166,7 @@ env: PNPM_VERSION: "10.32.1" OPENCLAW_REPOSITORY: openclaw/openclaw TSX_VERSION: "4.21.0" + OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4-mini' }} jobs: prepare: diff --git a/docs/ci.md b/docs/ci.md index 23cbc05bfa0..4ce43092a7f 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -154,7 +154,11 @@ Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute -Windows path. +Windows path. The OpenAI cross-OS agent-turn smoke defaults to +`OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4-mini`, so +the install and gateway proof stays fast and deterministic. Dedicated live +provider/model lanes still cover broader model routing, including slower +frontier defaults. Package Acceptance has bounded legacy-compatibility windows for already published packages. Packages through `2026.4.25`, including `2026.4.25-beta.*`, diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index 94912c4c489..4e760d67e22 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -268,6 +268,11 @@ Use `release_profile` to select live/provider breadth: ref once as `release-package-under-test` and reuses that artifact in both release-path Docker checks and Package Acceptance. This keeps all package-facing boxes on the same bytes and avoids repeated package builds. +The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the +repo/org variable is set, otherwise `openai/gpt-5.4-mini`, because this lane is +proving package install, onboarding, gateway startup, and one live agent turn +rather than benchmarking the slowest default model. The broader live provider +matrix remains the place for model-specific coverage. Use these variants depending on release stage: diff --git a/scripts/ci-live-command-retry.sh b/scripts/ci-live-command-retry.sh index 790187f5e73..fb34192b498 100755 --- a/scripts/ci-live-command-retry.sh +++ b/scripts/ci-live-command-retry.sh @@ -13,7 +13,7 @@ fi attempts="${OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2}" delay_seconds="${OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS:-10}" -retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|\\b429\\b|\\b529\\b}" +retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|gateway request timeout|model idle timeout|did not produce a response before the model idle timeout|\\b429\\b|\\b529\\b}" if ! [[ "$attempts" =~ ^[1-9][0-9]*$ ]]; then echo "OPENCLAW_LIVE_COMMAND_ATTEMPTS must be a positive integer, got: $attempts" >&2 diff --git a/scripts/openclaw-cross-os-release-checks.ts b/scripts/openclaw-cross-os-release-checks.ts index e5fd0375485..7af9f670f6f 100644 --- a/scripts/openclaw-cross-os-release-checks.ts +++ b/scripts/openclaw-cross-os-release-checks.ts @@ -55,6 +55,16 @@ const providerConfig = { }, }; +export function resolveProviderConfig(provider, env = process.env) { + const config = providerConfig[provider]; + if (!config) { + return null; + } + const providerEnvKey = `OPENCLAW_CROSS_OS_${provider.toUpperCase().replace(/[^A-Z0-9]+/gu, "_")}_MODEL`; + const model = env[providerEnvKey]?.trim() || env.OPENCLAW_CROSS_OS_MODEL?.trim() || config.model; + return { ...config, model }; +} + const RELEASE_SMOKE_PLUGIN_ALLOWLIST_BASE = [ "acpx", "bonjour", @@ -304,7 +314,7 @@ async function main(argv) { throw new Error(`Unsupported provider "${provider}".`); } - const selectedProvider = providerConfig[provider]; + const selectedProvider = resolveProviderConfig(provider); const providerSecretValue = process.env[selectedProvider.secretEnv]?.trim(); if (!providerSecretValue) { throw new Error(`Missing ${selectedProvider.secretEnv}.`); @@ -1882,30 +1892,36 @@ async function runInstalledModelsSet(params) { } async function runInstalledAgentTurn(params) { - const sessionId = `cross-os-release-check-${params.label}-${Date.now()}`; - const result = await runInstalledCli({ - cliPath: params.cliPath, - args: [ - "agent", - "--agent", - "main", - "--session-id", - sessionId, - "--message", - "Reply with exact ASCII text OK only.", - "--thinking", - "minimal", - "--json", - ], - cwd: params.cwd, - env: params.env, - logPath: params.logPath, - timeoutMs: 10 * 60 * 1000, - }); - if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) { - throw new Error("Agent output did not contain the expected OK marker."); + let lastError; + for (let attempt = 1; attempt <= 2; attempt += 1) { + const sessionId = `cross-os-release-check-${params.label}-${Date.now()}-${attempt}`; + try { + const result = await runInstalledCli({ + cliPath: params.cliPath, + args: buildReleaseAgentTurnArgs(sessionId), + cwd: params.cwd, + env: params.env, + logPath: params.logPath, + timeoutMs: 10 * 60 * 1000, + }); + if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) { + throw new Error("Agent output did not contain the expected OK marker."); + } + return result; + } catch (error) { + lastError = error; + if (attempt >= 2 || !shouldRetryCrossOsAgentTurnError(error)) { + throw error; + } + appendFileSync( + params.logPath, + `\n[release-checks] retrying installed agent turn after retryable live failure: ${ + error instanceof Error ? error.message : String(error) + }\n`, + ); + } } - return result; + throw lastError; } export function verifyDevUpdateStatus(stdout, options = {}) { @@ -2657,18 +2673,7 @@ async function runAgentTurn(params) { const result = await runOpenClaw({ lane: params.lane, env: params.env, - args: [ - "agent", - "--agent", - "main", - "--session-id", - sessionId, - "--message", - "Reply with exact ASCII text OK only.", - "--thinking", - "minimal", - "--json", - ], + args: buildReleaseAgentTurnArgs(sessionId), logPath: params.logPath, timeoutMs: 10 * 60 * 1000, }); @@ -2683,7 +2688,7 @@ async function runAgentTurn(params) { } appendFileSync( params.logPath, - `\n[release-checks] retrying agent turn after bundled runtime deps staging failure: ${ + `\n[release-checks] retrying agent turn after retryable live failure: ${ error instanceof Error ? error.message : String(error) }\n`, ); @@ -2692,9 +2697,24 @@ async function runAgentTurn(params) { throw lastError; } +function buildReleaseAgentTurnArgs(sessionId) { + return [ + "agent", + "--agent", + "main", + "--session-id", + sessionId, + "--message", + "Reply with exact ASCII text OK only.", + "--thinking", + "minimal", + "--json", + ]; +} + export function shouldRetryCrossOsAgentTurnError(error) { const message = error instanceof Error ? error.message : String(error); - return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after/u.test( + return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after|Agent output did not contain the expected OK marker|model idle timeout|did not produce a response before the model idle timeout/u.test( message, ); } diff --git a/test/scripts/openclaw-cross-os-release-checks.test.ts b/test/scripts/openclaw-cross-os-release-checks.test.ts index 0b866b05c68..37728b42412 100644 --- a/test/scripts/openclaw-cross-os-release-checks.test.ts +++ b/test/scripts/openclaw-cross-os-release-checks.test.ts @@ -41,6 +41,7 @@ import { readRunnerOverrideEnv, resolveExplicitBaselineVersion, resolveInstalledPackageRootFromCliPath, + resolveProviderConfig, resolveDevUpdateVerificationRef, resolveInstalledPrefixDirFromCliPath, resolvePublishedInstallerUrl, @@ -107,9 +108,32 @@ describe("scripts/openclaw-cross-os-release-checks", () => { new Error("document-extract failed to stage bundled runtime deps after 463ms"), ), ).toBe(true); - expect(shouldRetryCrossOsAgentTurnError(new Error("Agent output did not contain OK."))).toBe( - false, - ); + expect( + shouldRetryCrossOsAgentTurnError( + new Error("Agent output did not contain the expected OK marker."), + ), + ).toBe(true); + expect( + shouldRetryCrossOsAgentTurnError( + new Error( + "The model did not produce a response before the model idle timeout. Please try again.", + ), + ), + ).toBe(true); + }); + + it("allows cross-OS provider smoke models to use faster CI overrides", () => { + expect( + resolveProviderConfig("openai", { + OPENCLAW_CROSS_OS_OPENAI_MODEL: "openai/gpt-5.4-mini", + })?.model, + ).toBe("openai/gpt-5.4-mini"); + expect( + resolveProviderConfig("openai", { + OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano", + })?.model, + ).toBe("openai/gpt-5.4-nano"); + expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5"); }); it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => { @@ -125,7 +149,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => { it("keeps cross-OS live smoke agent turns on minimal thinking", () => { const source = readFileSync("scripts/openclaw-cross-os-release-checks.ts", "utf8"); - expect(source.match(/"--thinking",\s+"minimal"/g)?.length).toBeGreaterThanOrEqual(2); + expect(source).toContain('"--thinking",\n "minimal"'); + expect(source.match(/buildReleaseAgentTurnArgs\(sessionId\)/g)?.length).toBeGreaterThanOrEqual( + 2, + ); }); it("treats explicit empty-string args as values instead of boolean flags", () => { diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 15a3eb73716..4f59d71df96 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -156,6 +156,8 @@ describe("package artifact reuse", () => { expect(retryHelper).toContain("OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2"); expect(retryHelper).toContain("ECONNRESET"); expect(retryHelper).toContain("fetch failed"); + expect(retryHelper).toContain("gateway request timeout"); + expect(retryHelper).toContain("model idle timeout"); }); it("runs Docker live harnesses from trusted helper scripts", () => {