ci: speed up release live smoke retries

2026-05-06 05:50:43 +00:00 · 2026-04-29 12:33:46 +01:00
parent d8b9ace39c
commit 1446069707
7 changed files with 103 additions and 44 deletions
--- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
+++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
@@ -166,6 +166,7 @@ env:
  PNPM_VERSION: "10.32.1"
  OPENCLAW_REPOSITORY: openclaw/openclaw
  TSX_VERSION: "4.21.0"
+  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4-mini' }}

 jobs:
  prepare:
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -154,7 +154,11 @@ Cross-OS release checks still cover OS-specific onboarding, installer, and
 platform behavior; package/update product validation should start with Package
 Acceptance. The Windows packaged and installer fresh lanes also verify that an
 installed package can import a browser-control override from a raw absolute
-Windows path.
+Windows path. The OpenAI cross-OS agent-turn smoke uses the model named by
+`OPENCLAW_CROSS_OS_OPENAI_MODEL` when that variable is set and otherwise
+`openai/gpt-5.4-mini`, so the install and gateway proof stays fast and
+deterministic. Dedicated live provider/model lanes still cover broader model
+routing, including slower frontier defaults.

 Package Acceptance has bounded legacy-compatibility windows for already
 published packages. Packages through `2026.4.25`, including `2026.4.25-beta.*`,
--- a/docs/reference/RELEASING.md
+++ b/docs/reference/RELEASING.md
@@ -268,6 +268,11 @@ Use `release_profile` to select live/provider breadth:
 ref once as `release-package-under-test` and reuses that artifact in both
 release-path Docker checks and Package Acceptance. This keeps all
 package-facing boxes on the same bytes and avoids repeated package builds.
+The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the
+repo/org variable is set, otherwise `openai/gpt-5.4-mini`, because this lane is
+proving package install, onboarding, gateway startup, and one live agent turn
+rather than benchmarking the slowest default model. The broader live provider
+matrix remains the place for model-specific coverage.

 Use these variants depending on release stage:

--- a/scripts/ci-live-command-retry.sh
+++ b/scripts/ci-live-command-retry.sh
@@ -13,7 +13,7 @@ fi

 attempts="${OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2}"
 delay_seconds="${OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS:-10}"
-retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|\\b429\\b|\\b529\\b}"
+retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|gateway request timeout|model idle timeout|did not produce a response before the model idle timeout|\\b429\\b|\\b529\\b}"

 if ! [[ "$attempts" =~ ^[1-9][0-9]*$ ]]; then
  echo "OPENCLAW_LIVE_COMMAND_ATTEMPTS must be a positive integer, got: $attempts" >&2
--- a/scripts/openclaw-cross-os-release-checks.ts
+++ b/scripts/openclaw-cross-os-release-checks.ts
@@ -55,6 +55,16 @@ const providerConfig = {
  },
 };

+export function resolveProviderConfig(provider, env = process.env) {
+  const config = providerConfig[provider];
+  if (!config) {
+    return null;
+  }
+  const providerEnvKey = `OPENCLAW_CROSS_OS_${provider.toUpperCase().replace(/[^A-Z0-9]+/gu, "_")}_MODEL`;
+  const model = env[providerEnvKey]?.trim() || env.OPENCLAW_CROSS_OS_MODEL?.trim() || config.model;
+  return { ...config, model };
+}
+
 const RELEASE_SMOKE_PLUGIN_ALLOWLIST_BASE = [
  "acpx",
  "bonjour",
@@ -304,7 +314,7 @@ async function main(argv) {
    throw new Error(`Unsupported provider "${provider}".`);
  }

-  const selectedProvider = providerConfig[provider];
+  const selectedProvider = resolveProviderConfig(provider);
  const providerSecretValue = process.env[selectedProvider.secretEnv]?.trim();
  if (!providerSecretValue) {
    throw new Error(`Missing ${selectedProvider.secretEnv}.`);
@@ -1882,30 +1892,36 @@ async function runInstalledModelsSet(params) {
 }

 async function runInstalledAgentTurn(params) {
-  const sessionId = `cross-os-release-check-${params.label}-${Date.now()}`;
-  const result = await runInstalledCli({
-    cliPath: params.cliPath,
-    args: [
-      "agent",
-      "--agent",
-      "main",
-      "--session-id",
-      sessionId,
-      "--message",
-      "Reply with exact ASCII text OK only.",
-      "--thinking",
-      "minimal",
-      "--json",
-    ],
-    cwd: params.cwd,
-    env: params.env,
-    logPath: params.logPath,
-    timeoutMs: 10 * 60 * 1000,
-  });
-  if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) {
-    throw new Error("Agent output did not contain the expected OK marker.");
+  let lastError;
+  for (let attempt = 1; attempt <= 2; attempt += 1) {
+    const sessionId = `cross-os-release-check-${params.label}-${Date.now()}-${attempt}`;
+    try {
+      const result = await runInstalledCli({
+        cliPath: params.cliPath,
+        args: buildReleaseAgentTurnArgs(sessionId),
+        cwd: params.cwd,
+        env: params.env,
+        logPath: params.logPath,
+        timeoutMs: 10 * 60 * 1000,
+      });
+      if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) {
+        throw new Error("Agent output did not contain the expected OK marker.");
+      }
+      return result;
+    } catch (error) {
+      lastError = error;
+      if (attempt >= 2 || !shouldRetryCrossOsAgentTurnError(error)) {
+        throw error;
+      }
+      appendFileSync(
+        params.logPath,
+        `\n[release-checks] retrying installed agent turn after retryable live failure: ${
+          error instanceof Error ? error.message : String(error)
+        }\n`,
+      );
+    }
  }
-  return result;
+  throw lastError;
 }

 export function verifyDevUpdateStatus(stdout, options = {}) {
@@ -2657,18 +2673,7 @@ async function runAgentTurn(params) {
      const result = await runOpenClaw({
        lane: params.lane,
        env: params.env,
-        args: [
-          "agent",
-          "--agent",
-          "main",
-          "--session-id",
-          sessionId,
-          "--message",
-          "Reply with exact ASCII text OK only.",
-          "--thinking",
-          "minimal",
-          "--json",
-        ],
+        args: buildReleaseAgentTurnArgs(sessionId),
        logPath: params.logPath,
        timeoutMs: 10 * 60 * 1000,
      });
@@ -2683,7 +2688,7 @@ async function runAgentTurn(params) {
      }
      appendFileSync(
        params.logPath,
-        `\n[release-checks] retrying agent turn after bundled runtime deps staging failure: ${
+        `\n[release-checks] retrying agent turn after retryable live failure: ${
          error instanceof Error ? error.message : String(error)
        }\n`,
      );
@@ -2692,9 +2697,24 @@ async function runAgentTurn(params) {
  throw lastError;
 }

+function buildReleaseAgentTurnArgs(sessionId) {
+  return [
+    "agent",
+    "--agent",
+    "main",
+    "--session-id",
+    sessionId,
+    "--message",
+    "Reply with exact ASCII text OK only.",
+    "--thinking",
+    "minimal",
+    "--json",
+  ];
+}
+
 export function shouldRetryCrossOsAgentTurnError(error) {
  const message = error instanceof Error ? error.message : String(error);
-  return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after/u.test(
+  return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after|Agent output did not contain the expected OK marker|model idle timeout|did not produce a response before the model idle timeout/u.test(
    message,
  );
 }
--- a/test/scripts/openclaw-cross-os-release-checks.test.ts
+++ b/test/scripts/openclaw-cross-os-release-checks.test.ts
@@ -41,6 +41,7 @@ import {
  readRunnerOverrideEnv,
  resolveExplicitBaselineVersion,
  resolveInstalledPackageRootFromCliPath,
+  resolveProviderConfig,
  resolveDevUpdateVerificationRef,
  resolveInstalledPrefixDirFromCliPath,
  resolvePublishedInstallerUrl,
@@ -107,9 +108,32 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
        new Error("document-extract failed to stage bundled runtime deps after 463ms"),
      ),
    ).toBe(true);
-    expect(shouldRetryCrossOsAgentTurnError(new Error("Agent output did not contain OK."))).toBe(
-      false,
-    );
+    expect(
+      shouldRetryCrossOsAgentTurnError(
+        new Error("Agent output did not contain the expected OK marker."),
+      ),
+    ).toBe(true);
+    expect(
+      shouldRetryCrossOsAgentTurnError(
+        new Error(
+          "The model did not produce a response before the model idle timeout. Please try again.",
+        ),
+      ),
+    ).toBe(true);
+  });
+
+  it("allows cross-OS provider smoke models to use faster CI overrides", () => {
+    expect(
+      resolveProviderConfig("openai", {
+        OPENCLAW_CROSS_OS_OPENAI_MODEL: "openai/gpt-5.4-mini",
+      })?.model,
+    ).toBe("openai/gpt-5.4-mini");
+    expect(
+      resolveProviderConfig("openai", {
+        OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano",
+      })?.model,
+    ).toBe("openai/gpt-5.4-nano");
+    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5");
  });

  it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => {
@@ -125,7 +149,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
  it("keeps cross-OS live smoke agent turns on minimal thinking", () => {
    const source = readFileSync("scripts/openclaw-cross-os-release-checks.ts", "utf8");

-    expect(source.match(/"--thinking",\s+"minimal"/g)?.length).toBeGreaterThanOrEqual(2);
+    expect(source).toContain('"--thinking",\n    "minimal"');
+    expect(source.match(/buildReleaseAgentTurnArgs\(sessionId\)/g)?.length).toBeGreaterThanOrEqual(
+      2,
+    );
  });

  it("treats explicit empty-string args as values instead of boolean flags", () => {
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -156,6 +156,8 @@ describe("package artifact reuse", () => {
    expect(retryHelper).toContain("OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2");
    expect(retryHelper).toContain("ECONNRESET");
    expect(retryHelper).toContain("fetch failed");
+    expect(retryHelper).toContain("gateway request timeout");
+    expect(retryHelper).toContain("model idle timeout");
  });

  it("runs Docker live harnesses from trusted helper scripts", () => {