ci: speed up release live smoke retries

2026-05-06 16:20:43 +00:00 · 2026-04-29 12:33:46 +01:00
parent d8b9ace39c
commit 1446069707
7 changed files with 103 additions and 44 deletions
--- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
+++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml
@@ -166,6 +166,7 @@ env:
  PNPM_VERSION: "10.32.1"
  OPENCLAW_REPOSITORY: openclaw/openclaw
  TSX_VERSION: "4.21.0"
  OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4-mini' }}
 jobs:
  prepare:
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -154,7 +154,11 @@ Cross-OS release checks still cover OS-specific onboarding, installer, and
 platform behavior; package/update product validation should start with Package
 Acceptance. The Windows packaged and installer fresh lanes also verify that an
 installed package can import a browser-control override from a raw absolute
-Windows path.
+Windows path. The OpenAI cross-OS agent-turn smoke uses the model named by
 `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4-mini`, so
 the install and gateway proof stays fast and deterministic. Dedicated live
 provider/model lanes still cover broader model routing, including slower
 frontier defaults.
 Package Acceptance has bounded legacy-compatibility windows for already
 published packages. Packages through `2026.4.25`, including `2026.4.25-beta.*`,
--- a/docs/reference/RELEASING.md
+++ b/docs/reference/RELEASING.md
@@ -268,6 +268,11 @@ Use `release_profile` to select live/provider breadth:
 ref once as `release-package-under-test` and reuses that artifact in both
 release-path Docker checks and Package Acceptance. This keeps all
 package-facing boxes on the same bytes and avoids repeated package builds.
 The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the
 repo/org variable is set, otherwise `openai/gpt-5.4-mini`, because this lane is
 proving package install, onboarding, gateway startup, and one live agent turn
 rather than benchmarking the slowest default model. The broader live provider
 matrix remains the place for model-specific coverage.
 Use these variants depending on release stage:
--- a/scripts/ci-live-command-retry.sh
+++ b/scripts/ci-live-command-retry.sh
@@ -13,7 +13,7 @@ fi
 attempts="${OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2}"
 delay_seconds="${OPENCLAW_LIVE_COMMAND_RETRY_DELAY_SECONDS:-10}"
-retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|\\b429\\b|\\b529\\b}"
+retry_pattern="${OPENCLAW_LIVE_COMMAND_RETRY_PATTERN:-ECONNRESET|ETIMEDOUT|ENOTFOUND|EAI_AGAIN|fetch failed|TLS connection|socket hang up|UND_ERR|gateway request timeout|model idle timeout|did not produce a response before the model idle timeout|\\b429\\b|\\b529\\b}"
 if ! [[ "$attempts" =~ ^[1-9][0-9]*$ ]]; then
  echo "OPENCLAW_LIVE_COMMAND_ATTEMPTS must be a positive integer, got: $attempts" >&2
--- a/scripts/openclaw-cross-os-release-checks.ts
+++ b/scripts/openclaw-cross-os-release-checks.ts
@@ -55,6 +55,16 @@ const providerConfig = {
  },
 };
 /**
  * Resolves the provider settings used by a cross-OS release smoke run.
  *
  * The model is picked from the first non-blank source, in this order:
  *   1. `OPENCLAW_CROSS_OS_<PROVIDER>_MODEL` (provider upper-cased, every run of
  *      characters outside A-Z/0-9 replaced with `_`),
  *   2. `OPENCLAW_CROSS_OS_MODEL`,
  *   3. the `model` stored in `providerConfig` for that provider.
  *
  * @param provider Provider key to look up in `providerConfig`.
  * @param env Environment map to read overrides from; defaults to `process.env`.
  * @returns A shallow copy of the provider entry with `model` resolved, or
  *   `null` when `provider` is not a configured provider.
  */
 export function resolveProviderConfig(provider, env = process.env) {
  // Own-property check: inherited keys such as "constructor" or "toString"
  // must not be mistaken for configured providers.
  if (!Object.hasOwn(providerConfig, provider)) {
    return null;
  }
  const config = providerConfig[provider];
  if (!config) {
    return null;
  }
  const providerEnvKey = `OPENCLAW_CROSS_OS_${provider.toUpperCase().replace(/[^A-Z0-9]+/gu, "_")}_MODEL`;
  // `||` rather than `??` is deliberate: an empty or whitespace-only override
  // has to fall through to the next source.
  const model = env[providerEnvKey]?.trim() || env.OPENCLAW_CROSS_OS_MODEL?.trim() || config.model;
  return { ...config, model };
 }
 const RELEASE_SMOKE_PLUGIN_ALLOWLIST_BASE = [
  "acpx",
  "bonjour",
@@ -304,7 +314,7 @@ async function main(argv) {
    throw new Error(`Unsupported provider "${provider}".`);
  }
-  const selectedProvider = providerConfig[provider];
+  const selectedProvider = resolveProviderConfig(provider);
  const providerSecretValue = process.env[selectedProvider.secretEnv]?.trim();
  if (!providerSecretValue) {
    throw new Error(`Missing ${selectedProvider.secretEnv}.`);
@@ -1882,30 +1892,36 @@ async function runInstalledModelsSet(params) {
 }
 /**
  * Runs one live agent turn through the installed CLI and verifies that the
  * output carries the expected OK marker.
  *
  * The added (+) lines make up to two attempts. Each attempt uses its own
  * session id (label, current timestamp, attempt number) and a 10 minute
  * timeout. A missing OK marker is raised as an Error so it goes through the
  * same catch path as a CLI failure. A failed first attempt is retried only
  * when shouldRetryCrossOsAgentTurnError accepts the error; before retrying, a
  * note with the error text is appended to params.logPath.
  *
  * The trailing "throw lastError" is a safety net: the catch block already
  * rethrows on the second attempt, so the loop cannot finish normally.
  *
  * @param params Reads label, cliPath, cwd, env and logPath.
  * @returns The runInstalledCli result of the first attempt that produced the OK marker.
  * @throws The error of the failing attempt when it is not retryable or no attempts remain.
  */
 async function runInstalledAgentTurn(params) {
-  const sessionId = `cross-os-release-check-${params.label}-${Date.now()}`;
+  let lastError;
-  const result = await runInstalledCli({
+  for (let attempt = 1; attempt <= 2; attempt += 1) {
-    cliPath: params.cliPath,
+    const sessionId = `cross-os-release-check-${params.label}-${Date.now()}-${attempt}`;
-    args: [
+    try {
-      "agent",
+      const result = await runInstalledCli({
-      "--agent",
+        cliPath: params.cliPath,
-      "main",
+        args: buildReleaseAgentTurnArgs(sessionId),
-      "--session-id",
+        cwd: params.cwd,
-      sessionId,
+        env: params.env,
-      "--message",
+        logPath: params.logPath,
-      "Reply with exact ASCII text OK only.",
+        timeoutMs: 10 * 60 * 1000,
-      "--thinking",
+      });
-      "minimal",
+      if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) {
-      "--json",
+        throw new Error("Agent output did not contain the expected OK marker.");
-    ],
+      }
-    cwd: params.cwd,
+      return result;
-    env: params.env,
+    } catch (error) {
-    logPath: params.logPath,
+      lastError = error;
-    timeoutMs: 10 * 60 * 1000,
+      if (attempt >= 2 || !shouldRetryCrossOsAgentTurnError(error)) {
-  });
+        throw error;
-  if (!agentOutputHasExpectedOkMarker(result.stdout, { logPath: params.logPath })) {
+      }
-    throw new Error("Agent output did not contain the expected OK marker.");
+      appendFileSync(
        params.logPath,
        `\n[release-checks] retrying installed agent turn after retryable live failure: ${
          error instanceof Error ? error.message : String(error)
        }\n`,
      );
    }
  }
-  return result;
+  throw lastError;
 }
 export function verifyDevUpdateStatus(stdout, options = {}) {
@@ -2657,18 +2673,7 @@ async function runAgentTurn(params) {
      const result = await runOpenClaw({
        lane: params.lane,
        env: params.env,
-        args: [
+        args: buildReleaseAgentTurnArgs(sessionId),
          "agent",
          "--agent",
          "main",
          "--session-id",
          sessionId,
          "--message",
          "Reply with exact ASCII text OK only.",
          "--thinking",
          "minimal",
          "--json",
        ],
        logPath: params.logPath,
        timeoutMs: 10 * 60 * 1000,
      });
@@ -2683,7 +2688,7 @@ async function runAgentTurn(params) {
      }
      appendFileSync(
        params.logPath,
-        `\n[release-checks] retrying agent turn after bundled runtime deps staging failure: ${
+        `\n[release-checks] retrying agent turn after retryable live failure: ${
          error instanceof Error ? error.message : String(error)
        }\n`,
      );
@@ -2692,9 +2697,24 @@ async function runAgentTurn(params) {
  throw lastError;
 }
 /**
  * Builds the CLI argument list for a single release smoke agent turn.
  *
  * The turn targets the "main" agent, asks for the exact reply "OK", requests
  * minimal thinking, and asks for JSON output.
  *
  * NOTE: a source-scanning test asserts that "--thinking" is directly followed
  * by "minimal" in this file, so keep those two entries adjacent and on their
  * own lines when editing this list.
  *
  * @param sessionId Value passed after `--session-id`.
  * @returns A new array of CLI arguments.
  */
 function buildReleaseAgentTurnArgs(sessionId) {
  return [
    "agent",
    "--agent",
    "main",
    "--session-id",
    sessionId,
    "--message",
    "Reply with exact ASCII text OK only.",
    "--thinking",
    "minimal",
    "--json",
  ];
 }
 /**
  * Decides whether a failed cross-OS agent turn is worth retrying.
  *
  * The error text is error.message for Error instances and String(error)
  * otherwise. With the added (+) pattern, a retry is allowed when the text
  * mentions a bundled runtime deps install/stage failure, the missing OK
  * marker, or a model idle timeout.
  *
  * NOTE(review): two alternatives are already covered by shorter ones
  * ("failed to stage bundled runtime deps after" and "did not produce a
  * response before the model idle timeout"); they do not change the result.
  *
  * @param error Thrown value of any type.
  * @returns true when the error text matches a retryable pattern.
  */
 export function shouldRetryCrossOsAgentTurnError(error) {
  const message = error instanceof Error ? error.message : String(error);
-  return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after/u.test(
+  return /failed to (?:install|stage) bundled runtime deps|failed to stage bundled runtime deps after|Agent output did not contain the expected OK marker|model idle timeout|did not produce a response before the model idle timeout/u.test(
    message,
  );
 }
--- a/test/scripts/openclaw-cross-os-release-checks.test.ts
+++ b/test/scripts/openclaw-cross-os-release-checks.test.ts
@@ -41,6 +41,7 @@ import {
  readRunnerOverrideEnv,
  resolveExplicitBaselineVersion,
  resolveInstalledPackageRootFromCliPath,
  resolveProviderConfig,
  resolveDevUpdateVerificationRef,
  resolveInstalledPrefixDirFromCliPath,
  resolvePublishedInstallerUrl,
@@ -107,9 +108,32 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
        new Error("document-extract failed to stage bundled runtime deps after 463ms"),
      ),
    ).toBe(true);
-    expect(shouldRetryCrossOsAgentTurnError(new Error("Agent output did not contain OK."))).toBe(
+    expect(
-      false,
+      shouldRetryCrossOsAgentTurnError(
-    );
+        new Error("Agent output did not contain the expected OK marker."),
      ),
    ).toBe(true);
    expect(
      shouldRetryCrossOsAgentTurnError(
        new Error(
          "The model did not produce a response before the model idle timeout. Please try again.",
        ),
      ),
    ).toBe(true);
  });
  it("allows cross-OS provider smoke models to use faster CI overrides", () => {
    expect(
      resolveProviderConfig("openai", {
        OPENCLAW_CROSS_OS_OPENAI_MODEL: "openai/gpt-5.4-mini",
      })?.model,
    ).toBe("openai/gpt-5.4-mini");
    expect(
      resolveProviderConfig("openai", {
        OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano",
      })?.model,
    ).toBe("openai/gpt-5.4-nano");
    expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5");
  });
  it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => {
@@ -125,7 +149,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => {
  it("keeps cross-OS live smoke agent turns on minimal thinking", () => {
    const source = readFileSync("scripts/openclaw-cross-os-release-checks.ts", "utf8");
-    expect(source.match(/"--thinking",\s+"minimal"/g)?.length).toBeGreaterThanOrEqual(2);
+    // Whitespace-tolerant on purpose: a literal "\n    " would fail on CRLF
+    // checkouts or after an indentation-only reformat of the script.
+    expect(source).toMatch(/"--thinking",\s+"minimal"/u);
    expect(source.match(/buildReleaseAgentTurnArgs\(sessionId\)/g)?.length).toBeGreaterThanOrEqual(
      2,
    );
  });
  it("treats explicit empty-string args as values instead of boolean flags", () => {
--- a/test/scripts/package-acceptance-workflow.test.ts
+++ b/test/scripts/package-acceptance-workflow.test.ts
@@ -156,6 +156,8 @@ describe("package artifact reuse", () => {
    expect(retryHelper).toContain("OPENCLAW_LIVE_COMMAND_ATTEMPTS:-2");
    expect(retryHelper).toContain("ECONNRESET");
    expect(retryHelper).toContain("fetch failed");
    expect(retryHelper).toContain("gateway request timeout");
    expect(retryHelper).toContain("model idle timeout");
  });
  it("runs Docker live harnesses from trusted helper scripts", () => {