From f523620abe7f14bcf53ddd938d12aac2da3d4332 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 13:44:55 +0100 Subject: [PATCH] ci: use gpt-5.4 for cross-os release smoke --- .../openclaw-cross-os-release-checks-reusable.yml | 2 +- .github/workflows/openclaw-release-checks.yml | 2 +- AGENTS.md | 2 +- docs/ci.md | 2 +- docs/reference/RELEASING.md | 2 +- scripts/openclaw-cross-os-release-checks.ts | 2 +- test/scripts/openclaw-cross-os-release-checks.test.ts | 10 +++++----- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml index 25bb893a46e..047477f201f 100644 --- a/.github/workflows/openclaw-cross-os-release-checks-reusable.yml +++ b/.github/workflows/openclaw-cross-os-release-checks-reusable.yml @@ -176,7 +176,7 @@ env: PNPM_VERSION: "10.32.1" OPENCLAW_REPOSITORY: openclaw/openclaw TSX_VERSION: "4.21.0" - OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }} + OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }} jobs: prepare: diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 52f1f074a02..eb1f431eff7 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -333,7 +333,7 @@ jobs: candidate_file_name: openclaw-current.tgz candidate_version: ${{ needs.prepare_release_package.outputs.package_version }} candidate_source_sha: ${{ needs.prepare_release_package.outputs.source_sha }} - openai_model: openai/gpt-5.5 + openai_model: openai/gpt-5.4 secrets: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/AGENTS.md b/AGENTS.md index 299fd172d22..dd25f393d1b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -126,7 +126,7 @@ Telegraph style. Root rules only. Read scoped `AGENTS.md` before subtree work. ## Tests -- Vitest. Colocated `*.test.ts`; e2e `*.e2e.test.ts`; example models `sonnet-4.6`, `gpt-5.5`; test GPT with 5.5 preferred, 5.4 ok, no GPT-4.x agent-smoke defaults. +- Vitest. Colocated `*.test.ts`; e2e `*.e2e.test.ts`; example models `sonnet-4.6`, `gpt-5.5`; test GPT with 5.5 preferred, 5.4 ok; no GPT-4.x agent-smoke defaults. - Avoid brittle tests that grep workflow/docs strings for operator policy. Prefer executable behavior, parsed config/schema checks, or live run proof; put release/CI policy reminders in AGENTS/docs instead. - Clean timers/env/globals/mocks/sockets/temp dirs/module state; `--isolate=false` safe. - Hot tests: avoid per-test `vi.resetModules()` + heavy imports. Measure with `pnpm test:perf:imports ` / `pnpm test:perf:hotspots --limit N`. diff --git a/docs/ci.md b/docs/ci.md index 95cea47f0f7..ef6dcd58687 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -239,7 +239,7 @@ For the dedicated update and plugin testing policy, including local commands, Docker lanes, Package Acceptance inputs, release defaults, and failure triage, see [Testing updates and plugins](/help/testing-updates-plugins). -Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update'`, `published_upgrade_survivor_baselines=release-history`, `published_upgrade_survivor_scenarios=reported-issues`, and `telegram_mode=mock-openai`. This keeps package migration, update, stale-plugin-dependency cleanup, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=release-history` to expand the lane across a deduped history matrix: the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. Set `published_upgrade_survivor_scenarios=reported-issues` to expand the same baselines across issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, tilde log paths, and stale legacy plugin dependency roots. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.5`, so the install and gateway proof stays on the preferred GPT-5 test model. +Release checks call Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, `docker_lanes='doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update'`, `published_upgrade_survivor_baselines=release-history`, `published_upgrade_survivor_scenarios=reported-issues`, and `telegram_mode=mock-openai`. This keeps package migration, update, stale-plugin-dependency cleanup, offline plugin, plugin-update, and Telegram proof on the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=release-history` to expand the lane across a deduped history matrix: the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. Set `published_upgrade_survivor_scenarios=reported-issues` to expand the same baselines across issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, tilde log paths, and stale legacy plugin dependency roots. The separate `Update Migration` workflow uses the `update-migration` Docker lane with `all-since-2026.4.23` and `plugin-deps-cleanup` when the question is exhaustive published update cleanup, not normal Full Release CI breadth. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4`, so the install and gateway proof stays on a GPT-5 test model while avoiding GPT-4.x defaults. ### Legacy compatibility windows diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index 953c171e168..94954a293e5 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -316,7 +316,7 @@ ref once as `release-package-under-test` and reuses that artifact in both release-path Docker checks and Package Acceptance. This keeps all package-facing boxes on the same bytes and avoids repeated package builds. The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the -repo/org variable is set, otherwise `openai/gpt-5.5`, because this lane is +repo/org variable is set, otherwise `openai/gpt-5.4`, because this lane is proving package install, onboarding, gateway startup, and one live agent turn rather than benchmarking the slowest default model. The broader live provider matrix remains the place for model-specific coverage. diff --git a/scripts/openclaw-cross-os-release-checks.ts b/scripts/openclaw-cross-os-release-checks.ts index 704a4221702..55b70271c84 100644 --- a/scripts/openclaw-cross-os-release-checks.ts +++ b/scripts/openclaw-cross-os-release-checks.ts @@ -39,7 +39,7 @@ const providerConfig = { extensionId: "openai", secretEnv: "OPENAI_API_KEY", authChoice: "openai-api-key", - model: "openai/gpt-5.5", + model: "openai/gpt-5.4", baseUrl: "https://api.openai.com/v1", timeoutSeconds: 600, }, diff --git a/test/scripts/openclaw-cross-os-release-checks.test.ts b/test/scripts/openclaw-cross-os-release-checks.test.ts index 7cdf7a2a229..31d69a95ef5 100644 --- a/test/scripts/openclaw-cross-os-release-checks.test.ts +++ b/test/scripts/openclaw-cross-os-release-checks.test.ts @@ -135,10 +135,10 @@ describe("scripts/openclaw-cross-os-release-checks", () => { OPENCLAW_CROSS_OS_MODEL: "openai/gpt-5.4-nano", })?.model, ).toBe("openai/gpt-5.4-nano"); - expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.5"); + expect(resolveProviderConfig("openai", {})?.model).toBe("openai/gpt-5.4"); }); - it("keeps release cross-OS OpenAI smoke on GPT-5.5", () => { + it("keeps release cross-OS OpenAI smoke on GPT-5.4", () => { const workflow = readFileSync( ".github/workflows/openclaw-cross-os-release-checks-reusable.yml", "utf8", @@ -146,9 +146,9 @@ describe("scripts/openclaw-cross-os-release-checks", () => { const releaseChecks = readFileSync(".github/workflows/openclaw-release-checks.yml", "utf8"); expect(workflow).toContain( - "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.5' }}", + "OPENCLAW_CROSS_OS_OPENAI_MODEL: ${{ inputs.openai_model || vars.OPENCLAW_CROSS_OS_OPENAI_MODEL || 'openai/gpt-5.4' }}", ); - expect(releaseChecks).toContain("openai_model: openai/gpt-5.5"); + expect(releaseChecks).toContain("openai_model: openai/gpt-5.4"); }); it("keeps release smoke plugin allowlists focused on agent-turn essentials", () => { @@ -161,7 +161,7 @@ describe("scripts/openclaw-cross-os-release-checks", () => { expect(allowlist).not.toContain("web-readability"); }); - it("keeps cross-OS live smoke agent turns on GPT-5.5-safe timeouts and minimal context", () => { + it("keeps cross-OS live smoke agent turns on GPT-5-safe timeouts and minimal context", () => { const source = readFileSync("scripts/openclaw-cross-os-release-checks.ts", "utf8"); const providerOverride = "models.providers.${params.providerConfig.extensionId}";