From bcf4628092a83ffc1ecd2d3d2c89f9619db20bec Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 08:26:42 +0100 Subject: [PATCH] ci: use gpt-5.5 for live OpenAI defaults --- .../openclaw-live-and-e2e-checks-reusable.yml | 2 +- docs/help/testing-live.md | 30 +++++++++---------- package.json | 10 +++---- scripts/lib/docker-e2e-scenarios.mjs | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index 9cc5bfd15c6..b400419c0f7 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -1600,7 +1600,7 @@ jobs: profiles: stable full - suite_id: native-live-src-gateway-profiles-openai label: Native live gateway profiles OpenAI - command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MODELS=openai/gpt-5.2 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=openai OPENCLAW_LIVE_GATEWAY_MODELS=openai/gpt-5.5 node .release-harness/scripts/test-live-shard.mjs native-live-src-gateway-profiles timeout_minutes: 90 needs_ffmpeg: false profile_env_only: false diff --git a/docs/help/testing-live.md b/docs/help/testing-live.md index 5d3f4ee3167..a793fa53050 100644 --- a/docs/help/testing-live.md +++ b/docs/help/testing-live.md @@ -79,7 +79,7 @@ Live tests are split into two layers so we can isolate failures: - How to select models: - `OPENCLAW_LIVE_MODELS=modern` to run the modern allowlist (Opus/Sonnet 4.6+, GPT-5.2 + Codex, Gemini 3, DeepSeek V4, GLM 4.7, MiniMax M2.7, Grok 4) - `OPENCLAW_LIVE_MODELS=all` is an alias for the modern allowlist - - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,..."` (comma allowlist) + - or `OPENCLAW_LIVE_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,..."` (comma allowlist) - Modern/all sweeps default to a curated high-signal cap; set `OPENCLAW_LIVE_MAX_MODELS=0` for an exhaustive modern sweep or a positive number for a smaller cap. - Exhaustive sweeps use `OPENCLAW_LIVE_TEST_TIMEOUT_MS` for the whole direct-model test timeout. Default: 60 minutes. - Direct-model probes run with 20-way parallelism by default; set `OPENCLAW_LIVE_MODEL_CONCURRENCY` to override. @@ -149,7 +149,7 @@ openclaw models list --json - Default provider/model: `claude-cli/claude-sonnet-4-6` - Command/args/image behavior come from the owning CLI backend plugin metadata. - Overrides (optional): - - `OPENCLAW_LIVE_CLI_BACKEND_MODEL="codex-cli/gpt-5.2"` + - `OPENCLAW_LIVE_CLI_BACKEND_MODEL="codex-cli/gpt-5.5"` - `OPENCLAW_LIVE_CLI_BACKEND_COMMAND="/full/path/to/codex"` - `OPENCLAW_LIVE_CLI_BACKEND_ARGS='["exec","--json","--color","never","--sandbox","read-only","--skip-git-repo-check"]'` - `OPENCLAW_LIVE_CLI_BACKEND_IMAGE_PROBE=1` to send a real image attachment (paths are injected into the prompt). Docker recipes default this off unless explicitly requested. @@ -163,7 +163,7 @@ Example: ```bash OPENCLAW_LIVE_CLI_BACKEND=1 \ - OPENCLAW_LIVE_CLI_BACKEND_MODEL="codex-cli/gpt-5.2" \ + OPENCLAW_LIVE_CLI_BACKEND_MODEL="codex-cli/gpt-5.5" \ pnpm test:live src/gateway/gateway-cli-backend.live.test.ts ``` @@ -227,11 +227,11 @@ Notes: - `OPENCLAW_LIVE_ACP_BIND_AGENT=opencode` - `OPENCLAW_LIVE_ACP_BIND_AGENTS=claude,codex,gemini` - `OPENCLAW_LIVE_ACP_BIND_AGENT_COMMAND='npx -y @agentclientprotocol/claude-agent-acp@'` - - `OPENCLAW_LIVE_ACP_BIND_CODEX_MODEL=gpt-5.2` + - `OPENCLAW_LIVE_ACP_BIND_CODEX_MODEL=gpt-5.5` - `OPENCLAW_LIVE_ACP_BIND_OPENCODE_MODEL=opencode/kimi-k2.6` - `OPENCLAW_LIVE_ACP_BIND_REQUIRE_TRANSCRIPT=1` - `OPENCLAW_LIVE_ACP_BIND_REQUIRE_CRON=1` - - `OPENCLAW_LIVE_ACP_BIND_PARENT_MODEL=openai/gpt-5.2` + - `OPENCLAW_LIVE_ACP_BIND_PARENT_MODEL=openai/gpt-5.5` - Notes: - This lane uses the gateway `chat.send` surface with admin-only synthetic originating-route fields so tests can attach message-channel context without pretending to deliver externally. - When `OPENCLAW_LIVE_ACP_BIND_AGENT_COMMAND` is unset, the test uses the embedded `acpx` plugin's built-in agent registry for the selected ACP harness agent. @@ -277,7 +277,7 @@ Docker notes: `agent` method: - load the bundled `codex` plugin - select `OPENCLAW_AGENT_RUNTIME=codex` - - send a first gateway agent turn to `openai/gpt-5.2` with the Codex harness forced + - send a first gateway agent turn to `openai/gpt-5.5` with the Codex harness forced - send a second turn to the same OpenClaw session and verify the app-server thread can resume - run `/codex status` and `/codex models` through the same gateway command @@ -287,7 +287,7 @@ Docker notes: denied so the agent asks back - Test: `src/gateway/gateway-codex-harness.live.test.ts` - Enable: `OPENCLAW_LIVE_CODEX_HARNESS=1` -- Default model: `openai/gpt-5.2` +- Default model: `openai/gpt-5.5` - Optional image probe: `OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=1` - Optional MCP/tool probe: `OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=1` - Optional Guardian probe: `OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=1` @@ -305,7 +305,7 @@ OPENCLAW_LIVE_CODEX_HARNESS=1 \ OPENCLAW_LIVE_CODEX_HARNESS_IMAGE_PROBE=1 \ OPENCLAW_LIVE_CODEX_HARNESS_MCP_PROBE=1 \ OPENCLAW_LIVE_CODEX_HARNESS_GUARDIAN_PROBE=1 \ - OPENCLAW_LIVE_CODEX_HARNESS_MODEL=openai/gpt-5.2 \ + OPENCLAW_LIVE_CODEX_HARNESS_MODEL=openai/gpt-5.5 \ pnpm test:live -- src/gateway/gateway-codex-harness.live.test.ts ``` @@ -336,13 +336,13 @@ Docker notes: Narrow, explicit allowlists are fastest and least flaky: - Single model, direct (no gateway): - - `OPENCLAW_LIVE_MODELS="openai/gpt-5.2" pnpm test:live src/agents/models.profiles.live.test.ts` + - `OPENCLAW_LIVE_MODELS="openai/gpt-5.5" pnpm test:live src/agents/models.profiles.live.test.ts` - Single model, gateway smoke: - - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` + - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` - Tool calling across several providers: - - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3-flash-preview,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` + - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,google/gemini-3-flash-preview,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` - Google focus (Gemini API key + Antigravity): - Gemini (API key): `OPENCLAW_LIVE_GATEWAY_MODELS="google/gemini-3-flash-preview" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` @@ -370,8 +370,8 @@ There is no fixed “CI model list” (live is opt-in), but these are the **reco This is the “common models” run we expect to keep working: -- OpenAI (non-Codex): `openai/gpt-5.2` -- OpenAI Codex OAuth: `openai-codex/gpt-5.2` +- OpenAI (non-Codex): `openai/gpt-5.5` +- OpenAI Codex OAuth: `openai-codex/gpt-5.5` - Anthropic: `anthropic/claude-opus-4-6` (or `anthropic/claude-sonnet-4-6`) - Google (Gemini API): `google/gemini-3.1-pro-preview` and `google/gemini-3-flash-preview` (avoid older Gemini 2.x models) - Google (Antigravity): `google-antigravity/claude-opus-4-6-thinking` and `google-antigravity/gemini-3-flash` @@ -380,13 +380,13 @@ This is the “common models” run we expect to keep working: - MiniMax: `minimax/MiniMax-M2.7` Run gateway smoke with tools + image: -`OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3.1-pro-preview,google/gemini-3-flash-preview,google-antigravity/claude-opus-4-6-thinking,google-antigravity/gemini-3-flash,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` +`OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.5,openai-codex/gpt-5.5,anthropic/claude-opus-4-6,google/gemini-3.1-pro-preview,google/gemini-3-flash-preview,google-antigravity/claude-opus-4-6-thinking,google-antigravity/gemini-3-flash,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` ### Baseline: tool calling (Read + optional Exec) Pick at least one per provider family: -- OpenAI: `openai/gpt-5.2` +- OpenAI: `openai/gpt-5.5` - Anthropic: `anthropic/claude-opus-4-6` (or `anthropic/claude-sonnet-4-6`) - Google: `google/gemini-3-flash-preview` (or `google/gemini-3.1-pro-preview`) - DeepSeek: `deepseek/deepseek-v4-flash` diff --git a/package.json b/package.json index 7a9d8b36747..ff0b70d46b6 100644 --- a/package.json +++ b/package.json @@ -1464,9 +1464,9 @@ "test:docker:live-cli-backend:claude-subscription": "OPENCLAW_LIVE_CLI_BACKEND_AUTH=subscription OPENCLAW_LIVE_CLI_BACKEND_MODEL=claude-cli/claude-sonnet-4-6 OPENCLAW_LIVE_CLI_BACKEND_DISABLE_MCP_CONFIG=1 OPENCLAW_LIVE_CLI_BACKEND_MODEL_SWITCH_PROBE=0 OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1 OPENCLAW_LIVE_CLI_BACKEND_IMAGE_PROBE=0 OPENCLAW_LIVE_CLI_BACKEND_MCP_PROBE=0 bash scripts/test-live-cli-backend-docker.sh", "test:docker:live-cli-backend:claude:mcp": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=claude-cli/claude-sonnet-4-6 OPENCLAW_LIVE_CLI_BACKEND_MCP_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", "test:docker:live-cli-backend:claude:resume": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=claude-cli/claude-sonnet-4-6 OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", - "test:docker:live-cli-backend:codex": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.2 bash scripts/test-live-cli-backend-docker.sh", - "test:docker:live-cli-backend:codex:mcp": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.2 OPENCLAW_LIVE_CLI_BACKEND_MCP_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", - "test:docker:live-cli-backend:codex:resume": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.2 OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", + "test:docker:live-cli-backend:codex": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.5 bash scripts/test-live-cli-backend-docker.sh", + "test:docker:live-cli-backend:codex:mcp": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.5 OPENCLAW_LIVE_CLI_BACKEND_MCP_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", + "test:docker:live-cli-backend:codex:resume": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.5 OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", "test:docker:live-cli-backend:gemini": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=google-gemini-cli/gemini-3-flash-preview bash scripts/test-live-cli-backend-docker.sh", "test:docker:live-cli-backend:gemini:mcp": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=google-gemini-cli/gemini-3-flash-preview OPENCLAW_LIVE_CLI_BACKEND_MCP_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", "test:docker:live-cli-backend:gemini:resume": "OPENCLAW_LIVE_CLI_BACKEND_MODEL=google-gemini-cli/gemini-3-flash-preview OPENCLAW_LIVE_CLI_BACKEND_RESUME_PROBE=1 bash scripts/test-live-cli-backend-docker.sh", @@ -1474,11 +1474,11 @@ "test:docker:live-codex-harness": "bash scripts/test-live-codex-harness-docker.sh", "test:docker:live-gateway": "bash scripts/test-live-gateway-models-docker.sh", "test:docker:live-gateway:claude": "OPENCLAW_LIVE_GATEWAY_PROVIDERS=claude-cli OPENCLAW_LIVE_GATEWAY_MODELS=claude-cli/claude-sonnet-4-6 bash scripts/test-live-gateway-models-docker.sh", - "test:docker:live-gateway:codex": "OPENCLAW_LIVE_GATEWAY_PROVIDERS=codex-cli OPENCLAW_LIVE_GATEWAY_MODELS=codex-cli/gpt-5.2 bash scripts/test-live-gateway-models-docker.sh", + "test:docker:live-gateway:codex": "OPENCLAW_LIVE_GATEWAY_PROVIDERS=codex-cli OPENCLAW_LIVE_GATEWAY_MODELS=codex-cli/gpt-5.5 bash scripts/test-live-gateway-models-docker.sh", "test:docker:live-gateway:gemini": "OPENCLAW_LIVE_GATEWAY_PROVIDERS=google-gemini-cli OPENCLAW_LIVE_GATEWAY_MODELS=google-gemini-cli/gemini-3.1-pro-preview bash scripts/test-live-gateway-models-docker.sh", "test:docker:live-models": "bash scripts/test-live-models-docker.sh", "test:docker:live-models:claude": "OPENCLAW_LIVE_PROVIDERS=claude-cli OPENCLAW_LIVE_MODELS=claude-cli/claude-sonnet-4-6 bash scripts/test-live-models-docker.sh", - "test:docker:live-models:codex": "OPENCLAW_LIVE_PROVIDERS=codex-cli OPENCLAW_LIVE_MODELS=codex-cli/gpt-5.2 bash scripts/test-live-models-docker.sh", + "test:docker:live-models:codex": "OPENCLAW_LIVE_PROVIDERS=codex-cli OPENCLAW_LIVE_MODELS=codex-cli/gpt-5.5 bash scripts/test-live-models-docker.sh", "test:docker:live-models:gemini": "OPENCLAW_LIVE_PROVIDERS=google-gemini-cli OPENCLAW_LIVE_MODELS=google-gemini-cli/gemini-3.1-pro-preview bash scripts/test-live-models-docker.sh", "test:docker:live:all": "OPENCLAW_DOCKER_ALL_LIVE_MODE=only node scripts/test-docker-all.mjs", "test:docker:local:all": "OPENCLAW_DOCKER_ALL_LIVE_MODE=skip node scripts/test-docker-all.mjs", diff --git a/scripts/lib/docker-e2e-scenarios.mjs b/scripts/lib/docker-e2e-scenarios.mjs index 87dc92d66c5..e78149a5622 100644 --- a/scripts/lib/docker-e2e-scenarios.mjs +++ b/scripts/lib/docker-e2e-scenarios.mjs @@ -324,7 +324,7 @@ export const tailLanes = [ "live-cli-backend-codex", liveDockerScriptCommand( "test-live-cli-backend-docker.sh", - "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.2", + "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.5", ), { cacheKey: "cli-backend-codex",