test(release): prefer GPT-5.5 smoke models
@@ -125,7 +125,7 @@ Telegraph style. Root rules only. Read scoped `AGENTS.md` before subtree work.

 ## Tests

-- Vitest. Colocated `*.test.ts`; e2e `*.e2e.test.ts`; example models `sonnet-4.6`, `gpt-5.4`.
+- Vitest. Colocated `*.test.ts`; e2e `*.e2e.test.ts`; example models `sonnet-4.6`, `gpt-5.5`; test GPT with 5.5 preferred, 5.4 ok, no GPT-4.x agent-smoke defaults.
 - Avoid brittle tests that grep workflow/docs strings for operator policy. Prefer executable behavior, parsed config/schema checks, or live run proof; put release/CI policy reminders in AGENTS/docs instead.
 - Clean timers/env/globals/mocks/sockets/temp dirs/module state; `--isolate=false` safe.
 - Hot tests: avoid per-test `vi.resetModules()` + heavy imports. Measure with `pnpm test:perf:imports <file>` / `pnpm test:perf:hotspots --limit N`.
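A minimal sketch of the cleanup discipline these bullets describe, as a colocated Vitest test (file name and env handling here are illustrative, not from the commit):

```ts
// model-default.test.ts, a colocated unit test (hypothetical name).
import { afterEach, describe, expect, it, vi } from "vitest";

const SAVED_MODEL = process.env.OPENCLAW_CROSS_OS_OPENAI_MODEL;

afterEach(() => {
  // Restore everything this file touched so --isolate=false stays safe.
  vi.restoreAllMocks();
  vi.useRealTimers();
  if (SAVED_MODEL === undefined) {
    delete process.env.OPENCLAW_CROSS_OS_OPENAI_MODEL;
  } else {
    process.env.OPENCLAW_CROSS_OS_OPENAI_MODEL = SAVED_MODEL;
  }
});

describe("smoke model default", () => {
  it("prefers the env override, else gpt-5.5", () => {
    process.env.OPENCLAW_CROSS_OS_OPENAI_MODEL = "openai/gpt-5.4";
    const model = process.env.OPENCLAW_CROSS_OS_OPENAI_MODEL ?? "openai/gpt-5.5";
    expect(model).toBe("openai/gpt-5.4");
  });
});
```

Note the test asserts executable behavior rather than grepping docs strings, in line with the brittleness rule above.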
@@ -188,7 +188,7 @@ Keep `workflow_ref` and `package_ref` separate. `workflow_ref` is the trusted wo

 The `package` profile uses offline plugin coverage so published-package validation is not gated on live ClawHub availability. The optional Telegram lane reuses the `package-under-test` artifact in `NPM Telegram Beta E2E`, with the published npm spec path kept for standalone dispatches.

-Release checks call Package Acceptance with `source=ref`, `package_ref=<release-ref>`, `workflow_ref=<release workflow ref>`, `suite_profile=custom`, `docker_lanes='plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. Release-path Docker chunks cover the overlapping package/update/plugin lanes; Package Acceptance keeps offline plugin, update, and Telegram proof against the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=release-history` to expand the lane across a deduped history matrix: the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. Set `published_upgrade_survivor_scenarios=reported-issues` to expand the same baselines across issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, tilde log paths, and stale legacy plugin dependency roots. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.4-mini`, so the install and gateway proof stays fast and deterministic.
+Release checks call Package Acceptance with `source=ref`, `package_ref=<release-ref>`, `workflow_ref=<release workflow ref>`, `suite_profile=custom`, `docker_lanes='plugins-offline plugin-update'`, and `telegram_mode=mock-openai`. Release-path Docker chunks cover the overlapping package/update/plugin lanes; Package Acceptance keeps offline plugin, update, and Telegram proof against the same resolved package tarball. Cross-OS release checks still cover OS-specific onboarding, installer, and platform behavior; package/update product validation should start with Package Acceptance. The `published-upgrade-survivor` Docker lane validates one published package baseline per run. In Package Acceptance, the resolved `package-under-test` tarball is always the candidate and `published_upgrade_survivor_baseline` selects the fallback published baseline, defaulting to `openclaw@latest`; failed-lane rerun commands preserve that baseline. Set `published_upgrade_survivor_baselines=release-history` to expand the lane across a deduped history matrix: the latest six stable releases, `2026.4.23`, and the latest stable release before `2026-03-15`. Set `published_upgrade_survivor_scenarios=reported-issues` to expand the same baselines across issue-shaped fixtures for Feishu config, preserved bootstrap/persona files, tilde log paths, and stale legacy plugin dependency roots. Local aggregate runs can pass exact package specs with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS`, keep a single lane with `OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC` such as `openclaw@2026.4.15`, or set `OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS` for the scenario matrix. The published lane configures the baseline with a baked `openclaw config set` command recipe, records recipe steps in `summary.json`, and probes `/healthz`, `/readyz`, plus RPC status after Gateway start. The Windows packaged and installer fresh lanes also verify that an installed package can import a browser-control override from a raw absolute Windows path. The OpenAI cross-OS agent-turn smoke defaults to `OPENCLAW_CROSS_OS_OPENAI_MODEL` when set, otherwise `openai/gpt-5.5`, so the install and gateway proof stays on the preferred GPT-5 test model.
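For the local aggregate knobs named above, the resolution order can be sketched roughly as follows (hypothetical helper; the whitespace-separated spec format is an assumption, and the real wiring lives in the lane scripts):

```ts
// Rough sketch of the published-baseline selection described above.
function resolveSurvivorBaselines(env: NodeJS.ProcessEnv): string[] {
  // Exact package specs, one lane per spec (separator format assumed).
  const specs = env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS?.trim();
  if (specs) return specs.split(/\s+/);
  // Single-lane override, e.g. "openclaw@2026.4.15".
  const spec = env.OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC?.trim();
  // Default fallback published baseline.
  return [spec || "openclaw@latest"];
}
```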

 ### Legacy compatibility windows

@@ -248,7 +248,7 @@ gh workflow run package-acceptance.yml --ref main \
 - Use `--platform macos`, `--platform windows`, or `--platform linux` while
   iterating on one guest. Use `--json` for the summary artifact path and
   per-lane status.
-- The OpenAI lane uses `openai/gpt-5.4` for the live agent-turn proof by
+- The OpenAI lane uses `openai/gpt-5.5` for the live agent-turn proof by
   default. Pass `--model <provider/model>` or set
   `OPENCLAW_PARALLELS_OPENAI_MODEL` when deliberately validating another
   OpenAI model.
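The effective precedence is: explicit `--model`, then `OPENCLAW_PARALLELS_OPENAI_MODEL`, then the GPT-5.5 default, matching the `resolveProviderAuth` change later in this commit. A sketch with a purely illustrative helper name:

```ts
// Illustrative only: flag beats env var beats the preferred default.
function pickSmokeModel(flagModel?: string): string {
  return (
    flagModel?.trim() ||
    process.env.OPENCLAW_PARALLELS_OPENAI_MODEL?.trim() ||
    "openai/gpt-5.5"
  );
}

// pickSmokeModel("openai/gpt-5.4") -> "openai/gpt-5.4" (deliberate override)
// pickSmokeModel()                 -> env var when set, else "openai/gpt-5.5"
```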
@@ -276,7 +276,7 @@ ref once as `release-package-under-test` and reuses that artifact in both
 release-path Docker checks and Package Acceptance. This keeps all
 package-facing boxes on the same bytes and avoids repeated package builds.
 The cross-OS OpenAI install smoke uses `OPENCLAW_CROSS_OS_OPENAI_MODEL` when the
-repo/org variable is set, otherwise `openai/gpt-5.4-mini`, because this lane is
+repo/org variable is set, otherwise `openai/gpt-5.5`, because this lane is
 proving package install, onboarding, gateway startup, and one live agent turn
 rather than benchmarking the slowest default model. The broader live provider
 matrix remains the place for model-specific coverage.
@@ -605,11 +605,9 @@ run_profile() {
   if [[ "$agent_model_provider" == "openai" ]]; then
     agent_model="$(set_agent_model "$profile" \
       "openai/gpt-5.5" \
-      "openai/gpt-4o-mini" \
-      "openai/gpt-4o")"
+      "openai/gpt-5.4-mini")"
     image_model="$(set_image_model "$profile" \
-      "openai/gpt-4o-mini" \
-      "openai/gpt-4o")"
+      "openai/gpt-5.4-image-2")"
   else
     agent_model="$(set_agent_model "$profile" \
       "anthropic/claude-opus-4-6" \
@@ -1,7 +1,7 @@
 {
   "defaults": {
     "model": {
-      "primary": "openai/gpt-4.1-mini"
+      "primary": "openai/gpt-5.5"
     },
     "contextTokens": 64000
   },
@@ -12,7 +12,7 @@
       "name": "Main",
       "workspace": "~/workspace",
       "model": {
-        "primary": "openai/gpt-4.1-mini"
+        "primary": "openai/gpt-5.5"
       },
       "thinkingDefault": "low",
       "skills": ["memory"]
@@ -22,7 +22,7 @@
       "name": "Ops",
       "workspace": "~/workspace/ops",
       "model": {
-        "primary": "openai/gpt-4.1-mini"
+        "primary": "openai/gpt-5.5"
       },
       "fastModeDefault": true
     }
@@ -8,9 +8,9 @@ source "$ROOT_DIR/scripts/lib/docker-e2e-image.sh"

 IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-openwebui-e2e" OPENCLAW_OPENWEBUI_E2E_IMAGE)"
 OPENWEBUI_IMAGE="${OPENWEBUI_IMAGE:-ghcr.io/open-webui/open-webui:v0.8.10}"
-# Keep the default on a broadly available non-reasoning OpenAI model for
-# Open WebUI compatibility smoke. Callers can still override this explicitly.
-MODEL="${OPENCLAW_OPENWEBUI_MODEL:-openai/gpt-4.1-mini}"
+# Keep the default on the preferred GPT-5 OpenAI model for Open WebUI
+# compatibility smoke. Callers can still override this explicitly.
+MODEL="${OPENCLAW_OPENWEBUI_MODEL:-openai/gpt-5.5}"
 PROMPT_NONCE="OPENWEBUI_DOCKER_E2E_$(date +%s)_$$"
 PROMPT="${OPENCLAW_OPENWEBUI_PROMPT:-Reply with exactly this token and nothing else: ${PROMPT_NONCE}}"
 PORT="${OPENCLAW_OPENWEBUI_GATEWAY_PORT:-18789}"
@@ -42,7 +42,7 @@ export function resolveProviderAuth(input: {
       apiKeyEnv: input.apiKeyEnv || "OPENAI_API_KEY",
       authChoice: "openai-api-key",
       authKeyFlag: "openai-api-key",
-      modelId: input.modelId || process.env.OPENCLAW_PARALLELS_OPENAI_MODEL || "openai/gpt-5.4",
+      modelId: input.modelId || process.env.OPENCLAW_PARALLELS_OPENAI_MODEL || "openai/gpt-5.5",
     },
   };
   const resolved = providerDefaults[input.provider];
@@ -69,7 +69,7 @@ export function resolveWindowsProviderAuth(input: {
   if (process.env.OPENCLAW_PARALLELS_OPENAI_MODEL?.trim()) {
     return auth;
   }
-  return { ...auth, modelId: "openai/gpt-4.1-mini" };
+  return { ...auth, modelId: "openai/gpt-5.5" };
 }

 export function providerIdFromModelId(modelId: string): string {
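The test file below pins the observable behavior; as a usage sketch (import path and input shape are assumptions, since the resolver's full signature is elided in this diff):

```ts
import { resolveWindowsProviderAuth } from "./provider-auth"; // path assumed

// With OPENCLAW_PARALLELS_OPENAI_MODEL unset, the Windows path now resolves
// to the shared GPT-5 model rather than a separate "faster" default.
const auth = resolveWindowsProviderAuth({ provider: "openai" }); // input shape assumed
// auth.apiKeyEnv === "OPENAI_API_KEY"
// auth.modelId  === "openai/gpt-5.5"
```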
@@ -117,7 +117,7 @@ function scenarioConfig(scenario, options = {}) {
     agents: {
       defaults: {
         model: {
-          primary: "openai/gpt-4.1-mini",
+          primary: "openai/gpt-5.5",
         },
         contextTokens: 64000,
         skills: ["memory"],
@@ -129,7 +129,7 @@ function scenarioConfig(scenario, options = {}) {
       name: "Main",
       workspace: "~/workspace",
       model: {
-        primary: "openai/gpt-4.1-mini",
+        primary: "openai/gpt-5.5",
       },
       thinkingDefault: "low",
       skills: ["memory"],
@@ -140,7 +140,7 @@ function scenarioConfig(scenario, options = {}) {
       name: "Ops",
       workspace: "~/workspace/ops",
       model: {
-        primary: "openai/gpt-4.1-mini",
+        primary: "openai/gpt-5.5",
       },
       fastModeDefault: true,
     },
@@ -433,7 +433,7 @@ OPENCLAW_TEST_STATE_JSON
   "agents": {
     "defaults": {
       "model": {
-        "primary": "openai/gpt-4.1-mini"
+        "primary": "openai/gpt-5.5"
       },
       "contextTokens": 64000,
       "skills": [
@@ -447,7 +447,7 @@ OPENCLAW_TEST_STATE_JSON
         "name": "Main",
         "workspace": "~/workspace",
         "model": {
-          "primary": "openai/gpt-4.1-mini"
+          "primary": "openai/gpt-5.5"
         },
         "thinkingDefault": "low",
         "skills": [
@@ -460,7 +460,7 @@ OPENCLAW_TEST_STATE_JSON
         "name": "Ops",
         "workspace": "~/workspace/ops",
         "model": {
-          "primary": "openai/gpt-4.1-mini"
+          "primary": "openai/gpt-5.5"
         },
         "fastModeDefault": true
       }
@@ -322,7 +322,7 @@ function writePackedBundledPluginActivationConfig(homeDir: string): void {
     {
       agents: {
         defaults: {
-          model: { primary: "openai/gpt-4.1-mini" },
+          model: { primary: "openai/gpt-5.5" },
         },
       },
       channels: {
@@ -97,7 +97,7 @@ const modelCatalogMocks = getSharedMocks("openclaw.trigger-handling.model-catalo
     name: "Claude Opus 4.5 (OpenRouter)",
     contextWindow: 200000,
   },
-  { provider: "openai", id: "gpt-4.1-mini", name: "GPT-4.1 mini" },
+  { provider: "openai", id: "gpt-5.4-mini", name: "GPT-5.4 mini" },
   { provider: "openai", id: "gpt-5.5", name: "GPT-5.5" },
   { provider: "openai-codex", id: "gpt-5.5", name: "GPT-5.5 (Codex)" },
   { provider: "minimax", id: "MiniMax-M2.7", name: "MiniMax M2.7" },
@@ -88,8 +88,7 @@ describe("Parallels smoke model selection", () => {

     expect(providerAuth).toContain("OPENCLAW_PARALLELS_OPENAI_MODEL");
     expect(providerAuth).toContain("OPENCLAW_PARALLELS_WINDOWS_OPENAI_MODEL");
-    expect(providerAuth).toContain("openai/gpt-5.4");
-    expect(providerAuth).toContain("openai/gpt-4.1-mini");
+    expect(providerAuth).toContain("openai/gpt-5.5");
     expect(providerAuth).toContain('authChoice: "openai-api-key"');
     expect(providerAuth).toContain('authChoice: "apiKey"');
     expect(providerAuth).toContain('authChoice: "minimax-global-api"');
@@ -106,16 +105,14 @@ describe("Parallels smoke model selection", () => {
   it("writes full model ids as config map keys in provider batches", () => {
     const source = `
 import { modelProviderConfigBatchJson } from "./${TS_PATHS.common}";
-const result = modelProviderConfigBatchJson("openai/gpt-4.1-mini", "windows");
+const result = modelProviderConfigBatchJson("openai/gpt-5.5", "windows");
 console.log(result);
 `;
     const batch = JSON.parse(runTsEval(source, { OPENAI_API_KEY: "sk-openai" })) as Array<{
       path: string;
     }>;

-    expect(batch.map((entry) => entry.path)).toContain(
-      'agents.defaults.models["openai/gpt-4.1-mini"]',
-    );
+    expect(batch.map((entry) => entry.path)).toContain('agents.defaults.models["openai/gpt-5.5"]');
   });

   it("keeps snapshot, host, package, and quote helpers shared", () => {
@@ -242,7 +239,7 @@ console.log(resolveUbuntuVmName("Ubuntu missing"));
       apiKeyValue: "sk-openai",
       authChoice: "openai-api-key",
       authKeyFlag: "openai-api-key",
-      modelId: "openai/gpt-5.4",
+      modelId: "openai/gpt-5.5",
     });

     expect(
@@ -260,7 +257,7 @@ console.log(resolveUbuntuVmName("Ubuntu missing"));
     });
   });

-  it("uses the faster OpenAI model for Windows smoke unless overridden", () => {
+  it("uses the shared GPT-5 OpenAI model for Windows smoke unless overridden", () => {
     const source = `
 import { resolveWindowsProviderAuth } from "./${TS_PATHS.common}";
 const result = resolveWindowsProviderAuth({
@@ -270,7 +267,7 @@ console.log(JSON.stringify(result));
 `;
     expect(JSON.parse(runTsEval(source, { OPENAI_API_KEY: "sk-openai" }))).toMatchObject({
       apiKeyEnv: "OPENAI_API_KEY",
-      modelId: "openai/gpt-4.1-mini",
+      modelId: "openai/gpt-5.5",
     });

     expect(