test(openai): cover GPT-5.5 defaults

Author: Peter Steinberger
Date: 2026-04-23 20:00:51 +01:00
Parent: a36903b94c
Commit: cd5bc2fc93
65 changed files with 437 additions and 181 deletions

View File

@@ -223,7 +223,7 @@ describe("runQaCharacterEval", () => {
expect(runSuite).toHaveBeenCalledTimes(8);
expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
"openai/gpt-5.4",
"openai/gpt-5.5",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-6",
@@ -254,7 +254,7 @@ describe("runQaCharacterEval", () => {
]);
expect(runJudge).toHaveBeenCalledTimes(2);
expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
"openai/gpt-5.4",
"openai/gpt-5.5",
"anthropic/claude-opus-4-6",
]);
expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([

View File

@@ -1135,8 +1135,8 @@ describe("qa cli runtime", () => {
repoRoot: path.resolve("/tmp/openclaw-repo"),
transportId: "qa-channel",
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: undefined,
message: "read qa kickoff and reply short",
timeoutMs: undefined,
@@ -1166,7 +1166,7 @@ describe("qa cli runtime", () => {
it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
? "openai-codex/gpt-5.5"
: defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
);
@@ -1179,8 +1179,8 @@ describe("qa cli runtime", () => {
repoRoot: path.resolve("/tmp/openclaw-repo"),
transportId: "qa-channel",
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
primaryModel: "openai-codex/gpt-5.5",
alternateModel: "openai-codex/gpt-5.5",
fastMode: undefined,
message: "read qa kickoff and reply short",
timeoutMs: undefined,

View File

@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
import { selectQaRunnerModelOptions } from "./model-catalog.runtime.js";
describe("qa runner model catalog", () => {
it("filters to available rows and prefers gpt-5.4 first", () => {
it("filters to available rows and prefers gpt-5.5 first", () => {
expect(
selectQaRunnerModelOptions([
{
@@ -13,8 +13,8 @@ describe("qa runner model catalog", () => {
missing: false,
},
{
key: "openai/gpt-5.4",
name: "gpt-5.4",
key: "openai/gpt-5.5",
name: "gpt-5.5",
input: "text,image",
available: true,
missing: false,
@@ -27,6 +27,6 @@ describe("qa runner model catalog", () => {
missing: false,
},
]).map((entry) => entry.key),
).toEqual(["openai/gpt-5.4", "anthropic/claude-sonnet-4-5"]);
).toEqual(["openai/gpt-5.5", "anthropic/claude-sonnet-4-5"]);
});
});

View File

@@ -34,7 +34,7 @@ describe("qa model selection runtime", () => {
resolveEnvApiKey.mockReturnValue({ apiKey: "sk-test" });
expect(resolveQaPreferredLiveModel()).toBeUndefined();
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.5");
expect(loadAuthProfileStoreForRuntime).not.toHaveBeenCalled();
});
@@ -43,8 +43,8 @@ describe("qa model selection runtime", () => {
provider === "openai-codex" ? ["openai-codex:user@example.com"] : [],
);
expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.4");
expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.5");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.5");
});
it("keeps the OpenAI live default when stored OpenAI profiles are available", () => {
@@ -53,7 +53,7 @@ describe("qa model selection runtime", () => {
);
expect(resolveQaPreferredLiveModel()).toBeUndefined();
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.5");
});
it("leaves mock defaults unchanged", () => {

View File

@@ -19,7 +19,7 @@ describe("extractQaFailureReplyText", () => {
it("classifies explicit provider auth guidance as a failure", () => {
expect(
extractQaFailureReplyText(
'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
),
).toContain('No API key found for provider "openai".');
});
@@ -27,7 +27,7 @@ describe("extractQaFailureReplyText", () => {
it("classifies curated missing-key guidance as a failure", () => {
expect(
extractQaFailureReplyText(
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.4` for OAuth, or set `OPENAI_API_KEY`, then try again.",
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.5` for OAuth, or set `OPENAI_API_KEY`, then try again.",
),
).toContain("Missing API key for OpenAI on the gateway.");
});

View File

@@ -45,8 +45,8 @@ describe("qa run config", () => {
it("creates a live-by-default selection that arms every scenario", () => {
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});
@@ -57,7 +57,7 @@ describe("qa run config", () => {
normalizeQaRunSelection(
{
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "",
fastMode: false,
scenarioIds: ["thread-lifecycle", "missing", "thread-lifecycle"],
@@ -66,8 +66,8 @@ describe("qa run config", () => {
),
).toEqual({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: true,
scenarioIds: ["thread-lifecycle"],
});
@@ -99,13 +99,13 @@ describe("qa run config", () => {
});
it("keeps idle snapshots on static defaults so startup does not inspect auth profiles", () => {
defaultQaRuntimeModelForMode.mockReturnValue("openai-codex/gpt-5.4");
defaultQaRuntimeModelForMode.mockReturnValue("openai-codex/gpt-5.5");
defaultQaRuntimeModelForMode.mockClear();
expect(createIdleQaRunnerSnapshot(scenarios).selection).toMatchObject({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
});
expect(defaultQaRuntimeModelForMode).not.toHaveBeenCalled();
});
@@ -138,14 +138,14 @@ describe("qa run config", () => {
it("prefers the Codex OAuth default when the runtime resolver says it is available", () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
? "openai-codex/gpt-5.5"
: defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
);
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
primaryModel: "openai-codex/gpt-5.5",
alternateModel: "openai-codex/gpt-5.5",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});

View File

@@ -137,7 +137,7 @@ describe("qa scenario catalog", () => {
expect(scenario.sourcePath).toBe("qa/scenarios/models/gpt54-thinking-visibility-switch.md");
expect(config?.requiredLiveProvider).toBe("openai");
expect(config?.requiredLiveModel).toBe("gpt-5.4");
expect(config?.requiredLiveModel).toBe("gpt-5.5");
expect(config?.offDirective).toBe("/think off");
expect(config?.maxDirective).toBe("/think max");
expect(config?.reasoningDirective).toBe("/reasoning on");
@@ -169,10 +169,10 @@ describe("qa scenario catalog", () => {
},
});
expect(config?.requiredProvider).toBe("openai");
expect(config?.requiredModel).toBe("gpt-5.4");
expect(config?.requiredModel).toBe("gpt-5.5");
expect(config?.expectedMarker).toBe("WEB-SEARCH-OK");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"confirms live OpenAI GPT-5.4 web search auto mode",
"confirms live OpenAI GPT-5.5 web search auto mode",
"searches official OpenAI News through the live model",
]);
});
@@ -191,7 +191,7 @@ describe("qa scenario catalog", () => {
expect(scenario.sourcePath).toBe("qa/scenarios/models/thinking-slash-model-remap.md");
expect(config?.requiredProviderMode).toBe("live-frontier");
expect(config?.anthropicModelRef).toBe("anthropic/claude-sonnet-4-6");
expect(config?.openAiXhighModelRef).toBe("openai/gpt-5.4");
expect(config?.openAiXhighModelRef).toBe("openai/gpt-5.5");
expect(config?.noXhighModelRef).toBe("anthropic/claude-sonnet-4-6");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"selects Anthropic and verifies adaptive options",

View File

@@ -35,7 +35,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
@@ -117,7 +117,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
@@ -164,7 +164,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});