test(openai): cover GPT-5.5 defaults

Author: Peter Steinberger
Date: 2026-04-23 20:00:51 +01:00
parent a36903b94c
commit cd5bc2fc93
65 changed files with 437 additions and 181 deletions
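GPT-5.5 and GPT-5.5 Pro resolve from the existing gpt-5.4 templates with launch pricing, a 1M context window, a 272k runtime context cap on the Codex path, and a 128k output cap. A rough sketch of the metadata the updated tests assert (illustrative only; the object shape mirrors the test expectations below, not a published schema):

    // Illustrative sketch of the launch metadata asserted in the tests below.
    const gpt55 = {
      id: "gpt-5.5",
      api: "openai-responses", // "openai-codex-responses" on the Codex provider
      contextWindow: 1_000_000,
      contextTokens: 272_000, // codex-sized runtime cap
      maxTokens: 128_000,
      cost: { input: 5, output: 30, cacheRead: 0, cacheWrite: 0 },
    };
    const gpt55Pro = {
      ...gpt55,
      id: "gpt-5.5-pro",
      cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
    };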

View File

@@ -224,7 +224,7 @@ describe("openai codex provider", () => {
},
},
],
defaultModel: "openai-codex/gpt-5.4",
defaultModel: "openai-codex/gpt-5.5",
});
expect(result?.profiles[0]?.credential).not.toHaveProperty("idToken");
expect(result?.profiles[0]?.credential).not.toHaveProperty("accountId");
@@ -329,6 +329,40 @@ describe("openai codex provider", () => {
});
});
it("resolves gpt-5.5 and gpt-5.5-pro with launch pricing and codex-sized runtime cap", () => {
const provider = buildOpenAICodexProviderPlugin();
const model = provider.resolveDynamicModel?.({
provider: "openai-codex",
modelId: "gpt-5.5",
modelRegistry: createSingleModelRegistry(createCodexTemplate({ id: "gpt-5.4" })) as never,
});
const pro = provider.resolveDynamicModel?.({
provider: "openai-codex",
modelId: "gpt-5.5-pro",
modelRegistry: createSingleModelRegistry(createCodexTemplate({ id: "gpt-5.4-pro" })) as never,
});
expect(model).toMatchObject({
id: "gpt-5.5",
api: "openai-codex-responses",
baseUrl: "https://chatgpt.com/backend-api",
contextWindow: 1_000_000,
contextTokens: 272_000,
maxTokens: 128_000,
cost: { input: 5, output: 30, cacheRead: 0, cacheWrite: 0 },
});
expect(pro).toMatchObject({
id: "gpt-5.5-pro",
api: "openai-codex-responses",
baseUrl: "https://chatgpt.com/backend-api",
contextWindow: 1_000_000,
contextTokens: 272_000,
maxTokens: 128_000,
cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
});
});
it("resolves gpt-5.4-pro from a gpt-5.4 runtime template when legacy codex rows are absent", () => {
const provider = buildOpenAICodexProviderPlugin();
@@ -398,7 +432,7 @@ describe("openai codex provider", () => {
expect(model).not.toHaveProperty("contextTokens");
});
it("augments catalog with gpt-5.4 native contextWindow and runtime cap", () => {
it("augments catalog with gpt-5.5 and gpt-5.4 native metadata", () => {
const provider = buildOpenAICodexProviderPlugin();
const entries = provider.augmentModelCatalog?.({
@@ -415,6 +449,22 @@ describe("openai codex provider", () => {
],
} as never);
expect(entries).toContainEqual(
expect.objectContaining({
id: "gpt-5.5",
contextWindow: 1_000_000,
contextTokens: 272_000,
cost: { input: 5, output: 30, cacheRead: 0, cacheWrite: 0 },
}),
);
expect(entries).toContainEqual(
expect.objectContaining({
id: "gpt-5.5-pro",
contextWindow: 1_000_000,
contextTokens: 272_000,
cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
}),
);
expect(entries).toContainEqual(
expect.objectContaining({
id: "gpt-5.4",

View File

@@ -3,7 +3,7 @@ import { describe, expect, it } from "vitest";
import { buildOpenAIProvider } from "./openai-provider.js";
const OPENAI_API_KEY = process.env.OPENAI_API_KEY ?? "";
const DEFAULT_LIVE_MODEL_IDS = ["gpt-5.4-mini", "gpt-5.4-nano"] as const;
const DEFAULT_LIVE_MODEL_IDS = ["gpt-5.5", "gpt-5.4-mini", "gpt-5.4-nano"] as const;
const liveEnabled = OPENAI_API_KEY.trim().length > 0 && process.env.OPENCLAW_LIVE_TEST === "1";
const describeLive = liveEnabled ? describe : describe.skip;
@@ -18,6 +18,24 @@ type LiveModelCase = {
function resolveLiveModelCase(modelId: string): LiveModelCase {
switch (modelId) {
case "gpt-5.5":
return {
modelId,
templateId: "gpt-5.4",
templateName: "GPT-5.4",
cost: { input: 5, output: 30, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1_000_000,
maxTokens: 128_000,
};
case "gpt-5.5-pro":
return {
modelId,
templateId: "gpt-5.4-pro",
templateName: "GPT-5.4 Pro",
cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1_000_000,
maxTokens: 128_000,
};
case "gpt-5.4":
return {
modelId,

View File

@@ -229,6 +229,101 @@ describe("buildOpenAIProvider", () => {
});
});
it("resolves gpt-5.5 and gpt-5.5-pro with launch metadata", () => {
const provider = buildOpenAIProvider();
const model = provider.resolveDynamicModel?.({
provider: "openai",
modelId: "gpt-5.5",
modelRegistry: {
find: (_provider: string, id: string) =>
id === "gpt-5.4"
? {
id,
name: "GPT-5.4",
provider: "openai",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 },
contextWindow: 1_050_000,
maxTokens: 128_000,
}
: null,
} as never,
});
const pro = provider.resolveDynamicModel?.({
provider: "openai",
modelId: "gpt-5.5-pro",
modelRegistry: {
find: (_provider: string, id: string) =>
id === "gpt-5.4-pro"
? {
id,
name: "GPT-5.4 Pro",
provider: "openai",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1_050_000,
maxTokens: 128_000,
}
: null,
} as never,
});
expect(model).toMatchObject({
provider: "openai",
id: "gpt-5.5",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
contextWindow: 1_000_000,
maxTokens: 128_000,
cost: { input: 5, output: 30, cacheRead: 0, cacheWrite: 0 },
});
expect(pro).toMatchObject({
provider: "openai",
id: "gpt-5.5-pro",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
contextWindow: 1_000_000,
maxTokens: 128_000,
cost: { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 },
});
});
it("surfaces gpt-5.5 in xhigh and augmented catalog metadata", () => {
const provider = buildOpenAIProvider();
expect(
provider
.resolveThinkingProfile?.({
provider: "openai",
modelId: "gpt-5.5",
} as never)
?.levels.some((level) => level.id === "xhigh"),
).toBe(true);
const entries = provider.augmentModelCatalog?.({
env: process.env,
entries: [{ provider: "openai", id: "gpt-5.4", name: "GPT-5.4" }],
} as never);
expect(entries).toContainEqual(
expect.objectContaining({
provider: "openai",
id: "gpt-5.5",
name: "gpt-5.5",
reasoning: true,
input: ["text", "image"],
contextWindow: 1_000_000,
}),
);
});
it("keeps modern live selection on OpenAI 5.2+ and Codex 5.2+", () => {
const provider = buildOpenAIProvider();
const codexProvider = buildOpenAICodexProviderPlugin();
@@ -251,6 +346,12 @@ describe("buildOpenAIProvider", () => {
modelId: "gpt-5.4",
} as never),
).toBe(true);
expect(
provider.isModernModelRef?.({
provider: "openai",
modelId: "gpt-5.5",
} as never),
).toBe(true);
expect(
codexProvider.isModernModelRef?.({
@@ -276,6 +377,12 @@ describe("buildOpenAIProvider", () => {
modelId: "gpt-5.4",
} as never),
).toBe(true);
expect(
codexProvider.isModernModelRef?.({
provider: "openai-codex",
modelId: "gpt-5.5",
} as never),
).toBe(true);
});
it("owns replay policy for OpenAI and Codex transports", () => {

View File

@@ -17,7 +17,7 @@ import { runRealtimeSttLiveTest } from "../../test/helpers/stt-live-audio.js";
import plugin from "./index.js";
const OPENAI_API_KEY = process.env.OPENAI_API_KEY ?? "";
-const LIVE_MODEL_ID = process.env.OPENCLAW_LIVE_OPENAI_PLUGIN_MODEL?.trim() || "gpt-5.4-nano";
+const LIVE_MODEL_ID = process.env.OPENCLAW_LIVE_OPENAI_PLUGIN_MODEL?.trim() || "gpt-5.5";
const LIVE_IMAGE_MODEL = process.env.OPENCLAW_LIVE_OPENAI_IMAGE_MODEL?.trim() || "gpt-image-2";
const LIVE_VISION_MODEL = process.env.OPENCLAW_LIVE_OPENAI_VISION_MODEL?.trim() || "gpt-4.1-mini";
const liveEnabled = OPENAI_API_KEY.trim().length > 0 && process.env.OPENCLAW_LIVE_TEST === "1";
@@ -29,6 +29,8 @@ const ModelRegistryCtor = ModelRegistry as unknown as {
function resolveTemplateModelId(modelId: string) {
switch (modelId) {
case "gpt-5.5":
return "gpt-5.4";
case "gpt-5.4":
return "gpt-5.2";
case "gpt-5.4-mini":

View File

@@ -1,6 +1,7 @@
import { beforeEach, describe, it, vi } from "vitest";
import {
expectAugmentedCodexCatalog,
expectedAugmentedOpenaiCodexCatalogEntriesWithGpt55,
expectCodexBuiltInSuppression,
expectCodexMissingAuthHint,
importProviderRuntimeCatalogModule,
@@ -116,7 +117,10 @@ export function describeOpenAIProviderCatalogContract() {
it("keeps bundled model augmentation wired through the provider runtime", async () => {
const { augmentModelCatalogWithProviderPlugins } = await contractDepsPromise;
-await expectAugmentedCodexCatalog(augmentModelCatalogWithProviderPlugins);
+await expectAugmentedCodexCatalog(
+augmentModelCatalogWithProviderPlugins,
+expectedAugmentedOpenaiCodexCatalogEntriesWithGpt55,
+);
});
},
);

View File

@@ -223,7 +223,7 @@ describe("runQaCharacterEval", () => {
expect(runSuite).toHaveBeenCalledTimes(8);
expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
"openai/gpt-5.4",
"openai/gpt-5.5",
"openai/gpt-5.2",
"openai/gpt-5",
"anthropic/claude-opus-4-6",
@@ -254,7 +254,7 @@ describe("runQaCharacterEval", () => {
]);
expect(runJudge).toHaveBeenCalledTimes(2);
expect(runJudge.mock.calls.map(([params]) => params.judgeModel)).toEqual([
"openai/gpt-5.4",
"openai/gpt-5.5",
"anthropic/claude-opus-4-6",
]);
expect(runJudge.mock.calls.map(([params]) => params.judgeThinkingDefault)).toEqual([

View File

@@ -1135,8 +1135,8 @@ describe("qa cli runtime", () => {
repoRoot: path.resolve("/tmp/openclaw-repo"),
transportId: "qa-channel",
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: undefined,
message: "read qa kickoff and reply short",
timeoutMs: undefined,
@@ -1166,7 +1166,7 @@ describe("qa cli runtime", () => {
it("defaults manual frontier runs onto Codex OAuth when the runtime resolver prefers it", async () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
? "openai-codex/gpt-5.5"
: defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
);
@@ -1179,8 +1179,8 @@ describe("qa cli runtime", () => {
repoRoot: path.resolve("/tmp/openclaw-repo"),
transportId: "qa-channel",
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
primaryModel: "openai-codex/gpt-5.5",
alternateModel: "openai-codex/gpt-5.5",
fastMode: undefined,
message: "read qa kickoff and reply short",
timeoutMs: undefined,

View File

@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
import { selectQaRunnerModelOptions } from "./model-catalog.runtime.js";
describe("qa runner model catalog", () => {
it("filters to available rows and prefers gpt-5.4 first", () => {
it("filters to available rows and prefers gpt-5.5 first", () => {
expect(
selectQaRunnerModelOptions([
{
@@ -13,8 +13,8 @@ describe("qa runner model catalog", () => {
missing: false,
},
{
key: "openai/gpt-5.4",
name: "gpt-5.4",
key: "openai/gpt-5.5",
name: "gpt-5.5",
input: "text,image",
available: true,
missing: false,
@@ -27,6 +27,6 @@ describe("qa runner model catalog", () => {
missing: false,
},
]).map((entry) => entry.key),
).toEqual(["openai/gpt-5.4", "anthropic/claude-sonnet-4-5"]);
).toEqual(["openai/gpt-5.5", "anthropic/claude-sonnet-4-5"]);
});
});

View File

@@ -34,7 +34,7 @@ describe("qa model selection runtime", () => {
resolveEnvApiKey.mockReturnValue({ apiKey: "sk-test" });
expect(resolveQaPreferredLiveModel()).toBeUndefined();
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.5");
expect(loadAuthProfileStoreForRuntime).not.toHaveBeenCalled();
});
@@ -43,8 +43,8 @@ describe("qa model selection runtime", () => {
provider === "openai-codex" ? ["openai-codex:user@example.com"] : [],
);
expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.4");
expect(resolveQaPreferredLiveModel()).toBe("openai-codex/gpt-5.5");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai-codex/gpt-5.5");
});
it("keeps the OpenAI live default when stored OpenAI profiles are available", () => {
@@ -53,7 +53,7 @@ describe("qa model selection runtime", () => {
);
expect(resolveQaPreferredLiveModel()).toBeUndefined();
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.4");
expect(defaultQaRuntimeModelForMode("live-frontier")).toBe("openai/gpt-5.5");
});
it("leaves mock defaults unchanged", () => {

View File

@@ -19,7 +19,7 @@ describe("extractQaFailureReplyText", () => {
it("classifies explicit provider auth guidance as a failure", () => {
expect(
extractQaFailureReplyText(
-'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
+'⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
),
).toContain('No API key found for provider "openai".');
});
@@ -27,7 +27,7 @@ describe("extractQaFailureReplyText", () => {
it("classifies curated missing-key guidance as a failure", () => {
expect(
extractQaFailureReplyText(
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.4` for OAuth, or set `OPENAI_API_KEY`, then try again.",
"⚠️ Missing API key for OpenAI on the gateway. Use `openai-codex/gpt-5.5` for OAuth, or set `OPENAI_API_KEY`, then try again.",
),
).toContain("Missing API key for OpenAI on the gateway.");
});

View File

@@ -45,8 +45,8 @@ describe("qa run config", () => {
it("creates a live-by-default selection that arms every scenario", () => {
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});
@@ -57,7 +57,7 @@ describe("qa run config", () => {
normalizeQaRunSelection(
{
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "",
fastMode: false,
scenarioIds: ["thread-lifecycle", "missing", "thread-lifecycle"],
@@ -66,8 +66,8 @@ describe("qa run config", () => {
),
).toEqual({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
fastMode: true,
scenarioIds: ["thread-lifecycle"],
});
@@ -99,13 +99,13 @@ describe("qa run config", () => {
});
it("keeps idle snapshots on static defaults so startup does not inspect auth profiles", () => {
-defaultQaRuntimeModelForMode.mockReturnValue("openai-codex/gpt-5.4");
+defaultQaRuntimeModelForMode.mockReturnValue("openai-codex/gpt-5.5");
defaultQaRuntimeModelForMode.mockClear();
expect(createIdleQaRunnerSnapshot(scenarios).selection).toMatchObject({
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
primaryModel: "openai/gpt-5.5",
alternateModel: "openai/gpt-5.5",
});
expect(defaultQaRuntimeModelForMode).not.toHaveBeenCalled();
});
@@ -138,14 +138,14 @@ describe("qa run config", () => {
it("prefers the Codex OAuth default when the runtime resolver says it is available", () => {
defaultQaRuntimeModelForMode.mockImplementation((mode, options) =>
mode === "live-frontier"
? "openai-codex/gpt-5.4"
? "openai-codex/gpt-5.5"
: defaultQaProviderModelForMode(mode as QaProviderModeInput, options),
);
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "live-frontier",
primaryModel: "openai-codex/gpt-5.4",
alternateModel: "openai-codex/gpt-5.4",
primaryModel: "openai-codex/gpt-5.5",
alternateModel: "openai-codex/gpt-5.5",
fastMode: true,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});

View File

@@ -137,7 +137,7 @@ describe("qa scenario catalog", () => {
expect(scenario.sourcePath).toBe("qa/scenarios/models/gpt54-thinking-visibility-switch.md");
expect(config?.requiredLiveProvider).toBe("openai");
expect(config?.requiredLiveModel).toBe("gpt-5.4");
expect(config?.requiredLiveModel).toBe("gpt-5.5");
expect(config?.offDirective).toBe("/think off");
expect(config?.maxDirective).toBe("/think max");
expect(config?.reasoningDirective).toBe("/reasoning on");
@@ -169,10 +169,10 @@ describe("qa scenario catalog", () => {
},
});
expect(config?.requiredProvider).toBe("openai");
expect(config?.requiredModel).toBe("gpt-5.4");
expect(config?.requiredModel).toBe("gpt-5.5");
expect(config?.expectedMarker).toBe("WEB-SEARCH-OK");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"confirms live OpenAI GPT-5.4 web search auto mode",
"confirms live OpenAI GPT-5.5 web search auto mode",
"searches official OpenAI News through the live model",
]);
});
@@ -191,7 +191,7 @@ describe("qa scenario catalog", () => {
expect(scenario.sourcePath).toBe("qa/scenarios/models/thinking-slash-model-remap.md");
expect(config?.requiredProviderMode).toBe("live-frontier");
expect(config?.anthropicModelRef).toBe("anthropic/claude-sonnet-4-6");
-expect(config?.openAiXhighModelRef).toBe("openai/gpt-5.4");
+expect(config?.openAiXhighModelRef).toBe("openai/gpt-5.5");
expect(config?.noXhighModelRef).toBe("anthropic/claude-sonnet-4-6");
expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([
"selects Anthropic and verifies adaptive options",

View File

@@ -35,7 +35,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
-text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
+text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
@@ -117,7 +117,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
-text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
+text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});
@@ -164,7 +164,7 @@ describe("qa suite transport helpers", () => {
state.addOutboundMessage({
to: "dm:qa-operator",
-text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.4 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.4.',
+text: '⚠️ No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.5 (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.5.',
senderId: "openclaw",
senderName: "OpenClaw QA",
});