fix: align openai fast mode with priority processing

This commit is contained in:
Peter Steinberger
2026-03-29 23:13:29 +01:00
parent 27519cf061
commit d82d6ba0c4
9 changed files with 149 additions and 58 deletions

View File

@@ -1616,6 +1616,45 @@ describe("createOpenAIWebSocketStreamFn", () => {
expect(sent.reasoning).toEqual({ effort: "high", summary: "auto" });
});
// Verifies that an onPayload hook can mutate the request object in place
// (reasoning, text, service_tier) and that those mutations are reflected in
// the response.create event actually sent over the websocket.
it("applies onPayload mutations before sending response.create", async () => {
  const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-onpayload");
  const stream = streamFn(
    modelStub as Parameters<typeof streamFn>[0],
    contextStub as Parameters<typeof streamFn>[1],
    {
      // Mutates the outgoing payload in place; returning undefined means the
      // caller keeps using the (mutated) original object rather than a copy.
      onPayload: (payload: unknown) => {
        const request = payload as Record<string, unknown>;
        request.reasoning = { effort: "none" };
        request.text = { verbosity: "low" };
        request.service_tier = "priority";
        return undefined;
      },
    } as unknown as Parameters<typeof streamFn>[2],
  );
  await new Promise<void>((resolve, reject) => {
    queueMicrotask(async () => {
      try {
        // NOTE(review): the setImmediate hop presumably lets the mock
        // websocket finish its connection setup before we inject events —
        // confirm against MockManager's implementation.
        await new Promise((r) => setImmediate(r));
        MockManager.lastInstance!.simulateEvent({
          type: "response.completed",
          response: makeResponseObject("resp-onpayload", "Done"),
        });
        // Drain the stream so the request/response cycle completes; the
        // yielded items themselves are irrelevant to this assertion.
        for await (const _ of await resolveStream(stream)) {
          /* consume */
        }
        resolve();
      } catch (e) {
        reject(e);
      }
    });
  });
  // The first sent event must be the response.create carrying the hook's
  // mutations, not the pre-mutation defaults.
  const sent = MockManager.lastInstance!.sentEvents[0] as Record<string, unknown>;
  expect(sent.type).toBe("response.create");
  expect(sent.reasoning).toEqual({ effort: "none" });
  expect(sent.text).toEqual({ verbosity: "low" });
  expect(sent.service_tier).toBe("priority");
});
it("forwards topP and toolChoice to response.create", async () => {
const streamFn = createOpenAIWebSocketStreamFn("sk-test", "sess-topp");
const opts = { topP: 0.9, toolChoice: "auto" };

View File

@@ -1880,6 +1880,33 @@ describe("applyExtraParamsToAgent", () => {
expect(payload.service_tier).toBe("priority");
});
// A serviceTier configured under agents.defaults.models for a Codex model
// must surface as the wire-format service_tier in the Responses payload.
it("injects configured OpenAI service_tier into Codex Responses payloads", () => {
  const codexConfig = {
    agents: {
      defaults: {
        models: {
          "openai-codex/gpt-5.4": {
            params: {
              serviceTier: "priority",
            },
          },
        },
      },
    },
  };
  const codexModel = {
    api: "openai-codex-responses",
    provider: "openai-codex",
    id: "gpt-5.4",
    baseUrl: "https://chatgpt.com/backend-api",
  } as unknown as Model<"openai-codex-responses">;
  const payload = runResponsesPayloadMutationCase({
    applyProvider: "openai-codex",
    applyModelId: "gpt-5.4",
    cfg: codexConfig,
    model: codexModel,
  });
  expect(payload.service_tier).toBe("priority");
});
it("preserves caller-provided service_tier values", () => {
const payload = runResponsesPayloadMutationCase({
applyProvider: "openai",
@@ -1911,7 +1938,7 @@ describe("applyExtraParamsToAgent", () => {
expect(payload.service_tier).toBe("default");
});
it("injects fast-mode payload defaults for direct OpenAI Responses", () => {
it("maps fast mode to priority service_tier for direct OpenAI Responses", () => {
const payload = runResponsesPayloadMutationCase({
applyProvider: "openai",
applyModelId: "gpt-5.4",
@@ -1938,8 +1965,8 @@ describe("applyExtraParamsToAgent", () => {
store: false,
},
});
expect(payload.reasoning).toEqual({ effort: "low" });
expect(payload.text).toEqual({ verbosity: "low" });
expect(payload).not.toHaveProperty("reasoning");
expect(payload).not.toHaveProperty("text");
expect(payload.service_tier).toBe("priority");
});
@@ -2130,7 +2157,7 @@ describe("applyExtraParamsToAgent", () => {
expect(payload).not.toHaveProperty("service_tier");
});
it("applies fast-mode defaults for openai-codex responses without service_tier", () => {
it("maps fast mode to priority service_tier for openai-codex responses", () => {
const payload = runResponsesPayloadMutationCase({
applyProvider: "openai-codex",
applyModelId: "gpt-5.4",
@@ -2145,9 +2172,9 @@ describe("applyExtraParamsToAgent", () => {
store: false,
},
});
expect(payload.reasoning).toEqual({ effort: "low" });
expect(payload.text).toEqual({ verbosity: "low" });
expect(payload).not.toHaveProperty("service_tier");
expect(payload).not.toHaveProperty("reasoning");
expect(payload).not.toHaveProperty("text");
expect(payload.service_tier).toBe("priority");
});
it("does not inject service_tier for non-openai providers", () => {

View File

@@ -6,7 +6,6 @@ import { log } from "./logger.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
type OpenAIServiceTier = "auto" | "default" | "flex" | "priority";
type OpenAIReasoningEffort = "low" | "medium" | "high";
const OPENAI_RESPONSES_APIS = new Set(["openai-responses"]);
const OPENAI_RESPONSES_PROVIDERS = new Set(["openai", "azure-openai", "azure-openai-responses"]);
@@ -77,6 +76,28 @@ function shouldApplyOpenAIAttributionHeaders(model: {
return undefined;
}
// True when the model targets an OpenAI endpoint that accepts the
// service_tier request field: either direct OpenAI Responses against the
// public API, or Codex Responses against the Codex backend. The || keeps the
// original short-circuit order, so the base-URL helpers are only consulted
// once the provider/api pair already matches.
function shouldApplyOpenAIServiceTier(model: {
  api?: unknown;
  provider?: unknown;
  baseUrl?: unknown;
}): boolean {
  return (
    (model.provider === "openai" &&
      model.api === "openai-responses" &&
      isOpenAIPublicApiBaseUrl(model.baseUrl)) ||
    (model.provider === "openai-codex" &&
      (model.api === "openai-codex-responses" || model.api === "openai-responses") &&
      isOpenAICodexBaseUrl(model.baseUrl))
  );
}
function shouldForceResponsesStore(model: {
api?: unknown;
provider?: unknown;
@@ -263,44 +284,11 @@ export function resolveOpenAIFastMode(
return normalized;
}
/**
 * Returns the reasoning effort fast mode should request for a model.
 *
 * Fast mode currently pins every model — GPT-5 family or otherwise — to
 * "low", the lowest shared non-disabled effort current transports accept.
 * The previous implementation normalized the id and branched on a "gpt-5"
 * prefix, but both branches returned "low", so that work was dead; this
 * keeps the parameter for interface compatibility and future per-model
 * tuning.
 *
 * @param modelId candidate model id (unused today; accepts any value)
 * @returns always "low"
 */
function resolveFastModeReasoningEffort(modelId: unknown): OpenAIReasoningEffort {
  return "low";
}
function applyOpenAIFastModePayloadOverrides(params: {
payloadObj: Record<string, unknown>;
model: { provider?: unknown; id?: unknown; baseUrl?: unknown; api?: unknown };
}): void {
if (params.payloadObj.reasoning === undefined) {
params.payloadObj.reasoning = {
effort: resolveFastModeReasoningEffort(params.model.id),
};
}
const existingText = params.payloadObj.text;
if (existingText === undefined) {
params.payloadObj.text = { verbosity: "low" };
} else if (existingText && typeof existingText === "object" && !Array.isArray(existingText)) {
const textObj = existingText as Record<string, unknown>;
if (textObj.verbosity === undefined) {
textObj.verbosity = "low";
}
}
if (
params.model.provider === "openai" &&
params.payloadObj.service_tier === undefined &&
isOpenAIPublicApiBaseUrl(params.model.baseUrl)
) {
if (params.payloadObj.service_tier === undefined && shouldApplyOpenAIServiceTier(params.model)) {
params.payloadObj.service_tier = "priority";
}
}
@@ -373,11 +361,7 @@ export function createOpenAIServiceTierWrapper(
): StreamFn {
const underlying = baseStreamFn ?? streamSimple;
return (model, context, options) => {
if (
model.api !== "openai-responses" ||
model.provider !== "openai" ||
!isOpenAIPublicApiBaseUrl(model.baseUrl)
) {
if (!shouldApplyOpenAIServiceTier(model)) {
return underlying(model, context, options);
}
return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {