mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 12:30:44 +00:00
fix(openai): route Codex audio to transcription model
This commit is contained in:
@@ -58,9 +58,9 @@ const mediaMetadataPlugins = vi.hoisted(() => [
|
||||
autoPriority: { image: 10, audio: 10 },
|
||||
},
|
||||
"openai-codex": {
|
||||
capabilities: ["image"],
|
||||
defaultModels: { image: "gpt-5.5" },
|
||||
autoPriority: { image: 20 },
|
||||
capabilities: ["image", "audio"],
|
||||
defaultModels: { image: "gpt-5.5", audio: "gpt-4o-transcribe" },
|
||||
autoPriority: { image: 20, audio: 20 },
|
||||
},
|
||||
opencode: { capabilities: ["image"], defaultModels: { image: "gpt-5-nano" } },
|
||||
"opencode-go": { capabilities: ["image"], defaultModels: { image: "kimi-k2.6" } },
|
||||
@@ -108,6 +108,9 @@ describe("resolveDefaultMediaModel", () => {
|
||||
expect(resolveDefaultMediaModel({ providerId: "mistral", capability: "audio" })).toBe(
|
||||
"voxtral-mini-latest",
|
||||
);
|
||||
expect(resolveDefaultMediaModel({ providerId: "openai-codex", capability: "audio" })).toBe(
|
||||
"gpt-4o-transcribe",
|
||||
);
|
||||
});
|
||||
|
||||
it("resolves bundled image defaults beyond the historical core set", () => {
|
||||
@@ -136,6 +139,7 @@ describe("resolveAutoMediaKeyProviders", () => {
|
||||
it("keeps the bundled audio fallback order", () => {
|
||||
expect(resolveAutoMediaKeyProviders({ capability: "audio" })).toEqual([
|
||||
"openai",
|
||||
"openai-codex",
|
||||
"xai",
|
||||
"google",
|
||||
"mistral",
|
||||
|
||||
@@ -95,6 +95,52 @@ describe("runCapability auto audio entries", () => {
|
||||
expect(result.decision.outcome).toBe("success");
|
||||
});
|
||||
|
||||
it("uses the provider audio default instead of the active Codex chat model", async () => {
|
||||
let runResult: Awaited<ReturnType<typeof runCapability>> | undefined;
|
||||
let seenModel: string | undefined;
|
||||
|
||||
await withAudioFixture("openclaw-auto-audio-codex", async ({ ctx, media, cache }) => {
|
||||
const providerRegistry = createProviderRegistry({
|
||||
"openai-codex": {
|
||||
id: "openai-codex",
|
||||
capabilities: ["image", "audio"],
|
||||
defaultModels: { image: "gpt-5.5", audio: "gpt-4o-transcribe" },
|
||||
transcribeAudio: async (req) => {
|
||||
seenModel = req.model;
|
||||
return { text: "codex audio", model: req.model ?? "unknown" };
|
||||
},
|
||||
},
|
||||
});
|
||||
const cfg = {
|
||||
models: {
|
||||
providers: {
|
||||
"openai-codex": {
|
||||
apiKey: "codex-test-key", // pragma: allowlist secret
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as unknown as OpenClawConfig;
|
||||
|
||||
runResult = await runCapability({
|
||||
capability: "audio",
|
||||
cfg,
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media,
|
||||
providerRegistry,
|
||||
activeModel: { provider: "openai-codex", model: "gpt-5.5" },
|
||||
});
|
||||
});
|
||||
|
||||
expect(runResult?.outputs[0]).toMatchObject({
|
||||
provider: "openai-codex",
|
||||
model: "gpt-4o-transcribe",
|
||||
text: "codex audio",
|
||||
});
|
||||
expect(seenModel).toBe("gpt-4o-transcribe");
|
||||
});
|
||||
|
||||
it("prefers provider keys over auto-detected local whisper", async () => {
|
||||
const binDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-auto-audio-bin-"));
|
||||
try {
|
||||
|
||||
@@ -749,16 +749,24 @@ async function resolveActiveModelEntry(params: {
|
||||
if (!hasAuth) {
|
||||
return null;
|
||||
}
|
||||
const model =
|
||||
params.capability === "image"
|
||||
? await resolveAutoImageModelId({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
providerRegistry: params.providerRegistry,
|
||||
explicitModel: params.activeModel?.model,
|
||||
})
|
||||
: params.activeModel?.model;
|
||||
if (params.capability === "image" && !model) {
|
||||
let model: string | undefined;
|
||||
if (params.capability === "image") {
|
||||
model = await resolveAutoImageModelId({
|
||||
cfg: params.cfg,
|
||||
providerId,
|
||||
providerRegistry: params.providerRegistry,
|
||||
explicitModel: params.activeModel?.model,
|
||||
});
|
||||
} else if (params.capability === "audio") {
|
||||
model = resolveDefaultMediaModelFromRegistry({
|
||||
providerId,
|
||||
capability: "audio",
|
||||
providerRegistry: params.providerRegistry,
|
||||
});
|
||||
} else {
|
||||
model = params.activeModel?.model;
|
||||
}
|
||||
if ((params.capability === "image" || params.capability === "audio") && !model) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user