mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
496 lines
16 KiB
TypeScript
496 lines
16 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { getModel, type Api, type Model } from "@mariozechner/pi-ai";
|
|
import { AuthStorage, ModelRegistry } from "@mariozechner/pi-coding-agent";
|
|
import OpenAI from "openai";
|
|
import type { ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime";
|
|
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
|
import { encodePngRgba, fillPixel } from "openclaw/plugin-sdk/media-runtime";
|
|
import {
|
|
registerProviderPlugin,
|
|
requireRegisteredProvider,
|
|
} from "openclaw/plugin-sdk/plugin-test-runtime";
|
|
import { runRealtimeSttLiveTest } from "openclaw/plugin-sdk/provider-test-contracts";
|
|
import { getRuntimeConfig } from "openclaw/plugin-sdk/runtime-config-snapshot";
|
|
import {
|
|
isOverloadedErrorMessage,
|
|
isServerErrorMessage,
|
|
isTimeoutErrorMessage,
|
|
} from "openclaw/plugin-sdk/test-env";
|
|
import { describe, expect, it } from "vitest";
|
|
import plugin from "./index.js";
|
|
|
|
const OPENAI_API_KEY = process.env.OPENAI_API_KEY ?? "";
|
|
const LIVE_MODEL_ID = process.env.OPENCLAW_LIVE_OPENAI_PLUGIN_MODEL?.trim() || "gpt-5.5";
|
|
const LIVE_IMAGE_MODEL = process.env.OPENCLAW_LIVE_OPENAI_IMAGE_MODEL?.trim() || "gpt-image-2";
|
|
const LIVE_VISION_MODEL = process.env.OPENCLAW_LIVE_OPENAI_VISION_MODEL?.trim() || "gpt-4.1-mini";
|
|
const liveEnabled = OPENAI_API_KEY.trim().length > 0 && process.env.OPENCLAW_LIVE_TEST === "1";
|
|
const describeLive = liveEnabled ? describe : describe.skip;
|
|
const EMPTY_AUTH_STORE = { version: 1, profiles: {} } as const;
|
|
const ModelRegistryCtor = ModelRegistry as unknown as {
|
|
new (authStorage: AuthStorage, modelsJsonPath?: string): ModelRegistry;
|
|
};
|
|
|
|
function findOpenAIModel(modelId: string): Model<Api> | null {
|
|
return (getModel("openai", modelId as never) as Model<Api> | undefined) ?? null;
|
|
}
|
|
|
|
function resolveTemplateModelId(modelId: string) {
|
|
switch (modelId) {
|
|
case "gpt-5.5":
|
|
return "gpt-5.4";
|
|
case "gpt-5.4":
|
|
return "gpt-5.2";
|
|
case "gpt-5.4-mini":
|
|
return "gpt-5-mini";
|
|
case "gpt-5.4-nano":
|
|
return "gpt-5-nano";
|
|
default:
|
|
throw new Error(`Unsupported live OpenAI plugin model: ${modelId}`);
|
|
}
|
|
}
|
|
|
|
function createLiveModelRegistry(modelId: string): ModelRegistry {
|
|
const registry = new ModelRegistryCtor(AuthStorage.inMemory());
|
|
const template = findOpenAIModel(modelId) ?? findOpenAIModel(resolveTemplateModelId(modelId));
|
|
if (!template) {
|
|
throw new Error(`Unsupported live OpenAI plugin model: ${modelId}`);
|
|
}
|
|
registry.registerProvider("openai", {
|
|
apiKey: "test",
|
|
baseUrl: template.baseUrl,
|
|
models: [
|
|
{
|
|
id: template.id,
|
|
name: template.name,
|
|
api: template.api,
|
|
reasoning: template.reasoning,
|
|
input: template.input,
|
|
cost: template.cost,
|
|
contextWindow: template.contextWindow,
|
|
maxTokens: template.maxTokens,
|
|
...(template.compat ? { compat: template.compat } : {}),
|
|
},
|
|
],
|
|
});
|
|
return registry;
|
|
}
|
|
|
|
const registerOpenAIPlugin = () =>
|
|
registerProviderPlugin({
|
|
plugin,
|
|
id: "openai",
|
|
name: "OpenAI Provider",
|
|
});
|
|
|
|
function createReferencePng(): Buffer {
|
|
const width = 96;
|
|
const height = 96;
|
|
const buf = Buffer.alloc(width * height * 4, 255);
|
|
|
|
for (let y = 0; y < height; y += 1) {
|
|
for (let x = 0; x < width; x += 1) {
|
|
fillPixel(buf, x, y, width, 225, 242, 255, 255);
|
|
}
|
|
}
|
|
|
|
for (let y = 24; y < 72; y += 1) {
|
|
for (let x = 24; x < 72; x += 1) {
|
|
fillPixel(buf, x, y, width, 255, 153, 51, 255);
|
|
}
|
|
}
|
|
|
|
return encodePngRgba(buf, width, height);
|
|
}
|
|
|
|
function formatLiveOpenAIError(error: unknown): string {
|
|
return error instanceof Error ? error.message : String(error);
|
|
}
|
|
|
|
function resolveLiveOpenAISkipReason(error: unknown): string | null {
|
|
const message = formatLiveOpenAIError(error);
|
|
if (isTimeoutErrorMessage(message) || /timed out|operation was aborted/i.test(message)) {
|
|
return "provider timeout";
|
|
}
|
|
if (isOverloadedErrorMessage(message) || isServerErrorMessage(message)) {
|
|
return "provider outage";
|
|
}
|
|
return null;
|
|
}
|
|
|
|
function createLiveConfig(): OpenClawConfig {
|
|
const cfg = getRuntimeConfig();
|
|
return {
|
|
...cfg,
|
|
models: {
|
|
...cfg.models,
|
|
providers: {
|
|
...cfg.models?.providers,
|
|
openai: {
|
|
...cfg.models?.providers?.openai,
|
|
apiKey: OPENAI_API_KEY,
|
|
baseUrl: "https://api.openai.com/v1",
|
|
},
|
|
},
|
|
},
|
|
} as OpenClawConfig;
|
|
}
|
|
|
|
function createLiveTtsConfig(): ResolvedTtsConfig {
|
|
return {
|
|
auto: "off",
|
|
mode: "final",
|
|
provider: "openai",
|
|
providerSource: "config",
|
|
modelOverrides: {
|
|
enabled: true,
|
|
allowText: true,
|
|
allowProvider: true,
|
|
allowVoice: true,
|
|
allowModelId: true,
|
|
allowVoiceSettings: true,
|
|
allowNormalization: true,
|
|
allowSeed: true,
|
|
},
|
|
providerConfigs: {
|
|
openai: {
|
|
apiKey: OPENAI_API_KEY,
|
|
baseUrl: "https://api.openai.com/v1",
|
|
model: "gpt-4o-mini-tts",
|
|
voice: "alloy",
|
|
},
|
|
},
|
|
personas: {},
|
|
maxTextLength: 4_000,
|
|
timeoutMs: 30_000,
|
|
};
|
|
}
|
|
|
|
async function createTempAgentDir(): Promise<string> {
|
|
return await fs.mkdtemp(path.join(os.tmpdir(), "openai-plugin-live-"));
|
|
}
|
|
|
|
async function removeTempAgentDir(agentDir: string): Promise<void> {
|
|
await fs.rm(agentDir, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
|
|
}
|
|
|
|
function normalizeTranscriptForMatch(value: string): string {
|
|
return value.toLowerCase().replace(/[^a-z0-9]+/g, "");
|
|
}
|
|
|
|
function linearToMulaw(sample: number): number {
|
|
const bias = 132;
|
|
const clip = 32635;
|
|
let next = Math.max(-clip, Math.min(clip, sample));
|
|
const sign = next < 0 ? 0x80 : 0;
|
|
if (next < 0) {
|
|
next = -next;
|
|
}
|
|
|
|
next += bias;
|
|
let exponent = 7;
|
|
for (let expMask = 0x4000; (next & expMask) === 0 && exponent > 0; exponent -= 1) {
|
|
expMask >>= 1;
|
|
}
|
|
|
|
const mantissa = (next >> (exponent + 3)) & 0x0f;
|
|
return ~(sign | (exponent << 4) | mantissa) & 0xff;
|
|
}
|
|
|
|
function convertPcm24kToMulaw8k(pcm: Buffer): Buffer {
|
|
const inputSamples = Math.floor(pcm.length / 2);
|
|
const outputSamples = Math.floor(inputSamples / 3);
|
|
const mulaw = Buffer.alloc(outputSamples);
|
|
|
|
for (let i = 0; i < outputSamples; i += 1) {
|
|
mulaw[i] = linearToMulaw(pcm.readInt16LE(i * 3 * 2));
|
|
}
|
|
|
|
return mulaw;
|
|
}
|
|
|
|
describeLive("openai plugin live", () => {
|
|
it("registers an OpenAI provider that can complete a live request", async () => {
|
|
const { providers } = await registerOpenAIPlugin();
|
|
const provider = requireRegisteredProvider(providers, "openai");
|
|
const modelRegistry = createLiveModelRegistry(LIVE_MODEL_ID);
|
|
|
|
const resolved =
|
|
modelRegistry.find("openai", LIVE_MODEL_ID) ??
|
|
provider.resolveDynamicModel?.({
|
|
provider: "openai",
|
|
modelId: LIVE_MODEL_ID,
|
|
modelRegistry,
|
|
});
|
|
|
|
if (!resolved) {
|
|
throw new Error("openai provider did not resolve the live model");
|
|
}
|
|
|
|
const normalized = provider.normalizeResolvedModel?.({
|
|
provider: "openai",
|
|
modelId: resolved.id,
|
|
model: resolved,
|
|
});
|
|
|
|
expect(normalized).toMatchObject({
|
|
provider: "openai",
|
|
id: LIVE_MODEL_ID,
|
|
api: "openai-responses",
|
|
baseUrl: "https://api.openai.com/v1",
|
|
});
|
|
|
|
const client = new OpenAI({
|
|
apiKey: OPENAI_API_KEY,
|
|
baseURL: normalized?.baseUrl,
|
|
});
|
|
const response = await client.responses.create({
|
|
model: normalized?.id ?? LIVE_MODEL_ID,
|
|
instructions: "Return exactly OK and no other text.",
|
|
input: "Return exactly OK.",
|
|
max_output_tokens: 64,
|
|
reasoning: { effort: "none" },
|
|
text: { verbosity: "low" },
|
|
});
|
|
|
|
expect(response.output_text.trim()).toMatch(/^OK[.!]?$/);
|
|
}, 30_000);
|
|
|
|
it("lists voices and synthesizes audio through the registered speech provider", async () => {
|
|
const { speechProviders } = await registerOpenAIPlugin();
|
|
const speechProvider = requireRegisteredProvider(speechProviders, "openai");
|
|
|
|
const voices = await speechProvider.listVoices?.({});
|
|
if (!voices) {
|
|
throw new Error("openai speech provider did not return voices");
|
|
}
|
|
expect(voices).toEqual(expect.arrayContaining([expect.objectContaining({ id: "alloy" })]));
|
|
|
|
const cfg = createLiveConfig();
|
|
const ttsConfig = createLiveTtsConfig();
|
|
|
|
const audioFile = await speechProvider.synthesize({
|
|
text: "OpenClaw integration test OK.",
|
|
cfg,
|
|
providerConfig: ttsConfig.providerConfigs.openai ?? {},
|
|
target: "audio-file",
|
|
timeoutMs: ttsConfig.timeoutMs,
|
|
});
|
|
expect(audioFile.outputFormat).toBe("mp3");
|
|
expect(audioFile.fileExtension).toBe(".mp3");
|
|
expect(audioFile.audioBuffer.byteLength).toBeGreaterThan(512);
|
|
|
|
const telephony = await speechProvider.synthesizeTelephony?.({
|
|
text: "Telephony check OK.",
|
|
cfg,
|
|
providerConfig: ttsConfig.providerConfigs.openai ?? {},
|
|
timeoutMs: ttsConfig.timeoutMs,
|
|
});
|
|
expect(telephony?.outputFormat).toBe("pcm");
|
|
expect(telephony?.sampleRate).toBe(24_000);
|
|
expect(telephony?.audioBuffer.byteLength).toBeGreaterThan(512);
|
|
}, 45_000);
|
|
|
|
it("transcribes synthesized speech through the registered media provider", async () => {
|
|
const { speechProviders, mediaProviders } = await registerOpenAIPlugin();
|
|
const speechProvider = requireRegisteredProvider(speechProviders, "openai");
|
|
const mediaProvider = requireRegisteredProvider(mediaProviders, "openai");
|
|
|
|
const cfg = createLiveConfig();
|
|
const ttsConfig = createLiveTtsConfig();
|
|
|
|
const synthesized = await speechProvider.synthesize({
|
|
text: "Speech transcription check okay.",
|
|
cfg,
|
|
providerConfig: ttsConfig.providerConfigs.openai ?? {},
|
|
target: "audio-file",
|
|
timeoutMs: ttsConfig.timeoutMs,
|
|
});
|
|
|
|
const transcription = await mediaProvider.transcribeAudio?.({
|
|
buffer: synthesized.audioBuffer,
|
|
fileName: "openai-plugin-live.mp3",
|
|
mime: "audio/mpeg",
|
|
apiKey: OPENAI_API_KEY,
|
|
timeoutMs: 30_000,
|
|
});
|
|
|
|
const text = (transcription?.text ?? "").toLowerCase();
|
|
const collapsedText = text.replace(/[\s-]+/g, "");
|
|
expect(text.length).toBeGreaterThan(0);
|
|
expect(collapsedText).toContain("speech");
|
|
expect(collapsedText).toContain("check");
|
|
}, 45_000);
|
|
|
|
it("opens OpenAI realtime STT before sending audio", async () => {
|
|
const { realtimeTranscriptionProviders } = await registerOpenAIPlugin();
|
|
const realtimeProvider = requireRegisteredProvider(realtimeTranscriptionProviders, "openai");
|
|
const errors: Error[] = [];
|
|
const session = realtimeProvider.createSession({
|
|
providerConfig: {
|
|
apiKey: OPENAI_API_KEY,
|
|
language: "en",
|
|
},
|
|
onError: (error) => errors.push(error),
|
|
});
|
|
|
|
try {
|
|
await session.connect();
|
|
await new Promise((resolve) => setTimeout(resolve, 1_000));
|
|
expect(errors).toEqual([]);
|
|
expect(session.isConnected()).toBe(true);
|
|
} finally {
|
|
session.close();
|
|
}
|
|
}, 30_000);
|
|
|
|
it("streams realtime STT through the registered transcription provider", async () => {
|
|
const { realtimeTranscriptionProviders, speechProviders } = await registerOpenAIPlugin();
|
|
const realtimeProvider = requireRegisteredProvider(realtimeTranscriptionProviders, "openai");
|
|
const speechProvider = requireRegisteredProvider(speechProviders, "openai");
|
|
const cfg = createLiveConfig();
|
|
const ttsConfig = createLiveTtsConfig();
|
|
const phrase = "Testing OpenClaw OpenAI realtime transcription integration test OK.";
|
|
|
|
const telephony = await speechProvider.synthesizeTelephony?.({
|
|
text: phrase,
|
|
cfg,
|
|
providerConfig: ttsConfig.providerConfigs.openai ?? {},
|
|
timeoutMs: ttsConfig.timeoutMs,
|
|
});
|
|
if (!telephony) {
|
|
throw new Error("OpenAI telephony synthesis did not return audio");
|
|
}
|
|
expect(telephony.outputFormat).toBe("pcm");
|
|
expect(telephony.sampleRate).toBe(24_000);
|
|
|
|
const speech = convertPcm24kToMulaw8k(telephony.audioBuffer);
|
|
const silence = Buffer.alloc(8_000, 0xff);
|
|
const audio = Buffer.concat([silence.subarray(0, 4_000), speech, silence]);
|
|
const { transcripts, partials } = await runRealtimeSttLiveTest({
|
|
provider: realtimeProvider,
|
|
providerConfig: {
|
|
apiKey: OPENAI_API_KEY,
|
|
language: "en",
|
|
silenceDurationMs: 500,
|
|
},
|
|
audio,
|
|
expectedNormalizedText: /openai.*realtime.*transcription/,
|
|
});
|
|
|
|
const normalized = transcripts.join(" ").toLowerCase();
|
|
const compact = normalizeTranscriptForMatch(normalized);
|
|
expect(compact).toContain("openai");
|
|
expect(normalized).toContain("transcription");
|
|
expect(partials.length + transcripts.length).toBeGreaterThan(0);
|
|
}, 180_000);
|
|
|
|
it("generates an image through the registered image provider", async () => {
|
|
const { imageProviders } = await registerOpenAIPlugin();
|
|
const imageProvider = requireRegisteredProvider(imageProviders, "openai");
|
|
|
|
const cfg = createLiveConfig();
|
|
const agentDir = await createTempAgentDir();
|
|
|
|
try {
|
|
const generated = await imageProvider.generateImage({
|
|
provider: "openai",
|
|
model: LIVE_IMAGE_MODEL,
|
|
prompt: "Create a minimal flat orange square centered on a white background.",
|
|
cfg,
|
|
agentDir,
|
|
authStore: EMPTY_AUTH_STORE,
|
|
timeoutMs: 180_000,
|
|
count: 1,
|
|
size: "1536x1024",
|
|
});
|
|
|
|
expect(generated.model).toBe(LIVE_IMAGE_MODEL);
|
|
expect(generated.images.length).toBeGreaterThan(0);
|
|
expect(generated.images[0]?.mimeType).toBe("image/png");
|
|
expect(generated.images[0]?.buffer.byteLength).toBeGreaterThan(1_000);
|
|
} finally {
|
|
await removeTempAgentDir(agentDir);
|
|
}
|
|
}, 240_000);
|
|
|
|
it("edits a reference image through the registered image provider", async () => {
|
|
const { imageProviders } = await registerOpenAIPlugin();
|
|
const imageProvider = requireRegisteredProvider(imageProviders, "openai");
|
|
|
|
const cfg = createLiveConfig();
|
|
const agentDir = await createTempAgentDir();
|
|
|
|
try {
|
|
const edited = await imageProvider.generateImage({
|
|
provider: "openai",
|
|
model: LIVE_IMAGE_MODEL,
|
|
prompt:
|
|
"Edit this image: remove the orange square in the center and keep the background clean and light blue.",
|
|
cfg,
|
|
agentDir,
|
|
authStore: EMPTY_AUTH_STORE,
|
|
timeoutMs: 180_000,
|
|
count: 1,
|
|
size: "1024x1536",
|
|
inputImages: [
|
|
{
|
|
buffer: createReferencePng(),
|
|
mimeType: "image/png",
|
|
fileName: "reference.png",
|
|
},
|
|
],
|
|
});
|
|
|
|
expect(edited.model).toBe(LIVE_IMAGE_MODEL);
|
|
expect(edited.images.length).toBeGreaterThan(0);
|
|
expect(edited.images[0]?.mimeType).toBe("image/png");
|
|
expect(edited.images[0]?.buffer.byteLength).toBeGreaterThan(1_000);
|
|
} finally {
|
|
await removeTempAgentDir(agentDir);
|
|
}
|
|
}, 240_000);
|
|
|
|
it("describes a deterministic image through the registered media provider", async () => {
|
|
const { mediaProviders } = await registerOpenAIPlugin();
|
|
const mediaProvider = requireRegisteredProvider(mediaProviders, "openai");
|
|
|
|
const cfg = createLiveConfig();
|
|
const agentDir = await createTempAgentDir();
|
|
|
|
try {
|
|
let description:
|
|
| Awaited<ReturnType<NonNullable<typeof mediaProvider.describeImage>>>
|
|
| undefined;
|
|
try {
|
|
description = await mediaProvider.describeImage?.({
|
|
buffer: createReferencePng(),
|
|
fileName: "reference.png",
|
|
mime: "image/png",
|
|
prompt: "Reply with one lowercase word for the dominant center color.",
|
|
timeoutMs: 45_000,
|
|
agentDir,
|
|
cfg,
|
|
authStore: EMPTY_AUTH_STORE,
|
|
model: LIVE_VISION_MODEL,
|
|
provider: "openai",
|
|
});
|
|
} catch (err) {
|
|
const skipReason = resolveLiveOpenAISkipReason(err);
|
|
if (skipReason) {
|
|
console.warn(
|
|
`[live:openai] image description skipped: ${skipReason}: ${formatLiveOpenAIError(err)}`,
|
|
);
|
|
return;
|
|
}
|
|
throw err;
|
|
}
|
|
|
|
expect((description?.text ?? "").toLowerCase()).toContain("orange");
|
|
} finally {
|
|
await removeTempAgentDir(agentDir);
|
|
}
|
|
}, 240_000);
|
|
});
|