refactor: route capability runtime through channel stores

This commit is contained in:
Peter Steinberger
2026-04-05 09:07:00 +01:00
parent 1903be5401
commit b57372d665
10 changed files with 44 additions and 68 deletions

View File

@@ -1,2 +1,2 @@
884e6fd12b7a8086a11f547e15201f46dea0f2dc46735fad055d4f1b96d5fb82 plugin-sdk-api-baseline.json
100f6b29793abf858f94cb8c292afc0dc56573f4e264d27496a96e17f8de4c1e plugin-sdk-api-baseline.jsonl
cbffdf76d6a7254d8b2d3a601e1206d7b6c835bc44f170d4038bc711a35ef756 plugin-sdk-api-baseline.json
fe026bf3ba1e3b55f6c0b560d76940f3c301d8f593d6f0f6dcc4625745c76d31 plugin-sdk-api-baseline.jsonl

View File

@@ -49,7 +49,7 @@ is a small, self-contained module with a clear purpose and documented contract.
Legacy provider convenience seams for bundled channels are also gone. Imports
such as `openclaw/plugin-sdk/slack`, `openclaw/plugin-sdk/discord`,
`openclaw/plugin-sdk/signal`, `openclaw/plugin-sdk/whatsapp`,
`openclaw/plugin-sdk/whatsapp-surface`, and
channel-branded helper seams, and
`openclaw/plugin-sdk/telegram-core` were private mono-repo shortcuts, not
stable plugin contracts. Use narrow generic SDK subpaths instead. Inside the
bundled plugin workspace, keep provider-owned helpers in that plugin's own
@@ -255,17 +255,16 @@ Current bundled provider examples:
| `plugin-sdk/provider-stream` | Provider stream wrapper helpers | `ProviderStreamFamily`, `buildProviderStreamFamilyHooks`, `composeProviderStreamWrappers`, stream wrapper types, and shared Anthropic/Bedrock/Google/Kilocode/Moonshot/OpenAI/OpenRouter/Z.A.I/MiniMax/Copilot wrapper helpers |
| `plugin-sdk/keyed-async-queue` | Ordered async queue | `KeyedAsyncQueue` |
| `plugin-sdk/media-runtime` | Shared media helpers | Media fetch/transform/store helpers plus media payload builders |
| `plugin-sdk/media-understanding-runtime` | Media-understanding runtime facade | Media-understanding runner facade and typed result helpers |
| `plugin-sdk/media-understanding` | Media-understanding helpers | Media understanding provider types plus provider-facing image/audio helper exports |
| `plugin-sdk/text-runtime` | Shared text helpers | Assistant-visible-text stripping, markdown render/chunking/table helpers, redaction helpers, directive-tag helpers, safe-text utilities, and related text/logging helpers |
| `plugin-sdk/text-chunking` | Text chunking helpers | Outbound text chunking helper |
| `plugin-sdk/speech-runtime` | Speech runtime facade | TTS resolution and synthesis helpers |
| `plugin-sdk/speech` | Speech helpers | Speech provider types plus provider-facing directive, registry, and validation helpers |
| `plugin-sdk/speech-core` | Shared speech core | Speech provider types, registry, directives, normalization |
| `plugin-sdk/realtime-transcription` | Realtime transcription helpers | Provider types and registry helpers |
| `plugin-sdk/realtime-voice` | Realtime voice helpers | Provider types and registry helpers |
| `plugin-sdk/image-generation-core` | Shared image-generation core | Image-generation types, failover, auth, and registry helpers |
| `plugin-sdk/video-generation` | Video-generation provider types | Video-generation provider/request/result types for provider plugins |
| `plugin-sdk/video-generation` | Video-generation helpers | Video-generation provider/request/result types |
| `plugin-sdk/video-generation-core` | Shared video-generation core | Video-generation types, failover helpers, provider lookup, and model-ref parsing |
| `plugin-sdk/video-generation-runtime` | Video-generation runtime facade | Shared runtime `generateVideo` / `listRuntimeVideoGenerationProviders` facade |
| `plugin-sdk/interactive-runtime` | Interactive reply helpers | Interactive reply payload normalization/reduction |
| `plugin-sdk/channel-config-primitives` | Channel config primitives | Narrow channel config-schema primitives |
| `plugin-sdk/channel-config-writes` | Channel config-write helpers | Channel config-write authorization helpers |
@@ -314,21 +313,10 @@ The same rule applies to other generated bundled-helper families such as:
- LINE: `plugin-sdk/line*`
- IRC: `plugin-sdk/irc*`
- bundled helper/plugin surfaces like `plugin-sdk/googlechat`,
`plugin-sdk/whatsapp-surface`, `plugin-sdk/zalouser`,
`plugin-sdk/bluebubbles*`,
`plugin-sdk/zalouser`, `plugin-sdk/bluebubbles*`,
`plugin-sdk/mattermost*`, `plugin-sdk/msteams`,
`plugin-sdk/nextcloud-talk`, `plugin-sdk/nostr`, `plugin-sdk/tlon`,
`plugin-sdk/twitch`, `plugin-sdk/openai`, `plugin-sdk/moonshot`,
`plugin-sdk/qwen*`, `plugin-sdk/modelstudio*`,
`plugin-sdk/provider-moonshot`,
`plugin-sdk/cloudflare-ai-gateway`, `plugin-sdk/byteplus`,
`plugin-sdk/chutes`, `plugin-sdk/deepseek`, `plugin-sdk/google`,
`plugin-sdk/huggingface`, `plugin-sdk/kimi-coding`,
`plugin-sdk/kilocode`, `plugin-sdk/minimax`, `plugin-sdk/mistral`,
`plugin-sdk/nvidia`, `plugin-sdk/opencode`,
`plugin-sdk/opencode-go`, `plugin-sdk/qianfan`, `plugin-sdk/sglang`,
`plugin-sdk/synthetic`, `plugin-sdk/venice`, `plugin-sdk/vllm`,
`plugin-sdk/xai`, `plugin-sdk/volcengine`,
`plugin-sdk/twitch`,
`plugin-sdk/github-copilot-login`, `plugin-sdk/github-copilot-token`,
`plugin-sdk/diagnostics-otel`, `plugin-sdk/diffs`, `plugin-sdk/llm-task`,
`plugin-sdk/thread-ownership`, and `plugin-sdk/voice-call`
@@ -337,15 +325,6 @@ The same rule applies to other generated bundled-helper families such as:
surface `DEFAULT_COPILOT_API_BASE_URL`,
`deriveCopilotApiBaseUrlFromToken`, and `resolveCopilotApiToken`.
`plugin-sdk/whatsapp-surface` currently exposes `DEFAULT_WEB_MEDIA_BYTES`,
WhatsApp auth/account helpers, directory-config helpers, group-policy helpers,
outbound-target resolution, and the narrow `WebChannelStatus` /
`WebInboundMessage` / `WebListenerCloseReason` / `WebMonitorTuning` types.
For Qwen specifically, prefer the canonical `plugin-sdk/qwen` and
`plugin-sdk/qwen-definitions` seams. `plugin-sdk/modelstudio*` remains
exported as a compatibility alias for older plugin code.
Use the narrowest import that matches the job. If you cannot find an export,
check the source at `src/plugin-sdk/` or ask in Discord.

View File

@@ -38,7 +38,7 @@ the broader umbrella surface and shared helpers such as
Do not add or depend on provider-named convenience seams such as
`openclaw/plugin-sdk/slack`, `openclaw/plugin-sdk/discord`,
`openclaw/plugin-sdk/signal`, `openclaw/plugin-sdk/whatsapp`, or
`openclaw/plugin-sdk/whatsapp-surface`. Bundled plugins should compose generic
channel-branded helper seams. Bundled plugins should compose generic
SDK subpaths inside their own `api.ts` or `runtime-api.ts` barrels, and core
should either use those plugin-local barrels or add a narrow generic SDK
contract when the need is truly cross-channel.
@@ -224,20 +224,17 @@ explicitly promotes one as public.
| Subpath | Key exports |
| --- | --- |
| `plugin-sdk/media-runtime` | Shared media fetch/transform/store helpers plus media payload builders |
| `plugin-sdk/media-understanding-runtime` | Media-understanding runner facade and typed result helpers |
| `plugin-sdk/media-understanding` | Media understanding provider types plus provider-facing image/audio helper exports |
| `plugin-sdk/text-runtime` | Shared text/markdown/logging helpers such as assistant-visible-text stripping, markdown render/chunking/table helpers, redaction helpers, directive-tag helpers, and safe-text utilities |
| `plugin-sdk/text-chunking` | Outbound text chunking helper |
| `plugin-sdk/speech-runtime` | Speech-core runtime facade for TTS resolution and synthesis |
| `plugin-sdk/speech` | Speech provider types plus provider-facing directive, registry, and validation helpers |
| `plugin-sdk/speech-core` | Shared speech provider types, registry, directive, and normalization helpers |
| `plugin-sdk/realtime-transcription` | Realtime transcription provider types and registry helpers |
| `plugin-sdk/realtime-voice` | Realtime voice provider types and registry helpers |
| `plugin-sdk/image-generation` | Image generation provider types |
| `plugin-sdk/image-generation-core` | Shared image-generation types, failover, auth, and registry helpers |
| `plugin-sdk/video-generation` | Video generation provider types |
| `plugin-sdk/video-generation` | Video generation provider/request/result types |
| `plugin-sdk/video-generation-core` | Shared video-generation types, failover helpers, provider lookup, and model-ref parsing |
| `plugin-sdk/video-generation-runtime` | Shared runtime `generateVideo` / `listRuntimeVideoGenerationProviders` facade |
| `plugin-sdk/media-understanding` | Media understanding provider types |
| `plugin-sdk/speech` | Speech provider types |
| `plugin-sdk/webhook-targets` | Webhook target registry and route-install helpers |
| `plugin-sdk/webhook-path` | Webhook path normalization helpers |
| `plugin-sdk/web-media` | Shared remote/local media loading helpers |
@@ -267,12 +264,11 @@ explicitly promotes one as public.
<Accordion title="Reserved bundled-helper subpaths">
| Family | Current generated subpaths | Intended use |
| --- | --- | --- |
| Browser | `plugin-sdk/browser`, `plugin-sdk/browser-runtime`, `plugin-sdk/browser-config-support`, `plugin-sdk/browser-support` | Bundled browser plugin maintenance and compatibility |
| Browser | `plugin-sdk/browser`, `plugin-sdk/browser-config-support`, `plugin-sdk/browser-support` | Bundled browser plugin maintenance and compatibility |
| Matrix | `plugin-sdk/matrix`, `plugin-sdk/matrix-helper`, `plugin-sdk/matrix-runtime-heavy`, `plugin-sdk/matrix-runtime-shared`, `plugin-sdk/matrix-runtime-surface`, `plugin-sdk/matrix-surface`, `plugin-sdk/matrix-thread-bindings` | Bundled Matrix helper/runtime surface |
| Line | `plugin-sdk/line`, `plugin-sdk/line-core`, `plugin-sdk/line-runtime`, `plugin-sdk/line-surface` | Bundled LINE helper/runtime surface |
| IRC | `plugin-sdk/irc`, `plugin-sdk/irc-surface` | Bundled IRC helper surface |
| Channel-specific helpers | `plugin-sdk/googlechat`, `plugin-sdk/whatsapp-surface`, `plugin-sdk/zalouser`, `plugin-sdk/bluebubbles`, `plugin-sdk/bluebubbles-policy`, `plugin-sdk/mattermost`, `plugin-sdk/mattermost-policy`, `plugin-sdk/feishu-conversation`, `plugin-sdk/msteams`, `plugin-sdk/nextcloud-talk`, `plugin-sdk/nostr`, `plugin-sdk/tlon`, `plugin-sdk/twitch` | Bundled channel compatibility/helper seams. `plugin-sdk/whatsapp-surface` currently exports `DEFAULT_WEB_MEDIA_BYTES`, WhatsApp auth/account helpers, directory-config helpers, group-policy helpers, outbound-target resolution, and the narrow `WebChannelStatus` / `WebInboundMessage` / `WebListenerCloseReason` / `WebMonitorTuning` types. |
| Provider-specific helpers | `plugin-sdk/openai`, `plugin-sdk/moonshot`, `plugin-sdk/qwen`, `plugin-sdk/qwen-definitions`, `plugin-sdk/modelstudio`, `plugin-sdk/modelstudio-definitions`, `plugin-sdk/provider-moonshot`, `plugin-sdk/together`, `plugin-sdk/amazon-bedrock`, `plugin-sdk/anthropic-vertex`, `plugin-sdk/cloudflare-ai-gateway`, `plugin-sdk/byteplus`, `plugin-sdk/chutes`, `plugin-sdk/deepseek`, `plugin-sdk/google`, `plugin-sdk/huggingface`, `plugin-sdk/kimi-coding`, `plugin-sdk/kilocode`, `plugin-sdk/minimax`, `plugin-sdk/mistral`, `plugin-sdk/nvidia`, `plugin-sdk/opencode`, `plugin-sdk/opencode-go`, `plugin-sdk/qianfan`, `plugin-sdk/sglang`, `plugin-sdk/synthetic`, `plugin-sdk/venice`, `plugin-sdk/vllm`, `plugin-sdk/xai`, `plugin-sdk/volcengine` | Bundled provider-specific helper seams; prefer canonical `qwen*`, keep `modelstudio*` as compatibility aliases |
| Channel-specific helpers | `plugin-sdk/googlechat`, `plugin-sdk/zalouser`, `plugin-sdk/bluebubbles`, `plugin-sdk/bluebubbles-policy`, `plugin-sdk/mattermost`, `plugin-sdk/mattermost-policy`, `plugin-sdk/feishu-conversation`, `plugin-sdk/msteams`, `plugin-sdk/nextcloud-talk`, `plugin-sdk/nostr`, `plugin-sdk/tlon`, `plugin-sdk/twitch` | Bundled channel compatibility/helper seams |
| Auth/plugin-specific helpers | `plugin-sdk/github-copilot-login`, `plugin-sdk/github-copilot-token`, `plugin-sdk/diagnostics-otel`, `plugin-sdk/diffs`, `plugin-sdk/llm-task`, `plugin-sdk/thread-ownership`, `plugin-sdk/voice-call` | Bundled feature/plugin helper seams; `plugin-sdk/github-copilot-token` currently exports `DEFAULT_COPILOT_API_BASE_URL`, `deriveCopilotApiBaseUrlFromToken`, and `resolveCopilotApiToken` |
</Accordion>
</AccordionGroup>

View File

@@ -9,6 +9,7 @@ const {
resolveAgentRouteMock,
agentCommandMock,
transcribeAudioFileMock,
textToSpeechMock,
} = vi.hoisted(() => {
type EventHandler = (...args: unknown[]) => unknown;
type MockConnection = {
@@ -66,6 +67,7 @@ const {
resolveAgentRouteMock: vi.fn(() => ({ agentId: "agent-1", sessionKey: "discord:g1:c1" })),
agentCommandMock: vi.fn(async (_opts?: unknown, _runtime?: unknown) => ({ payloads: [] })),
transcribeAudioFileMock: vi.fn(async () => ({ text: "hello from voice" })),
textToSpeechMock: vi.fn(async () => ({ success: true, audioPath: "/tmp/voice.mp3" })),
};
});
@@ -107,8 +109,15 @@ vi.mock("openclaw/plugin-sdk/agent-runtime", async () => {
};
});
vi.mock("openclaw/plugin-sdk/media-understanding-runtime", () => ({
transcribeAudioFile: transcribeAudioFileMock,
vi.mock("../runtime.js", () => ({
getDiscordRuntime: () => ({
mediaUnderstanding: {
transcribeAudioFile: transcribeAudioFileMock,
},
tts: {
textToSpeech: textToSpeechMock,
},
}),
}));
let managerModule: typeof import("./manager.js");
@@ -157,6 +166,8 @@ describe("DiscordVoiceManager", () => {
agentCommandMock.mockResolvedValue({ payloads: [] });
transcribeAudioFileMock.mockReset();
transcribeAudioFileMock.mockResolvedValue({ text: "hello from voice" });
textToSpeechMock.mockReset();
textToSpeechMock.mockResolvedValue({ success: true, audioPath: "/tmp/voice.mp3" });
});
const createManager = (

View File

@@ -10,18 +10,17 @@ import { agentCommandFromIngress } from "openclaw/plugin-sdk/agent-runtime";
import { resolveTtsConfig, type ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { DiscordAccountConfig, TtsConfig } from "openclaw/plugin-sdk/config-runtime";
import { transcribeAudioFile } from "openclaw/plugin-sdk/media-understanding-runtime";
import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
import { logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env";
import type { RuntimeEnv } from "openclaw/plugin-sdk/runtime-env";
import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
import { textToSpeech } from "openclaw/plugin-sdk/speech-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
import { formatMention } from "../mentions.js";
import { normalizeDiscordSlug, resolveDiscordOwnerAccess } from "../monitor/allow-list.js";
import { formatDiscordUserTag } from "../monitor/format.js";
import { getDiscordRuntime } from "../runtime.js";
import { authorizeDiscordVoiceIngress } from "./access.js";
import { loadDiscordVoiceSdk } from "./sdk-runtime.js";
@@ -226,7 +225,7 @@ async function transcribeAudio(params: {
agentId: string;
filePath: string;
}): Promise<string | undefined> {
const result = await transcribeAudioFile({
const result = await getDiscordRuntime().mediaUnderstanding.transcribeAudioFile({
filePath: params.filePath,
cfg: params.cfg,
agentDir: resolveAgentDir(params.cfg, params.agentId),
@@ -703,7 +702,7 @@ export class DiscordVoiceManager {
return;
}
const ttsResult = await textToSpeech({
const ttsResult = await getDiscordRuntime().tts.textToSpeech({
text: speakText,
cfg: ttsCfg,
channel: "discord",

View File

@@ -4,10 +4,10 @@ import path from "node:path";
import { getModel } from "@mariozechner/pi-ai";
import { AuthStorage, ModelRegistry } from "@mariozechner/pi-coding-agent";
import OpenAI from "openai";
import type { ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { loadConfig } from "openclaw/plugin-sdk/config-runtime";
import { encodePngRgba, fillPixel } from "openclaw/plugin-sdk/media-runtime";
import type { ResolvedTtsConfig } from "openclaw/plugin-sdk/speech-runtime";
import { describe, expect, it } from "vitest";
import {
registerProviderPlugin,

View File

@@ -2,7 +2,6 @@ import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { textToSpeech as globalTextToSpeech } from "openclaw/plugin-sdk/speech-runtime";
import {
getAccessToken,
sendC2CMessage,
@@ -19,6 +18,7 @@ import {
sendC2CFileMessage,
sendGroupFileMessage,
} from "./api.js";
import { getQQBotRuntime } from "./runtime.js";
import type { ResolvedQQBotAccount } from "./types.js";
import {
isGlobalTTSAvailable,
@@ -387,7 +387,7 @@ async function handleAudioPayload(ctx: ReplyContext, payload: MediaPayload): Pro
return;
}
log?.info(`[qqbot:${account.accountId}] TTS (global fallback): "${ttsText.slice(0, 50)}..."`);
const globalResult = await globalTextToSpeech({
const globalResult = await getQQBotRuntime().tts.textToSpeech({
text: ttsText,
cfg: cfg as OpenClawConfig,
channel: "qqbot",

View File

@@ -16,8 +16,12 @@ vi.mock("openclaw/plugin-sdk/media-runtime", () => ({
resolveDefaultMediaModel: vi.fn(() => "gpt-4.1-mini"),
}));
vi.mock("openclaw/plugin-sdk/media-understanding-runtime", () => ({
describeImageFileWithModel: vi.fn(),
vi.mock("./runtime.js", () => ({
getTelegramRuntime: () => ({
mediaUnderstanding: {
describeImageFileWithModel: vi.fn(),
},
}),
}));
const TEST_CACHE_DIR = "/tmp/openclaw-test-sticker-cache/telegram";

View File

@@ -14,9 +14,9 @@ import {
resolveAutoMediaKeyProviders,
resolveDefaultMediaModel,
} from "openclaw/plugin-sdk/media-runtime";
import { describeImageFileWithModel } from "openclaw/plugin-sdk/media-understanding-runtime";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { STATE_DIR } from "openclaw/plugin-sdk/state-paths";
import { getTelegramRuntime } from "./runtime.js";
const CACHE_FILE = path.join(STATE_DIR, "telegram", "sticker-cache.json");
const CACHE_VERSION = 1;
@@ -246,7 +246,7 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
logVerbose(`telegram: describing sticker with ${provider}/${model}`);
try {
const result = await describeImageFileWithModel({
const result = await getTelegramRuntime().mediaUnderstanding.describeImageFileWithModel({
filePath: imagePath,
mime: "image/webp",
cfg,

View File

@@ -1,12 +1,10 @@
import { rmSync } from "node:fs";
import type { OpenClawConfig } from "../config/config.js";
import type { ResolvedTtsConfig } from "../tts/tts.js";
// Public speech helpers for bundled or third-party plugins.
//
// Keep this surface neutral and import-light. Provider builders commonly import
// this module just to get types and a few validation helpers, so avoid pulling
// in the heavy TTS runtime graph at module load time.
// Keep this surface provider-facing: types, validation, directive parsing, and
// registry helpers. Runtime synthesis lives on `api.runtime.tts` or narrower
// core/runtime seams, not here.
export type { SpeechProviderPlugin } from "../plugins/types.js";
export type {
@@ -98,14 +96,3 @@ export function scheduleCleanup(
}, delayMs);
timer.unref();
}
/**
 * Forwards to the `summarizeText` export of `../tts/tts-core.js`.
 *
 * The module is loaded with a dynamic `import()` inside the function body, so
 * it is only resolved when this function is called, not when this file is
 * imported.
 *
 * @param params - Passed through unchanged to the underlying implementation.
 * @returns Whatever the underlying `summarizeText` resolves to.
 */
export async function summarizeText(params: {
  text: string;
  targetLength: number;
  cfg: OpenClawConfig;
  config: ResolvedTtsConfig;
  timeoutMs: number;
}) {
  // Pull the function off the module namespace first so it is invoked as a
  // plain function, exactly like the destructured form.
  const delegate = (await import("../tts/tts-core.js")).summarizeText;
  return delegate(params);
}
}