fix: keep configured media STT providers registered

This commit is contained in:
Peter Steinberger
2026-05-01 11:37:59 +01:00
parent 5403df0bc2
commit f3d5c54884
3 changed files with 126 additions and 18 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Discord/voice: merge configured media-understanding providers such as Deepgram into partial active provider registries, so follow-up voice turns keep transcribing after another media plugin is already active. Fixes #65687. Thanks @OneMintJulep.
- Discord/voice: apply per-channel Discord `systemPrompt` overrides to voice transcript turns by forwarding the trusted channel prompt through the voice agent run. Fixes #47095. Thanks @qearlyao.
- Discord/voice: run voice-channel turns under a voice-output policy that hides the agent `tts` tool and asks for spoken reply text, so `/vc join` sessions synthesize and play agent replies instead of ending with `NO_REPLY`. Fixes #61536. Thanks @aounakram.
- Plugins/runtime-deps: prune inactive same-package versioned runtime-deps roots after bundled dependency repair, so upgrades do not leave old `openclaw-<version>-<hash>` package caches behind after doctor runs. Thanks @vincentkoc.

View File

@@ -265,6 +265,83 @@ describe("resolvePluginCapabilityProviders", () => {
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
});
it("merges configured media-understanding providers missing from the active registry", () => {
  // Registry entry for a plugin that contributes one provider under the same id.
  const buildEntry = (id: string, pluginName: string, capabilities: string[]) => ({
    pluginId: id,
    pluginName,
    source: "test",
    provider: { id, capabilities },
  });
  // Manifest record for a bundled plugin declaring a same-named media-understanding provider.
  const buildManifest = (id: string) => ({
    id,
    origin: "bundled",
    contracts: { mediaUnderstandingProviders: [id] },
  });

  // The active registry only knows an image-capable provider.
  const activeRegistry = createEmptyPluginRegistry();
  activeRegistry.mediaUnderstandingProviders.push(
    buildEntry("openai", "OpenAI", ["image"]) as never,
  );

  // The on-demand load yields two more providers; only "deepgram" is named in the config below.
  const loadedRegistry = createEmptyPluginRegistry();
  loadedRegistry.mediaUnderstandingProviders.push(
    buildEntry("deepgram", "Deepgram", ["audio"]) as never,
    buildEntry("google", "Google", ["image", "audio", "video"]) as never,
  );

  mocks.loadPluginManifestRegistry.mockReturnValue({
    plugins: [buildManifest("deepgram"), buildManifest("google")] as never,
    diagnostics: [],
  });
  // A call without arguments returns the active registry; any other call returns the loaded one.
  mocks.resolveRuntimePluginRegistry.mockImplementation((params?: unknown) => {
    if (params === undefined) {
      return activeRegistry;
    }
    return loadedRegistry;
  });

  const cfg = {
    plugins: { allow: ["openai", "deepgram", "google"] },
    tools: {
      media: {
        audio: { enabled: true, models: [{ provider: "deepgram", model: "nova-3" }] },
      },
    },
  } as OpenClawConfig;
  const resolved = resolvePluginCapabilityProviders({
    key: "mediaUnderstandingProviders",
    cfg,
  });

  // "google" was loaded but never requested by the config, so it must not be merged in.
  expectResolvedCapabilityProviderIds(resolved, ["openai", "deepgram"]);
  expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
  expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
    config: expect.objectContaining({
      plugins: expect.objectContaining({
        allow: ["openai", "deepgram", "google"],
      }),
    }),
    onlyPluginIds: ["deepgram", "google"],
    activate: false,
    installBundledRuntimeDeps: false,
  });
});
it("keeps active speech providers when cfg requests an active provider alias", () => {
const active = createEmptyPluginRegistry();
active.speechProviders.push({

View File

@@ -241,6 +241,43 @@ function collectRequestedSpeechProviderIds(cfg: OpenClawConfig | undefined): Set
return requested;
}
/**
 * Feeds the `provider` field of every object entry in a media `models` list into `target`.
 *
 * Values that are not arrays are ignored, and so are entries that are not
 * non-null objects. Each `provider` value is handed to `addStringValue`
 * (defined elsewhere in this file) together with `target`.
 *
 * @param target - Set that receives the provider values.
 * @param value - Candidate `models` list taken from the media tool config.
 */
function addMediaModelProviders(target: Set<string>, value: unknown): void {
  const models: unknown[] = Array.isArray(value) ? value : [];
  for (const model of models) {
    if (model === null || typeof model !== "object") {
      continue;
    }
    const { provider } = model as { provider?: unknown };
    addStringValue(target, provider);
  }
}
/**
 * Gathers the providers referenced by the media tool configuration.
 *
 * Reads `cfg.tools.media.models` plus the `models` lists under `image`,
 * `audio`, and `video`, in that order, and passes each one to
 * `addMediaModelProviders`.
 *
 * @param cfg - Configuration to inspect; may be `undefined`.
 * @returns The collected set, which is empty when nothing is configured.
 */
function collectRequestedMediaUnderstandingProviderIds(
  cfg: OpenClawConfig | undefined,
): Set<string> {
  const providerIds = new Set<string>();
  const mediaConfig = cfg?.tools?.media;
  // Shared list first, then the per-modality lists.
  const modelLists: unknown[] = [
    mediaConfig?.models,
    mediaConfig?.image?.models,
    mediaConfig?.audio?.models,
    mediaConfig?.video?.models,
  ];
  for (const models of modelLists) {
    addMediaModelProviders(providerIds, models);
  }
  return providerIds;
}
/**
 * Picks the config-driven provider collector that matches a capability registry key.
 *
 * @param params.key - Capability registry key being resolved.
 * @param params.cfg - Optional configuration forwarded to the matching collector.
 * @returns The set produced by the speech or media-understanding collector,
 *   or `undefined` for any other key.
 */
function collectRequestedCapabilityProviderIds(params: {
  key: CapabilityProviderRegistryKey;
  cfg?: OpenClawConfig;
}): Set<string> | undefined {
  const { key, cfg } = params;
  if (key === "speechProviders") {
    return collectRequestedSpeechProviderIds(cfg);
  }
  if (key === "mediaUnderstandingProviders") {
    return collectRequestedMediaUnderstandingProviderIds(cfg);
  }
  // Every other capability key has no config-requested provider set.
  return undefined;
}
function removeActiveProviderIds(requested: Set<string>, entries: readonly unknown[]): void {
for (const entry of entries as Array<{ provider: { id?: unknown; aliases?: unknown } }>) {
const provider = entry.provider as { id?: unknown; aliases?: unknown };
@@ -262,7 +299,7 @@ function filterLoadedProvidersForRequestedConfig<K extends CapabilityProviderReg
requested: Set<string>;
entries: PluginRegistry[K];
}): PluginRegistry[K] {
if (params.key !== "speechProviders") {
if (params.key !== "speechProviders" && params.key !== "mediaUnderstandingProviders") {
return [] as unknown as PluginRegistry[K];
}
if (params.requested.size === 0) {
@@ -341,23 +378,16 @@ export function resolvePluginCapabilityProviders<K extends CapabilityProviderReg
const activeRegistry = resolveRuntimePluginRegistry();
const activeProviders = activeRegistry?.[params.key] ?? [];
if (
activeProviders.length > 0 &&
params.key !== "memoryEmbeddingProviders" &&
params.key !== "speechProviders"
) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
if (activeProviders.length > 0 && params.key === "speechProviders" && !params.cfg) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
const missingRequestedSpeechProviders =
activeProviders.length > 0 && params.key === "speechProviders"
? collectRequestedSpeechProviderIds(params.cfg)
const missingRequestedProviders =
activeProviders.length > 0
? collectRequestedCapabilityProviderIds({ key: params.key, cfg: params.cfg })
: undefined;
if (missingRequestedSpeechProviders) {
removeActiveProviderIds(missingRequestedSpeechProviders, activeProviders);
if (missingRequestedSpeechProviders.size === 0) {
if (activeProviders.length > 0 && params.key !== "memoryEmbeddingProviders") {
if (!missingRequestedProviders) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
removeActiveProviderIds(missingRequestedProviders, activeProviders);
if (missingRequestedProviders.size === 0) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
}
@@ -390,7 +420,7 @@ export function resolvePluginCapabilityProviders<K extends CapabilityProviderReg
activeProviders.length > 0
? filterLoadedProvidersForRequestedConfig({
key: params.key,
requested: missingRequestedSpeechProviders ?? new Set(),
requested: missingRequestedProviders ?? new Set(),
entries: loadedProviders,
})
: loadedProviders;