fix: preserve manifest-backed model and media capabilities

This commit is contained in:
Shakker
2026-05-01 20:48:15 +01:00
parent b745d049b7
commit 5adbec66e8
9 changed files with 475 additions and 17 deletions

View File

@@ -163,7 +163,9 @@ Docs: https://docs.openclaw.ai
- Agents/runtime: reuse the Gateway metadata startup plan when ensuring reply runtime plugins are loaded, so live agent turns do not broad-load plugin runtimes after the Gateway already scoped startup activation. Thanks @shakkernerd.
- Agents/runtime: delegate scoped reply runtime registry reuse to the plugin loader cache-key compatibility checks, so config changes with the same startup plugin ids cannot keep stale runtime hooks or tools active. Thanks @shakkernerd.
- Agents/runtime: validate agent model allowlists against manifest model catalog metadata during reply startup, avoiding broad provider runtime catalog loading before the agent run lane starts. Thanks @shakkernerd.
- Agents/runtime: keep allowlisted configured model thinking metadata available when manifest catalog rows are absent, so explicit high-reasoning levels remain valid for custom configured models. Thanks @shakkernerd.
- Agents/tools: route media and generation capability lookups through the Gateway plugin metadata snapshot during reply tool registration, avoiding repeated manifest registry reloads on the live reply path. Thanks @shakkernerd.
- Agents/tools: let plugins declare media generation auth aliases and base-url guards in manifests, preserving OpenAI Codex OAuth image generation availability without core-owned provider special cases. Thanks @shakkernerd.
- Agents/tools: reuse the auth profile store already loaded for the active run when deciding media and generation tool availability, avoiding repeated provider-auth runtime discovery during reply startup. Thanks @shakkernerd.
- Agents/tools: keep image, video, and music generation tool registration on manifest/auth control-plane checks instead of loading runtime provider registries during reply startup, reducing live-path tool-prep blocking while leaving provider runtime resolution for execution and list actions. Thanks @shakkernerd.
- fix: block workspace CLOUDSDK_PYTHON override and always set trusted interpreter for gcloud. (#74492) Thanks @pgondhi987.

View File

@@ -758,6 +758,24 @@
"imageGenerationProviders": ["openai"],
"videoGenerationProviders": ["openai"]
},
"imageGenerationProviderMetadata": {
"openai": {
"aliases": ["openai-codex"],
"authSignals": [
{
"provider": "openai"
},
{
"provider": "openai-codex",
"providerBaseUrl": {
"provider": "openai",
"defaultBaseUrl": "https://api.openai.com/v1",
"allowedBaseUrls": ["https://api.openai.com/v1"]
}
}
]
}
},
"mediaUnderstandingProviderMetadata": {
"openai": {
"capabilities": ["image", "audio"],

View File

@@ -295,16 +295,73 @@ vi.mock("./model-catalog.js", () => ({
}));
vi.mock("./model-selection.js", () => ({
buildAllowedModelSet: () => ({
allowedKeys: new Set<string>([
"anthropic/claude",
"codex-cli/gpt-5.4",
"openai/claude",
"openai/gpt-5.4",
]),
allowedCatalog: [],
allowAny: false,
}),
// Test double for buildAllowedModelSet: derives the allowlist from
// cfg.agents.defaults.models and merges the caller-supplied catalog with the
// models configured under cfg.models.providers.
buildAllowedModelSet: ({
cfg,
catalog,
defaultProvider,
defaultModel,
}: {
cfg?: unknown;
catalog?: Array<{ provider: string; id: string }>;
defaultProvider: string;
defaultModel?: string;
}) => {
// Allowlist source: the keys of agents.defaults.models (empty object when absent).
const modelMap =
(cfg as { agents?: { defaults?: { models?: Record<string, unknown> } } } | undefined)?.agents
?.defaults?.models ?? {};
// Flatten cfg.models.providers[*].models into catalog rows, dropping
// non-object entries and rows that have no string id.
const configuredCatalog = (
(cfg as { models?: { providers?: Record<string, { models?: unknown[] }> } } | undefined)
?.models?.providers
? Object.entries(
(cfg as { models?: { providers?: Record<string, { models?: unknown[] }> } }).models!
.providers!,
).flatMap(([provider, entry]) =>
Array.isArray(entry?.models)
? entry.models
.filter(
(model): model is Record<string, unknown> =>
!!model && typeof model === "object",
)
.map((model) => {
const id = typeof model.id === "string" ? model.id : "";
return {
provider,
id,
name: typeof model.name === "string" ? model.name : id,
reasoning: typeof model.reasoning === "boolean" ? model.reasoning : undefined,
compat: model.compat,
};
})
.filter((model) => model.id)
: [],
)
: []
) as Array<{ provider: string; id: string }>;
// Caller-supplied rows come first, configured rows are appended after them.
const combinedCatalog = [...(catalog ?? []), ...configuredCatalog];
// Each allowlist ref is split on "/" and re-joined as `provider/model`.
const allowedKeys = new Set<string>(
Object.keys(modelMap).map((ref) => {
const [provider, ...modelParts] = ref.split("/");
return `${provider}/${modelParts.join("/")}`;
}),
);
// The default model, when given, is always allowlisted.
if (defaultModel) {
allowedKeys.add(`${defaultProvider}/${defaultModel}`);
}
// No explicit allowlist entries: allow any model and expose the full combined catalog.
if (Object.keys(modelMap).length === 0) {
return {
allowedKeys,
allowedCatalog: combinedCatalog,
allowAny: true,
};
}
// Explicit allowlist: only catalog rows whose `provider/id` is allowlisted are exposed.
return {
allowedKeys,
allowedCatalog: combinedCatalog.filter((entry) =>
allowedKeys.has(`${entry.provider}/${entry.id}`),
),
allowAny: false,
};
},
buildConfiguredModelCatalog: ({ cfg }: { cfg?: unknown }) => {
const providers = (cfg as { models?: { providers?: Record<string, { models?: unknown[] }> } })
?.models?.providers;
@@ -598,6 +655,67 @@ describe("agentCommand LiveSessionModelSwitchError retry", () => {
);
});
it("validates explicit thinking against allowlisted configured model compat when manifest catalog is empty", async () => {
state.runtimeConfigMock = {
agents: {
defaults: {
model: { primary: "gmn/gpt-5.4" },
models: {
"gmn/gpt-5.4": {},
},
},
},
models: {
providers: {
gmn: {
models: [
{
id: "gpt-5.4",
name: "GPT 5.4 via GMN",
reasoning: true,
compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
},
],
},
},
},
};
state.loadManifestModelCatalogMock.mockReturnValue([]);
state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => {
const result = await params.run(params.provider, params.model);
return {
result,
provider: params.provider,
model: params.model,
attempts: [],
};
});
state.runAgentAttemptMock.mockResolvedValue(makeSuccessResult("gmn", "gpt-5.4"));
await agentCommand({
message: "hello",
to: "+1234567890",
senderIsOwner: true,
thinking: "xhigh",
});
expect(state.loadManifestModelCatalogMock).toHaveBeenCalled();
expect(state.isThinkingLevelSupportedMock).toHaveBeenCalledWith(
expect.objectContaining({
provider: "gmn",
model: "gpt-5.4",
level: "xhigh",
catalog: [
expect.objectContaining({
provider: "gmn",
id: "gpt-5.4",
compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
}),
],
}),
);
});
it("records fallback steps to the session trajectory runtime", async () => {
state.runWithModelFallbackMock.mockImplementation(async (params: FallbackRunnerParams) => {
await params.onFallbackStep?.({
@@ -687,6 +805,15 @@ describe("agentCommand LiveSessionModelSwitchError retry", () => {
skillsSnapshot: { prompt: "", skills: [], version: 0 },
};
state.sessionEntryMock = sessionEntry;
state.runtimeConfigMock = {
agents: {
defaults: {
models: {
"codex-cli/gpt-5.4": {},
},
},
},
};
state.authProfileStoreMock = {
profiles: {
"openai-codex:work": {

View File

@@ -830,8 +830,11 @@ async function agentCommandInternal(
}
const catalogForThinking =
modelCatalog ??
(allowedModelCatalog.length > 0 ? allowedModelCatalog : configuredThinkingCatalog);
allowedModelCatalog.length > 0
? allowedModelCatalog
: modelCatalog && modelCatalog.length > 0
? modelCatalog
: configuredThinkingCatalog;
const thinkingCatalog = catalogForThinking.length > 0 ? catalogForThinking : undefined;
if (!resolvedThinkLevel) {
resolvedThinkLevel = resolveThinkingDefault({

View File

@@ -30,6 +30,7 @@ function createPlugin(params: {
id: string;
origin?: PluginManifestRecord["origin"];
contracts: NonNullable<PluginManifestRecord["contracts"]>;
imageGenerationProviderMetadata?: PluginManifestRecord["imageGenerationProviderMetadata"];
setupProviders?: Array<{ id: string; envVars?: string[] }>;
}): PluginManifestRecord {
return {
@@ -44,6 +45,7 @@ function createPlugin(params: {
skills: [],
hooks: [],
contracts: params.contracts,
imageGenerationProviderMetadata: params.imageGenerationProviderMetadata,
setup: params.setupProviders ? { providers: params.setupProviders } : undefined,
};
}
@@ -277,6 +279,85 @@ describe("optional media tool factory planning", () => {
});
});
it("keeps manifest-declared image provider auth aliases on the factory path", () => {
const config: OpenClawConfig = {};
installSnapshot(config, [
createPlugin({
id: "openai",
contracts: { imageGenerationProviders: ["openai"] },
imageGenerationProviderMetadata: {
openai: {
aliases: ["openai-codex"],
authSignals: [
{
provider: "openai",
},
{
provider: "openai-codex",
providerBaseUrl: {
provider: "openai",
defaultBaseUrl: "https://api.openai.com/v1",
allowedBaseUrls: ["https://api.openai.com/v1"],
},
},
],
},
},
}),
]);
expect(
__testing.resolveOptionalMediaToolFactoryPlan({
config,
authStore: createAuthStore(["openai-codex"]),
}),
).toMatchObject({
imageGenerate: true,
});
});
it("honors manifest-declared image provider auth alias base-url guards", () => {
const config: OpenClawConfig = {
models: {
providers: {
openai: {
baseUrl: "http://localhost:11434/v1",
},
},
},
};
installSnapshot(config, [
createPlugin({
id: "openai",
contracts: { imageGenerationProviders: ["openai"] },
imageGenerationProviderMetadata: {
openai: {
aliases: ["openai-codex"],
authSignals: [
{
provider: "openai-codex",
providerBaseUrl: {
provider: "openai",
defaultBaseUrl: "https://api.openai.com/v1",
allowedBaseUrls: ["https://api.openai.com/v1"],
},
},
],
},
},
}),
]);
expect(
__testing.resolveOptionalMediaToolFactoryPlan({
config,
authStore: createAuthStore(["openai-codex"]),
}),
).toMatchObject({
imageGenerate: false,
});
});
it("ignores external manifest capability providers excluded by plugin policy", () => {
const config: OpenClawConfig = {
plugins: {

View File

@@ -68,6 +68,11 @@ type CapabilityContractKey =
| "musicGenerationProviders"
| "mediaUnderstandingProviders";
type CapabilityProviderMetadataKey =
| "imageGenerationProviderMetadata"
| "videoGenerationProviderMetadata"
| "musicGenerationProviderMetadata";
type OptionalMediaToolFactoryPlan = {
imageGenerate: boolean;
videoGenerate: boolean;
@@ -106,6 +111,69 @@ function hasNonEmptyEnvCandidate(envVars: readonly string[]): boolean {
});
}
/**
 * Maps a capability contract key to the manifest record field that holds the
 * per-provider auth metadata for that capability.
 *
 * @param key - Capability contract key to translate.
 * @returns The metadata field name for image, video, and music generation;
 *   `undefined` for media understanding.
 */
function metadataKeyForCapabilityContract(
  key: CapabilityContractKey,
): CapabilityProviderMetadataKey | undefined {
  if (key === "imageGenerationProviders") {
    return "imageGenerationProviderMetadata";
  }
  if (key === "videoGenerationProviders") {
    return "videoGenerationProviderMetadata";
  }
  if (key === "musicGenerationProviders") {
    return "musicGenerationProviderMetadata";
  }
  // "mediaUnderstandingProviders" is not mapped to a metadata field here.
  return undefined;
}
/**
 * Canonicalizes a base URL for guard comparison: surrounding whitespace is
 * trimmed and every trailing `/` is removed. Case is left untouched.
 *
 * @param value - Raw base URL string.
 * @returns The trimmed URL without trailing slashes (possibly empty).
 */
function normalizeBaseUrlForManifestGuard(value: string): string {
  const trimmed = value.trim();
  let end = trimmed.length;
  // Walk back over the run of trailing slashes, if any.
  while (end > 0 && trimmed.charAt(end - 1) === "/") {
    end -= 1;
  }
  return trimmed.slice(0, end);
}
/**
 * Evaluates a manifest base-URL guard against the current config.
 *
 * The effective base URL is the guarded provider's configured `baseUrl` when it
 * is a non-blank string, otherwise the guard's `defaultBaseUrl`. The guard
 * passes when that URL, after normalization, equals one of the normalized
 * `allowedBaseUrls`.
 *
 * @param params.config - Config whose `models.providers` entries are inspected.
 * @param params.guard - Guard to evaluate; an absent guard always passes.
 * @returns `true` when no guard is declared or the effective base URL is
 *   allowed; `false` when no effective base URL exists or it is not allowed.
 */
function providerBaseUrlGuardPasses(params: {
  config?: OpenClawConfig;
  guard: NonNullable<
    NonNullable<PluginManifestRecord["imageGenerationProviderMetadata"]>[string]["authSignals"]
  >[number]["providerBaseUrl"];
}): boolean {
  const { config, guard } = params;
  if (!guard) {
    return true;
  }
  const configuredBaseUrl = config?.models?.providers?.[guard.provider]?.baseUrl;
  // A blank or non-string configured value falls back to the manifest default.
  const effectiveBaseUrl =
    typeof configuredBaseUrl === "string" && configuredBaseUrl.trim().length > 0
      ? configuredBaseUrl
      : guard.defaultBaseUrl;
  if (!effectiveBaseUrl) {
    return false;
  }
  const candidate = normalizeBaseUrlForManifestGuard(effectiveBaseUrl);
  for (const allowed of guard.allowedBaseUrls) {
    if (normalizeBaseUrlForManifestGuard(allowed) === candidate) {
      return true;
    }
  }
  return false;
}
/**
 * Lists the auth signals to check for one provider of one capability.
 *
 * When the plugin's capability metadata declares a non-empty `authSignals`
 * list for the provider, that list is returned as-is. Otherwise the signals
 * are synthesized, without base-URL guards, from the provider id followed by
 * the metadata's `aliases` and `authProviders`.
 *
 * @param params.plugin - Manifest record to read metadata from.
 * @param params.key - Capability contract key being evaluated.
 * @param params.providerId - Provider id declared under that contract.
 * @returns Auth signals in the order they should be checked.
 */
function listCapabilityAuthSignals(params: {
  plugin: PluginManifestRecord;
  key: CapabilityContractKey;
  providerId: string;
}): Array<{
  provider: string;
  providerBaseUrl?: NonNullable<
    NonNullable<PluginManifestRecord["imageGenerationProviderMetadata"]>[string]["authSignals"]
  >[number]["providerBaseUrl"];
}> {
  const { plugin, key, providerId } = params;
  const metadataKey = metadataKeyForCapabilityContract(key);
  const metadata = metadataKey === undefined ? undefined : plugin[metadataKey]?.[providerId];
  const declaredSignals = metadata?.authSignals;
  if (declaredSignals && declaredSignals.length > 0) {
    return declaredSignals;
  }
  // No explicit signals: the provider itself plus any aliases and auth providers.
  const fallbackProviders: string[] = [providerId];
  if (metadata?.aliases) {
    fallbackProviders.push(...metadata.aliases);
  }
  if (metadata?.authProviders) {
    fallbackProviders.push(...metadata.authProviders);
  }
  return fallbackProviders.map((provider) => ({ provider }));
}
function hasAuthSignalForSnapshotCapability(params: {
snapshot: PluginMetadataSnapshot;
authStore: AuthProfileStore;
@@ -123,11 +191,25 @@ function hasAuthSignalForSnapshotCapability(params: {
continue;
}
for (const providerId of plugin.contracts?.[params.key] ?? []) {
if (listProfilesForProvider(params.authStore, providerId).length > 0) {
return true;
}
if (hasNonEmptyEnvCandidate(pluginSetupProviderEnvVars(plugin, providerId))) {
return true;
for (const signal of listCapabilityAuthSignals({
plugin,
key: params.key,
providerId,
})) {
if (
!providerBaseUrlGuardPasses({
config: params.config,
guard: signal.providerBaseUrl,
})
) {
continue;
}
if (listProfilesForProvider(params.authStore, signal.provider).length > 0) {
return true;
}
if (hasNonEmptyEnvCandidate(pluginSetupProviderEnvVars(plugin, signal.provider))) {
return true;
}
}
}
}

View File

@@ -1194,6 +1194,23 @@ describe("loadPluginManifestRegistry", () => {
id: "openai",
contracts: {
mediaUnderstandingProviders: ["openai"],
imageGenerationProviders: ["openai"],
},
imageGenerationProviderMetadata: {
openai: {
aliases: ["openai-codex"],
authProviders: ["openai"],
authSignals: [
{
provider: "openai-codex",
providerBaseUrl: {
provider: "openai",
defaultBaseUrl: "https://api.openai.com/v1",
allowedBaseUrls: ["https://api.openai.com/v1"],
},
},
],
},
},
mediaUnderstandingProviderMetadata: {
openai: {
@@ -1220,6 +1237,22 @@ describe("loadPluginManifestRegistry", () => {
origin: "bundled",
});
expect(registry.plugins[0]?.imageGenerationProviderMetadata).toEqual({
openai: {
aliases: ["openai-codex"],
authProviders: ["openai"],
authSignals: [
{
provider: "openai-codex",
providerBaseUrl: {
provider: "openai",
defaultBaseUrl: "https://api.openai.com/v1",
allowedBaseUrls: ["https://api.openai.com/v1"],
},
},
],
},
});
expect(registry.plugins[0]?.mediaUnderstandingProviderMetadata).toEqual({
openai: {
capabilities: ["image", "audio"],

View File

@@ -25,6 +25,7 @@ import {
type PluginManifestActivation,
type PluginManifestConfigContracts,
type PluginManifest,
type PluginManifestCapabilityProviderMetadata,
type PluginManifestChannelCommandDefaults,
type PluginManifestChannelConfig,
type PluginManifestContracts,
@@ -149,6 +150,9 @@ export type PluginManifestRecord = {
string,
PluginManifestMediaUnderstandingProviderMetadata
>;
imageGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
videoGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
musicGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
configContracts?: PluginManifestConfigContracts;
channelConfigs?: Record<string, PluginManifestChannelConfig>;
channelCatalogMeta?: {
@@ -330,6 +334,9 @@ function buildRecord(params: {
configUiHints: params.manifest.uiHints,
contracts: params.manifest.contracts,
mediaUnderstandingProviderMetadata: params.manifest.mediaUnderstandingProviderMetadata,
imageGenerationProviderMetadata: params.manifest.imageGenerationProviderMetadata,
videoGenerationProviderMetadata: params.manifest.videoGenerationProviderMetadata,
musicGenerationProviderMetadata: params.manifest.musicGenerationProviderMetadata,
configContracts: params.manifest.configContracts,
channelConfigs,
...(params.candidate.packageManifest?.channel?.id

View File

@@ -375,6 +375,12 @@ export type PluginManifest = {
string,
PluginManifestMediaUnderstandingProviderMetadata
>;
/** Cheap image-generation provider auth metadata without importing plugin runtime. */
imageGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
/** Cheap video-generation provider auth metadata without importing plugin runtime. */
videoGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
/** Cheap music-generation provider auth metadata without importing plugin runtime. */
musicGenerationProviderMetadata?: Record<string, PluginManifestCapabilityProviderMetadata>;
/** Manifest-owned config behavior consumed by generic core helpers. */
configContracts?: PluginManifestConfigContracts;
channelConfigs?: Record<string, PluginManifestChannelConfig>;
@@ -414,6 +420,23 @@ export type PluginManifestMediaUnderstandingProviderMetadata = {
nativeDocumentInputs?: Array<"pdf">;
};
/**
 * Base-URL guard that can be attached to a capability auth signal.
 * Normalization drops a guard that has no provider or no allowed base URLs.
 */
export type PluginManifestProviderBaseUrlGuard = {
/** Provider id the guard refers to. */
provider: string;
/** Optional default base URL declared by the manifest for that provider. */
defaultBaseUrl?: string;
/** Base URLs the guard accepts; never empty after normalization. */
allowedBaseUrls: string[];
};
/** One manifest-declared auth signal for a capability provider. */
export type PluginManifestCapabilityProviderAuthSignal = {
/** Provider id this signal names; normalization skips signals without one. */
provider: string;
/** Optional base-URL guard for this signal; omitted when absent or when it fails normalization. */
providerBaseUrl?: PluginManifestProviderBaseUrlGuard;
};
/**
 * Per-provider auth metadata for a generation capability, keyed by provider id
 * in the manifest. Normalization omits fields that end up empty.
 */
export type PluginManifestCapabilityProviderMetadata = {
/** Alias provider ids declared for this provider. */
aliases?: string[];
/** Additional auth provider ids declared for this provider. */
authProviders?: string[];
/** Explicit auth signals, each optionally carrying a base-URL guard. */
authSignals?: PluginManifestCapabilityProviderAuthSignal[];
};
export type PluginManifestProviderAuthChoice = {
/** Provider id owned by this manifest entry. */
provider: string;
@@ -574,6 +597,76 @@ function normalizeMediaUnderstandingProviderMetadata(
return Object.keys(normalized).length > 0 ? normalized : undefined;
}
/**
 * Normalizes a raw manifest `providerBaseUrl` value into a base-URL guard.
 *
 * @param value - Untrusted value read from the manifest.
 * @returns The guard, or `undefined` when the value is not a record, has no
 *   provider, or has no allowed base URLs. `defaultBaseUrl` is included only
 *   when it normalizes to a non-empty string.
 */
function normalizeProviderBaseUrlGuard(
  value: unknown,
): PluginManifestProviderBaseUrlGuard | undefined {
  if (!isRecord(value)) {
    return undefined;
  }
  const provider = normalizeOptionalString(value.provider);
  const allowedBaseUrls = normalizeTrimmedStringList(value.allowedBaseUrls);
  if (!provider) {
    return undefined;
  }
  if (allowedBaseUrls.length === 0) {
    return undefined;
  }
  const defaultBaseUrl = normalizeOptionalString(value.defaultBaseUrl);
  if (!defaultBaseUrl) {
    return { provider, allowedBaseUrls };
  }
  return { provider, defaultBaseUrl, allowedBaseUrls };
}
/**
 * Normalizes a raw manifest `authSignals` value.
 *
 * Entries that are not records or that lack a provider are skipped. Each kept
 * signal carries a `providerBaseUrl` guard only when the raw guard normalizes
 * successfully.
 *
 * NOTE(review): a signal whose declared `providerBaseUrl` fails normalization
 * is still kept, without a guard — confirm that is the intended outcome for
 * malformed guards.
 *
 * @param value - Untrusted value read from the manifest.
 * @returns The normalized signals, or `undefined` when the value is not an
 *   array or no entry survives normalization.
 */
function normalizeCapabilityProviderAuthSignals(
  value: unknown,
): PluginManifestCapabilityProviderAuthSignal[] | undefined {
  if (!Array.isArray(value)) {
    return undefined;
  }
  const normalized: PluginManifestCapabilityProviderAuthSignal[] = [];
  for (const entry of value) {
    if (isRecord(entry)) {
      const provider = normalizeOptionalString(entry.provider);
      if (provider) {
        const guard = normalizeProviderBaseUrlGuard(entry.providerBaseUrl);
        normalized.push(guard ? { provider, providerBaseUrl: guard } : { provider });
      }
    }
  }
  return normalized.length === 0 ? undefined : normalized;
}
/**
 * Normalizes a raw manifest capability-provider metadata map.
 *
 * Provider ids are normalized; entries with an empty or blocked id, or with a
 * non-record value, are skipped. Each surviving entry keeps only its non-empty
 * `aliases`, `authProviders`, and `authSignals`; entries left with no fields
 * are dropped. The result is built on a prototype-less object.
 *
 * @param value - Untrusted value read from the manifest.
 * @returns The normalized map, or `undefined` when the value is not a record
 *   or nothing survives normalization.
 */
function normalizeCapabilityProviderMetadata(
  value: unknown,
): Record<string, PluginManifestCapabilityProviderMetadata> | undefined {
  if (!isRecord(value)) {
    return undefined;
  }
  const result: Record<string, PluginManifestCapabilityProviderMetadata> = Object.create(null);
  for (const [rawId, rawEntry] of Object.entries(value)) {
    const providerId = normalizeOptionalString(rawId) ?? "";
    if (!providerId || isBlockedObjectKey(providerId) || !isRecord(rawEntry)) {
      continue;
    }
    const aliases = normalizeTrimmedStringList(rawEntry.aliases);
    const authProviders = normalizeTrimmedStringList(rawEntry.authProviders);
    const authSignals = normalizeCapabilityProviderAuthSignals(rawEntry.authSignals);
    // Fields are assigned in a fixed order and only when they carry data.
    const entry: PluginManifestCapabilityProviderMetadata = {};
    if (aliases.length > 0) {
      entry.aliases = aliases;
    }
    if (authProviders.length > 0) {
      entry.authProviders = authProviders;
    }
    if (authSignals) {
      entry.authSignals = authSignals;
    }
    if (Object.keys(entry).length === 0) {
      continue;
    }
    result[providerId] = entry;
  }
  if (Object.keys(result).length === 0) {
    return undefined;
  }
  return result;
}
function normalizeManifestContracts(value: unknown): PluginManifestContracts | undefined {
if (!isRecord(value)) {
return undefined;
@@ -1393,6 +1486,15 @@ export function loadPluginManifest(
const mediaUnderstandingProviderMetadata = normalizeMediaUnderstandingProviderMetadata(
raw.mediaUnderstandingProviderMetadata,
);
const imageGenerationProviderMetadata = normalizeCapabilityProviderMetadata(
raw.imageGenerationProviderMetadata,
);
const videoGenerationProviderMetadata = normalizeCapabilityProviderMetadata(
raw.videoGenerationProviderMetadata,
);
const musicGenerationProviderMetadata = normalizeCapabilityProviderMetadata(
raw.musicGenerationProviderMetadata,
);
const configContracts = normalizeManifestConfigContracts(raw.configContracts);
const channelConfigs = normalizeChannelConfigs(raw.channelConfigs);
@@ -1439,6 +1541,9 @@ export function loadPluginManifest(
uiHints,
contracts,
mediaUnderstandingProviderMetadata,
imageGenerationProviderMetadata,
videoGenerationProviderMetadata,
musicGenerationProviderMetadata,
configContracts,
channelConfigs,
},