fix: stabilize model run probes

This commit is contained in:
Peter Steinberger
2026-04-26 06:59:15 +01:00
parent 6ff7a30b9f
commit 54f4c45e5d
13 changed files with 227 additions and 87 deletions

View File

@@ -73,6 +73,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- CLI/model runs: prevent `openclaw infer model run` with explicit OpenRouter models from loading the full provider catalog or inheriting the chat-agent silent-reply policy, restoring non-empty one-shot probe output. Fixes #68791. Thanks @limpredator.
- Installer/macOS: rerun Homebrew install steps without the gum spinner when raw-mode ioctl failures occur, and avoid claiming `node@24` was installed when the Homebrew keg binary is missing. Fixes #70411. Thanks @1fanwang and @dad-io.
- Installer: load nvm before Node.js detection so `curl | bash` installs respect nvm-managed Node instead of stale system Node. Fixes #49556. Thanks @heavenlxj.
- CLI/Volta: respawn raw `openclaw` CLI runs through the named `node` shim when the current Node executable resolves to `volta-shim`, avoiding direct shim execution failures in non-interactive shells. Fixes #68672. Thanks @sanchezm86.

View File

@@ -702,11 +702,11 @@ async function agentCommandInternal(
if (hasExplicitRunOverride && opts.allowModelOverride !== true) {
throw new Error("Model override is not authorized for this caller.");
}
const needsModelCatalog = hasAllowlist || hasStoredOverride || hasExplicitRunOverride;
const needsModelCatalog = Boolean(hasAllowlist);
let allowedModelKeys = new Set<string>();
let allowedModelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> = [];
let modelCatalog: Awaited<ReturnType<typeof loadModelCatalog>> | null = null;
let allowAnyModel = false;
let allowAnyModel = !hasAllowlist;
if (needsModelCatalog) {
modelCatalog = await loadModelCatalog({ config: cfg });
@@ -805,16 +805,12 @@ async function agentCommandInternal(
}
if (!resolvedThinkLevel) {
let catalogForThinking = modelCatalog ?? allowedModelCatalog;
if (!catalogForThinking || catalogForThinking.length === 0) {
modelCatalog = await loadModelCatalog({ config: cfg });
catalogForThinking = modelCatalog;
}
const catalogForThinking = modelCatalog ?? allowedModelCatalog;
resolvedThinkLevel = resolveThinkingDefault({
cfg,
provider,
model,
catalog: catalogForThinking,
catalog: catalogForThinking.length > 0 ? catalogForThinking : undefined,
});
}
if (!isThinkingLevelSupported({ provider, model, level: resolvedThinkLevel })) {

View File

@@ -460,6 +460,9 @@ export function runAgentAttempt(params: {
agentDir: params.agentDir,
allowTransientCooldownProbe: params.allowTransientCooldownProbe,
cleanupBundleMcpOnRunEnd: params.opts.cleanupBundleMcpOnRunEnd,
modelRun: params.opts.modelRun,
promptMode: params.opts.promptMode,
disableTools: params.opts.modelRun === true,
onAgentEvent: params.onAgentEvent,
bootstrapPromptWarningSignaturesSeen,
bootstrapPromptWarningSignature,

View File

@@ -1,5 +1,6 @@
import type { AgentInternalEvent } from "../../agents/internal-events.js";
import type { SpawnedRunMetadata } from "../../agents/spawned-context.js";
import type { PromptMode } from "../../agents/system-prompt.types.js";
import type { ChannelOutboundTargetMode } from "../../channels/plugins/types.public.js";
import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js";
import type { InputProvenance } from "../../sessions/input-provenance.js";
@@ -93,6 +94,10 @@ export type AgentCommandOpts = {
workspaceDir?: SpawnedRunMetadata["workspaceDir"];
/** Force bundled MCP teardown when a one-shot local run completes. */
cleanupBundleMcpOnRunEnd?: boolean;
/** Internal one-shot model probe mode: no tools, no workspace/chat prompt policy. */
modelRun?: boolean;
/** Internal prompt-mode override for trusted local/gateway callsites. */
promptMode?: PromptMode;
};
export type AgentCommandIngressOpts = Omit<

View File

@@ -21,6 +21,10 @@ const disposeSessionMcpRuntimeMock = vi.fn<(sessionId: string) => Promise<void>>
});
const resolveSessionKeyForRequestMock = vi.fn();
const resolveStoredSessionKeyForSessionIdMock = vi.fn();
const resolveModelAsyncMock = vi.fn(async (provider: string, modelId: string) =>
createResolvedEmbeddedRunnerModel(provider, modelId),
);
const ensureOpenClawModelsJsonMock = vi.fn(async () => ({ wrote: false }));
const loggerWarnMock = vi.fn();
let refreshRuntimeAuthOnFirstPromptError = false;
@@ -121,8 +125,8 @@ const installRunEmbeddedMocks = () => {
);
return {
...actual,
resolveModelAsync: async (provider: string, modelId: string) =>
createResolvedEmbeddedRunnerModel(provider, modelId),
resolveModelAsync: (...args: Parameters<typeof resolveModelAsyncMock>) =>
resolveModelAsyncMock(...args),
};
});
vi.doMock("./pi-embedded-runner/run/auth-controller.js", () => ({
@@ -148,7 +152,8 @@ const installRunEmbeddedMocks = () => {
const mod = await vi.importActual<typeof import("./models-config.js")>("./models-config.js");
return {
...mod,
ensureOpenClawModelsJson: vi.fn(async () => ({ wrote: false })),
ensureOpenClawModelsJson: (...args: Parameters<typeof ensureOpenClawModelsJsonMock>) =>
ensureOpenClawModelsJsonMock(...args),
};
});
};
@@ -182,6 +187,12 @@ beforeEach(() => {
disposeSessionMcpRuntimeMock.mockReset();
resolveSessionKeyForRequestMock.mockReset();
resolveStoredSessionKeyForSessionIdMock.mockReset();
resolveModelAsyncMock.mockReset();
resolveModelAsyncMock.mockImplementation(async (provider: string, modelId: string) =>
createResolvedEmbeddedRunnerModel(provider, modelId),
);
ensureOpenClawModelsJsonMock.mockReset();
ensureOpenClawModelsJsonMock.mockResolvedValue({ wrote: false });
loggerWarnMock.mockReset();
refreshRuntimeAuthOnFirstPromptError = false;
runEmbeddedAttemptMock.mockImplementation(async () => {
@@ -285,6 +296,42 @@ const runDefaultEmbeddedTurn = async (sessionFile: string, prompt: string, sessi
};
describe("runEmbeddedPiAgent", () => {
it("skips models.json generation when dynamic model resolution succeeds", async () => {
const sessionFile = nextSessionFile();
const cfg = createEmbeddedPiRunnerOpenAiConfig([]);
runEmbeddedAttemptMock.mockResolvedValueOnce(
makeEmbeddedRunnerAttempt({
assistantTexts: ["ok"],
lastAssistant: buildEmbeddedRunnerAssistant({
content: [{ type: "text", text: "ok" }],
}),
}),
);
await runEmbeddedPiAgent({
sessionId: "dynamic-model",
sessionFile,
workspaceDir,
config: cfg,
prompt: "hello",
provider: "openrouter",
model: "openrouter/auto",
timeoutMs: 5_000,
agentDir,
runId: nextRunId("dynamic-model"),
enqueue: immediateEnqueue,
});
expect(resolveModelAsyncMock).toHaveBeenCalledWith(
"openrouter",
"openrouter/auto",
agentDir,
cfg,
expect.objectContaining({ skipPiDiscovery: true }),
);
expect(ensureOpenClawModelsJsonMock).not.toHaveBeenCalled();
});
it("backfills a trimmed session key from sessionId when the embedded run omits it", async () => {
const sessionFile = nextSessionFile();
const cfg = createEmbeddedPiRunnerOpenAiConfig(["mock-1"]);

View File

@@ -380,20 +380,26 @@ export async function runEmbeddedPiAgent(
agentHarnessId: params.agentHarnessId,
});
const pluginHarnessOwnsTransport = agentHarness.id !== "pi";
if (!pluginHarnessOwnsTransport) {
await ensureOpenClawModelsJson(params.config, agentDir);
}
const { model, error, authStorage, modelRegistry } = await resolveModelAsync(
const dynamicModelResolution = await resolveModelAsync(
provider,
modelId,
agentDir,
params.config,
// Plugin harnesses may expose synthetic providers that PI cannot
// discover safely; resolve their model metadata without touching PI
// auth/model stores.
{ skipPiDiscovery: pluginHarnessOwnsTransport },
{
// Plugin dynamic model hooks can resolve explicit model refs without
// first generating PI models.json. This keeps one-shot model runs from
// blocking on unrelated provider discovery.
skipPiDiscovery: true,
},
);
const modelResolution =
dynamicModelResolution.model || pluginHarnessOwnsTransport
? dynamicModelResolution
: await (async () => {
await ensureOpenClawModelsJson(params.config, agentDir);
return await resolveModelAsync(provider, modelId, agentDir, params.config);
})();
const { model, error, authStorage, modelRegistry } = modelResolution;
if (!model) {
throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
reason: "model_not_found",

View File

@@ -681,71 +681,72 @@ export async function runEmbeddedAttempt(
...(err ? { errorCategory: diagnosticErrorCategory(err) } : {}),
});
};
const toolsRaw = params.disableTools
? []
: (() => {
const allTools = createOpenClawCodingTools({
agentId: sessionAgentId,
...buildEmbeddedAttemptToolRunContext({ ...params, trace: runTrace }),
exec: {
...params.execOverrides,
elevated: params.bashElevated,
},
sandbox,
messageProvider: params.messageChannel ?? params.messageProvider,
agentAccountId: params.agentAccountId,
messageTo: params.messageTo,
messageThreadId: params.messageThreadId,
groupId: params.groupId,
groupChannel: params.groupChannel,
groupSpace: params.groupSpace,
memberRoleIds: params.memberRoleIds,
spawnedBy: params.spawnedBy,
senderId: params.senderId,
senderName: params.senderName,
senderUsername: params.senderUsername,
senderE164: params.senderE164,
senderIsOwner: params.senderIsOwner,
allowGatewaySubagentBinding: params.allowGatewaySubagentBinding,
sessionKey: sandboxSessionKey,
sessionId: params.sessionId,
runId: params.runId,
agentDir,
workspaceDir: effectiveWorkspace,
// When sandboxing uses a copied workspace (`ro` or `none`), effectiveWorkspace points
// at the sandbox copy. Spawned subagents should inherit the real workspace instead.
spawnWorkspaceDir: resolveAttemptSpawnWorkspaceDir({
const toolsRaw =
params.disableTools || params.modelRun
? []
: (() => {
const allTools = createOpenClawCodingTools({
agentId: sessionAgentId,
...buildEmbeddedAttemptToolRunContext({ ...params, trace: runTrace }),
exec: {
...params.execOverrides,
elevated: params.bashElevated,
},
sandbox,
resolvedWorkspace,
}),
config: params.config,
abortSignal: runAbortController.signal,
modelProvider: params.model.provider,
modelId: params.modelId,
modelCompat: extractModelCompat(params.model),
modelApi: params.model.api,
modelContextWindowTokens: params.model.contextWindow,
modelAuthMode: resolveModelAuthMode(params.model.provider, params.config),
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
currentMessageId: params.currentMessageId,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
modelHasVision: params.model.input?.includes("image") ?? false,
requireExplicitMessageTarget:
params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey),
disableMessageTool: params.disableMessageTool,
forceMessageTool: params.forceMessageTool,
onYield: (message) => {
yieldDetected = true;
yieldMessage = message;
queueYieldInterruptForSession?.();
runAbortController.abort("sessions_yield");
abortSessionForYield?.();
},
});
return applyEmbeddedAttemptToolsAllow(allTools, params.toolsAllow);
})();
messageProvider: params.messageChannel ?? params.messageProvider,
agentAccountId: params.agentAccountId,
messageTo: params.messageTo,
messageThreadId: params.messageThreadId,
groupId: params.groupId,
groupChannel: params.groupChannel,
groupSpace: params.groupSpace,
memberRoleIds: params.memberRoleIds,
spawnedBy: params.spawnedBy,
senderId: params.senderId,
senderName: params.senderName,
senderUsername: params.senderUsername,
senderE164: params.senderE164,
senderIsOwner: params.senderIsOwner,
allowGatewaySubagentBinding: params.allowGatewaySubagentBinding,
sessionKey: sandboxSessionKey,
sessionId: params.sessionId,
runId: params.runId,
agentDir,
workspaceDir: effectiveWorkspace,
// When sandboxing uses a copied workspace (`ro` or `none`), effectiveWorkspace points
// at the sandbox copy. Spawned subagents should inherit the real workspace instead.
spawnWorkspaceDir: resolveAttemptSpawnWorkspaceDir({
sandbox,
resolvedWorkspace,
}),
config: params.config,
abortSignal: runAbortController.signal,
modelProvider: params.model.provider,
modelId: params.modelId,
modelCompat: extractModelCompat(params.model),
modelApi: params.model.api,
modelContextWindowTokens: params.model.contextWindow,
modelAuthMode: resolveModelAuthMode(params.model.provider, params.config),
currentChannelId: params.currentChannelId,
currentThreadTs: params.currentThreadTs,
currentMessageId: params.currentMessageId,
replyToMode: params.replyToMode,
hasRepliedRef: params.hasRepliedRef,
modelHasVision: params.model.input?.includes("image") ?? false,
requireExplicitMessageTarget:
params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey),
disableMessageTool: params.disableMessageTool,
forceMessageTool: params.forceMessageTool,
onYield: (message) => {
yieldDetected = true;
yieldMessage = message;
queueYieldInterruptForSession?.();
runAbortController.abort("sessions_yield");
abortSessionForYield?.();
},
});
return applyEmbeddedAttemptToolsAllow(allTools, params.toolsAllow);
})();
const toolsEnabled = supportsModelTools(params.model);
const bootstrapHasFileAccess = toolsEnabled && toolsRaw.some((tool) => tool.name === "read");
const bootstrapRouting = await resolveAttemptWorkspaceBootstrapRouting({
@@ -1057,7 +1058,9 @@ export async function runEmbeddedAttempt(
},
});
const isDefaultAgent = sessionAgentId === defaultAgentId;
const promptMode = resolvePromptModeForSession(params.sessionKey);
const promptMode =
params.promptMode ??
(params.modelRun ? "none" : resolvePromptModeForSession(params.sessionKey));
// When toolsAllow is set, use minimal prompt and strip skills catalog
const effectivePromptMode = params.toolsAllow?.length ? ("minimal" as const) : promptMode;

View File

@@ -15,6 +15,7 @@ import type {
ToolResultFormat,
} from "../../pi-embedded-subscribe.shared-types.js";
import type { SkillSnapshot } from "../../skills.js";
import type { PromptMode } from "../../system-prompt.types.js";
import type { AuthProfileFailurePolicy } from "./auth-profile-failure-policy.types.js";
export type { ClientToolDefinition } from "../../command/shared-types.js";
@@ -71,6 +72,10 @@ export type RunEmbeddedPiAgentParams = {
requireExplicitMessageTarget?: boolean;
/** If true, omit the message tool from the tool list. */
disableMessageTool?: boolean;
/** Internal one-shot model probe mode: no tools, no workspace/chat prompt policy. */
modelRun?: boolean;
/** Explicit system prompt mode override for trusted callers. */
promptMode?: PromptMode;
/** Keep the message tool available even when a narrow profile would omit it. */
forceMessageTool?: boolean;
/** Allow runtime plugins for this run to late-bind the gateway subagent. */

View File

@@ -364,7 +364,7 @@ describe("capability cli", () => {
);
});
it("cleans up bundled MCP runtimes for local model runs", async () => {
it("runs local model probes without chat-agent prompt policy or tools", async () => {
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: ["capability", "model", "run", "--prompt", "hello", "--json"],
@@ -373,13 +373,15 @@ describe("capability cli", () => {
expect(mocks.agentCommand).toHaveBeenCalledWith(
expect.objectContaining({
cleanupBundleMcpOnRunEnd: true,
modelRun: true,
promptMode: "none",
}),
expect.anything(),
expect.anything(),
);
});
it("requests bundled MCP runtime cleanup for gateway model runs", async () => {
it("runs gateway model probes without chat-agent prompt policy or tools", async () => {
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: ["capability", "model", "run", "--prompt", "hello", "--gateway", "--json"],
@@ -390,6 +392,8 @@ describe("capability cli", () => {
method: "agent",
params: expect.objectContaining({
cleanupBundleMcpOnRunEnd: true,
modelRun: true,
promptMode: "none",
}),
}),
);

View File

@@ -583,6 +583,8 @@ async function runModelRun(params: {
agentId,
model: params.model,
json: false,
modelRun: true,
promptMode: "none",
cleanupBundleMcpOnRunEnd: true,
},
{
@@ -619,6 +621,8 @@ async function runModelRun(params: {
message: params.prompt,
provider,
model,
modelRun: true,
promptMode: "none",
cleanupBundleMcpOnRunEnd: true,
idempotencyKey: randomIdempotencyKey(),
},

View File

@@ -118,6 +118,9 @@ vi.mock("../agents/command/attempt-execution.runtime.js", () => {
agentDir: params.agentDir,
allowTransientCooldownProbe: params.allowTransientCooldownProbe,
cleanupBundleMcpOnRunEnd: opts.cleanupBundleMcpOnRunEnd,
modelRun: opts.modelRun,
promptMode: opts.promptMode,
disableTools: opts.modelRun === true,
onAgentEvent: params.onAgentEvent,
} as never);
}),
@@ -380,6 +383,61 @@ describe("agentCommand", () => {
});
});
it("does not load the full model catalog for trusted explicit overrides without an allowlist", async () => {
await withTempHome(async (home) => {
const store = path.join(home, "sessions.json");
mockConfig(home, store, { models: {} });
await agentCommand(
{
message: "ping",
to: "+1222",
model: "openrouter/auto",
},
runtime,
);
expect(loadModelCatalog).not.toHaveBeenCalled();
expectLastRunProviderModel("openrouter", "openrouter/auto");
expect(modelSelectionModule.resolveThinkingDefault).toHaveBeenCalledWith(
expect.objectContaining({
provider: "openrouter",
model: "auto",
catalog: undefined,
}),
);
});
});
it("uses no-tools plain prompt mode for one-shot model runs", async () => {
await withTempHome(async (home) => {
const store = path.join(home, "sessions.json");
mockConfig(home, store, { models: {} });
await agentCommand(
{
message: "Reply with exactly OPENCLAW-MODEL-OK",
agentId: "main",
model: "openrouter/auto",
modelRun: true,
promptMode: "none",
},
runtime,
);
const callArgs = getLastEmbeddedCall();
expect(callArgs).toEqual(
expect.objectContaining({
provider: "openrouter",
model: "openrouter/auto",
modelRun: true,
promptMode: "none",
disableTools: true,
}),
);
});
});
it("passes resolved session-id resume files to embedded runs", async () => {
await withTempHome(async (home) => {
const resumeStore = path.join(home, "sessions-resume.json");

View File

@@ -153,6 +153,10 @@ export const AgentParamsSchema = Type.Object(
bestEffortDeliver: Type.Optional(Type.Boolean()),
lane: Type.Optional(Type.String()),
cleanupBundleMcpOnRunEnd: Type.Optional(Type.Boolean()),
modelRun: Type.Optional(Type.Boolean()),
promptMode: Type.Optional(
Type.Union([Type.Literal("full"), Type.Literal("minimal"), Type.Literal("none")]),
),
extraSystemPrompt: Type.Optional(Type.String()),
bootstrapContextMode: Type.Optional(
Type.Union([Type.Literal("full"), Type.Literal("lightweight")]),

View File

@@ -425,6 +425,8 @@ export const agentHandlers: GatewayRequestHandlers = {
groupSpace?: string;
lane?: string;
extraSystemPrompt?: string;
modelRun?: boolean;
promptMode?: "full" | "minimal" | "none";
bootstrapContextMode?: "full" | "lightweight";
bootstrapContextRunKind?: "default" | "heartbeat" | "cron";
internalEvents?: AgentInternalEvent[];
@@ -1170,6 +1172,8 @@ export const agentHandlers: GatewayRequestHandlers = {
runId,
lane: request.lane,
cleanupBundleMcpOnRunEnd: request.cleanupBundleMcpOnRunEnd === true,
modelRun: request.modelRun === true,
promptMode: request.promptMode,
extraSystemPrompt: request.extraSystemPrompt,
bootstrapContextMode: request.bootstrapContextMode,
bootstrapContextRunKind: request.bootstrapContextRunKind,