From fb3ea9efb17911f171ef7ef131b60ef6fca21551 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 09:08:07 +0100 Subject: [PATCH] fix: keep gateway model probes raw --- CHANGELOG.md | 1 + docs/cli/infer.md | 4 +- docs/gateway/local-models.md | 12 + src/agents/agent-command.ts | 8 +- .../command/attempt-execution.cli.test.ts | 64 +++++ src/agents/command/attempt-execution.ts | 57 +++-- .../run.before-agent-reply-cron.test.ts | 18 ++ src/agents/pi-embedded-runner/run.ts | 2 + ...mpt.spawn-workspace.context-engine.test.ts | 68 ++++- .../attempt.spawn-workspace.test-support.ts | 4 + src/agents/pi-embedded-runner/run/attempt.ts | 241 ++++++++++-------- src/commands/agent-command.test-mocks.ts | 19 +- src/commands/agent.test.ts | 54 ++++ src/gateway/server-methods/agent.test.ts | 39 +++ src/gateway/server-methods/agent.ts | 3 +- 15 files changed, 455 insertions(+), 139 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f12bd337a77..0947031a572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai - Build/runtime: write the runtime-postbuild stamp after `pnpm build` writes the build stamp, so the next CLI invocation does not re-sync runtime artifacts after a successful build. Fixes #73151. Thanks @bittoby. - Build/runtime: preserve staged bundled-plugin runtime dependency caches across source-checkout tsdown rebuilds, so local CLI and gateway-watch rebuilds no longer recreate large plugin dependency trees before starting. Refs #73205. Thanks @SymbolStar. - CLI/channels: list configured chat channel accounts from read-only setup metadata even when the standalone CLI has not loaded the runtime channel registry, so `openclaw channels list` shows Telegram accounts before auth providers. Fixes #73319 and #73322. Thanks @mlaihk. 
+- CLI/model probes: keep `infer model run --gateway` raw by skipping prior session transcript, bootstrap context, context-engine assembly, tools, and bundled MCP servers, so local backends can be tested without full agent-context overhead. Fixes #73308. Thanks @ScientificProgrammer. - CLI/model probes: reject empty or whitespace-only `infer model run --prompt` values before calling local providers or the Gateway, so smoke checks do not spend provider calls on invalid turns. Fixes #73185. Thanks @iot2edge. - Gateway/media: route text-only `chat.send` image offloads through media-understanding fields so `agents.defaults.imageModel` can describe WebChat attachments instead of leaving only an opaque `media://inbound` marker. Fixes #72968. Thanks @vorajeeah. - Gateway/Windows: route no-listener restart handoffs through the Windows supervisor without leaving restart tokens in flight, so failed task scheduling can be retried and successful handoffs do not coalesce later restart requests. (#69056) Thanks @Thatgfsj. diff --git a/docs/cli/infer.md b/docs/cli/infer.md index 722f9a7fd35..6b1b6f49616 100644 --- a/docs/cli/infer.md +++ b/docs/cli/infer.md @@ -131,7 +131,7 @@ This table maps common inference tasks to the corresponding infer command. - Gateway-managed state commands default to gateway. - The normal local path does not require the gateway to be running. - Local `model run` is a lean one-shot provider completion. It resolves the configured agent model and auth, but does not start a chat-agent turn, load tools, or open bundled MCP servers. -- `model run --gateway` still uses the Gateway agent runtime so it can exercise the same routed runtime path as a normal Gateway-backed turn. MCP servers opened through that runtime are retired after the reply, so repeated scripted invocations do not keep stdio MCP child processes alive. 
+- `model run --gateway` exercises Gateway routing, saved auth, provider selection, and the embedded runtime, but still runs as a raw model probe: it sends the supplied prompt without prior session transcript, bootstrap/AGENTS context, context-engine assembly, tools, or bundled MCP servers. ## Model @@ -161,7 +161,7 @@ Notes: - Local `model run` is the narrowest CLI smoke for provider/model/auth health because it sends only the supplied prompt to the selected model. - `model run --prompt` must contain non-whitespace text; empty prompts are rejected before local providers or the Gateway are called. - Local `model run` exits non-zero when the provider returns no text output, so unreachable local providers and empty completions do not look like successful probes. -- Use `model run --gateway` when you need to test Gateway routing, agent-runtime setup, or Gateway-managed provider state instead of the lean local completion path. +- Use `model run --gateway` when you need to test Gateway routing, agent-runtime setup, or Gateway-managed provider state while keeping the model input raw. Use `openclaw agent` or chat surfaces when you want the full agent context, tools, memory, and session transcript. - `model auth login`, `model auth logout`, and `model auth status` manage saved provider auth state. ## Image diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md index 9eaf268608e..0cdba312d0c 100644 --- a/docs/gateway/local-models.md +++ b/docs/gateway/local-models.md @@ -253,6 +253,18 @@ Compatibility notes for stricter OpenAI-compatible backends: openclaw infer model run --local --model <provider/model> --prompt "Reply with exactly: pong" --json ``` + To verify the Gateway route without the full agent prompt shape, use the + Gateway model probe instead: + + ```bash + openclaw infer model run --gateway --model <provider/model> --prompt "Reply with exactly: pong" --json + ``` + + Both local and Gateway model probes send only the supplied prompt. 
The + Gateway probe still validates Gateway routing, auth, and provider selection, + but it intentionally skips prior session transcript, AGENTS/bootstrap context, + context-engine assembly, tools, and bundled MCP servers. + If that succeeds but normal OpenClaw agent turns fail, first try `agents.defaults.experimental.localModelLean: true` to drop heavyweight default tools like `browser`, `cron`, and `message`; this is an experimental diff --git a/src/agents/agent-command.ts b/src/agents/agent-command.ts index 1d66cbfc7c7..3071e37375c 100644 --- a/src/agents/agent-command.ts +++ b/src/agents/agent-command.ts @@ -255,6 +255,7 @@ async function prepareAgentCommandExecution( opts: AgentCommandOpts & { senderIsOwner: boolean }, runtime: RuntimeEnv, ) { + const isRawModelRun = opts.modelRun === true || opts.promptMode === "none"; const message = opts.message ?? ""; if (!message.trim()) { throw new Error("Message (--message) is required"); @@ -377,7 +378,7 @@ async function prepareAgentCommandExecution( }) : null; const body = - acpResolution?.kind === "ready" + !isRawModelRun && acpResolution?.kind === "ready" ? 
resolveAcpPromptBody(message, opts.internalEvents) : prependInternalEventContext(message, opts.internalEvents); const transcriptBody = @@ -417,6 +418,7 @@ async function agentCommandInternal( deps?: CliDeps, ) { const resolvedDeps = await resolveAgentCommandDeps(deps); + const isRawModelRun = opts.modelRun === true || opts.promptMode === "none"; const prepared = await prepareAgentCommandExecution(opts, runtime); const { body, @@ -459,11 +461,11 @@ async function agentCommandInternal( } } - if (acpResolution?.kind === "stale") { + if (!isRawModelRun && acpResolution?.kind === "stale") { throw acpResolution.error; } - if (acpResolution?.kind === "ready" && sessionKey) { + if (!isRawModelRun && acpResolution?.kind === "ready" && sessionKey) { const attemptExecutionRuntime = await loadAttemptExecutionRuntime(); const startedAt = Date.now(); registerAgentRunContext(runId, { diff --git a/src/agents/command/attempt-execution.cli.test.ts b/src/agents/command/attempt-execution.cli.test.ts index df95c4bae4d..6f0a292ee6a 100644 --- a/src/agents/command/attempt-execution.cli.test.ts +++ b/src/agents/command/attempt-execution.cli.test.ts @@ -529,6 +529,70 @@ describe("CLI attempt execution", () => { ); }); + it("keeps one-shot model runs on the raw embedded provider path", async () => { + const sessionKey = "agent:main:direct:model-run-raw"; + const sessionEntry: SessionEntry = { + sessionId: "openclaw-session-model-run-raw", + updatedAt: Date.now(), + }; + const sessionStore: Record<string, SessionEntry> = { [sessionKey]: sessionEntry }; + await fs.writeFile(storePath, JSON.stringify(sessionStore, null, 2), "utf-8"); + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + meta: { durationMs: 1 }, + } satisfies EmbeddedPiRunResult); + + await runAgentAttempt({ + providerOverride: "anthropic", + modelOverride: "claude-opus-4-7", + cfg: { + agents: { + defaults: { + agentRuntime: { id: "claude-cli", fallback: "none" }, + }, + }, + } as OpenClawConfig, + sessionEntry, + sessionId: sessionEntry.sessionId, + 
sessionKey, + sessionAgentId: "main", + sessionFile: path.join(tmpDir, "session.jsonl"), + workspaceDir: tmpDir, + body: "raw prompt", + isFallbackRetry: false, + resolvedThinkLevel: "medium", + timeoutMs: 1_000, + runId: "run-model-run-raw", + opts: { + senderIsOwner: false, + modelRun: true, + promptMode: "none", + } as Parameters<typeof runAgentAttempt>[0]["opts"], + runContext: {} as Parameters<typeof runAgentAttempt>[0]["runContext"], + spawnedBy: undefined, + messageChannel: "telegram", + skillsSnapshot: undefined, + resolvedVerboseLevel: undefined, + agentDir: tmpDir, + onAgentEvent: vi.fn(), + authProfileProvider: "anthropic", + sessionStore, + storePath, + sessionHasHistory: true, + }); + + expect(runCliAgentMock).not.toHaveBeenCalled(); + expect(runEmbeddedPiAgentMock).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "anthropic", + model: "claude-opus-4-7", + agentHarnessId: "pi", + modelRun: true, + promptMode: "none", + disableTools: true, + }), + ); + }); + it("forwards one-shot CLI cleanup to CLI providers", async () => { const sessionKey = "agent:main:direct:cleanup-claude-cli"; const sessionEntry: SessionEntry = { diff --git a/src/agents/command/attempt-execution.ts b/src/agents/command/attempt-execution.ts index 9faf8e40f74..a76f9d4a2dc 100644 --- a/src/agents/command/attempt-execution.ts +++ b/src/agents/command/attempt-execution.ts @@ -261,7 +261,9 @@ export function runAgentAttempt(params: { allowTransientCooldownProbe?: boolean; sessionHasHistory?: boolean; }) { + const isRawModelRun = params.opts.modelRun === true || params.opts.promptMode === "none"; const claudeCliFallbackPrelude = + !isRawModelRun && params.isFallbackRetry && isClaudeCliProvider(params.originalProvider) && !isClaudeCliProvider(params.providerOverride) @@ -280,29 +282,36 @@ export function runAgentAttempt(params: { ); const bootstrapPromptWarningSignature = bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1]; - const sessionPinnedAgentHarnessId = 
resolveSessionPinnedAgentHarnessId({ - cfg: params.cfg, - sessionAgentId: params.sessionAgentId, - sessionEntry: params.sessionEntry, - sessionHasHistory: params.sessionHasHistory, - sessionId: params.sessionId, - sessionKey: params.sessionKey ?? params.sessionId, - }); - const agentRuntimeOverride = params.sessionEntry?.agentRuntimeOverride?.trim(); - const cliExecutionProvider = - resolveCliRuntimeExecutionProvider({ - provider: params.providerOverride, - cfg: params.cfg, - agentId: params.sessionAgentId, - runtimeOverride: agentRuntimeOverride, - }) ?? params.providerOverride; - const agentHarnessPolicy = resolveAgentHarnessPolicy({ - provider: params.providerOverride, - modelId: params.modelOverride, - config: params.cfg, - agentId: params.sessionAgentId, - sessionKey: params.sessionKey ?? params.sessionId, - }); + const sessionPinnedAgentHarnessId = isRawModelRun + ? "pi" + : resolveSessionPinnedAgentHarnessId({ + cfg: params.cfg, + sessionAgentId: params.sessionAgentId, + sessionEntry: params.sessionEntry, + sessionHasHistory: params.sessionHasHistory, + sessionId: params.sessionId, + sessionKey: params.sessionKey ?? params.sessionId, + }); + const agentRuntimeOverride = isRawModelRun + ? undefined + : params.sessionEntry?.agentRuntimeOverride?.trim(); + const cliExecutionProvider = isRawModelRun + ? params.providerOverride + : (resolveCliRuntimeExecutionProvider({ + provider: params.providerOverride, + cfg: params.cfg, + agentId: params.sessionAgentId, + runtimeOverride: agentRuntimeOverride, + }) ?? params.providerOverride); + const agentHarnessPolicy = isRawModelRun + ? ({ runtime: "pi", fallback: "pi" } as const) + : resolveAgentHarnessPolicy({ + provider: params.providerOverride, + modelId: params.modelOverride, + config: params.cfg, + agentId: params.sessionAgentId, + sessionKey: params.sessionKey ?? 
params.sessionId, + }); const runtimeAuthPlan = buildAgentRuntimeAuthPlan({ provider: params.providerOverride, authProfileProvider: params.authProfileProvider, @@ -314,7 +323,7 @@ export function runAgentAttempt(params: { allowHarnessAuthProfileForwarding: !isCliProvider(cliExecutionProvider, params.cfg), }); const authProfileId = runtimeAuthPlan.forwardedAuthProfileId; - if (isCliProvider(cliExecutionProvider, params.cfg)) { + if (!isRawModelRun && isCliProvider(cliExecutionProvider, params.cfg)) { const cliSessionBinding = getCliSessionBinding(params.sessionEntry, cliExecutionProvider); const resolveReusableCliSessionBinding = async () => { if ( diff --git a/src/agents/pi-embedded-runner/run.before-agent-reply-cron.test.ts b/src/agents/pi-embedded-runner/run.before-agent-reply-cron.test.ts index ba4e9f4ec9a..20a06f1e1c0 100644 --- a/src/agents/pi-embedded-runner/run.before-agent-reply-cron.test.ts +++ b/src/agents/pi-embedded-runner/run.before-agent-reply-cron.test.ts @@ -82,4 +82,22 @@ describe("runEmbeddedPiAgent cron before_agent_reply seam", () => { expect(mockedGlobalHookRunner.runBeforeAgentReply).not.toHaveBeenCalled(); expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1); }); + + it("forwards one-shot model-run flags into the embedded attempt", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + await runEmbeddedPiAgent({ + ...overflowBaseRunParams, + trigger: "user", + modelRun: true, + promptMode: "none", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledWith( + expect.objectContaining({ + modelRun: true, + promptMode: "none", + }), + ); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index ed59c2070e4..4a792a5a504 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -981,6 +981,8 @@ export async function runEmbeddedPiAgent( sourceReplyDeliveryMode: params.sourceReplyDeliveryMode, 
inputProvenance: params.inputProvenance, streamParams: params.streamParams, + modelRun: params.modelRun, + promptMode: params.promptMode, ownerNumbers: params.ownerNumbers, enforceFinalTag: params.enforceFinalTag, silentExpected: params.silentExpected, diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index 1b45703e51b..57b63274515 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -130,7 +130,7 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { }); it("sends transcriptPrompt visibly and queues runtime context as hidden custom context", async () => { - const seen: { prompt?: string; messages?: unknown[] } = {}; + const seen: { prompt?: string; messages?: unknown[]; systemPrompt?: string } = {}; const result = await createContextEngineAttemptRunner({ contextEngine: createContextEngineBootstrapAndAssemble(), @@ -240,6 +240,72 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { expect(contextCompiled?.data?.systemPrompt).toContain("internal heartbeat event"); }); + it("keeps gateway model runs independent from agent context and session history", async () => { + const bootstrap = vi.fn(async () => ({ bootstrapped: true })); + const assemble = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => ({ + messages: [ + ...messages, + { role: "custom", customType: "test-context", content: "should not be sent" }, + ] as AgentMessage[], + estimatedTokens: 1, + })); + const afterTurn = vi.fn(async () => {}); + const runBeforePromptBuild = vi.fn(async () => ({ prependContext: "hook context" })); + const runLlmInput = vi.fn(async () => {}); + hoisted.getGlobalHookRunnerMock.mockReturnValue({ + hasHooks: vi.fn( + (name: string) => + name === "before_prompt_build" || 
name === "before_agent_start" || name === "llm_input", + ), + runBeforePromptBuild, + runBeforeAgentStart: vi.fn(async () => ({ prependContext: "legacy hook context" })), + runLlmInput, + }); + const seen: { prompt?: string; messages?: unknown[]; systemPrompt?: string } = {}; + + const result = await createContextEngineAttemptRunner({ + contextEngine: createTestContextEngine({ + bootstrap, + assemble, + afterTurn, + }), + sessionKey, + tempPaths, + sessionMessages: [ + { role: "user", content: "old session question", timestamp: 1 }, + { role: "assistant", content: "old session answer", timestamp: 2 }, + ] as AgentMessage[], + attemptOverrides: { + promptMode: "none", + disableTools: true, + }, + sessionPrompt: async (session, prompt) => { + seen.prompt = prompt; + seen.messages = [...session.messages]; + seen.systemPrompt = session.agent.state.systemPrompt; + session.messages = [ + ...session.messages, + { role: "assistant", content: "pong", timestamp: 3 }, + ]; + }, + }); + + expect(seen.prompt).toBe("hello"); + expect(seen.messages).toEqual([]); + expect(seen.systemPrompt ?? "").toBe(""); + expect(result.finalPromptText).toBe("hello"); + expect(result.systemPromptReport?.systemPrompt ?? 
"").toBe(""); + expect(result.messagesSnapshot).toEqual([ + expect.objectContaining({ role: "assistant", content: "pong" }), + ]); + expect(hoisted.resolveBootstrapContextForRunMock).not.toHaveBeenCalled(); + expect(bootstrap).not.toHaveBeenCalled(); + expect(assemble).not.toHaveBeenCalled(); + expect(afterTurn).not.toHaveBeenCalled(); + expect(runBeforePromptBuild).not.toHaveBeenCalled(); + expect(runLlmInput).not.toHaveBeenCalled(); + }); + it("forwards sessionKey to bootstrap, assemble, and afterTurn", async () => { const { bootstrap, assemble } = createContextEngineBootstrapAndAssemble(); const afterTurn = vi.fn(async (_params: { sessionKey?: string }) => {}); diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index 865ca9c9c62..22b9a01c47a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -679,6 +679,7 @@ export type MutableSession = { agent: { streamFn?: unknown; transport?: string; + reset: () => void; state: { messages: unknown[]; systemPrompt?: string; @@ -798,6 +799,9 @@ export function createDefaultEmbeddedSession(params?: { isCompacting: false, isStreaming: false, agent: { + reset: () => { + session.messages = []; + }, state: { get messages() { return session.messages; diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 9ca831bc542..9576989e1b7 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -645,6 +645,13 @@ export async function runEmbeddedAttempt( const sessionLabel = params.sessionKey ?? 
params.sessionId; const contextInjectionMode = resolveContextInjectionMode(params.config); + const isRawModelRun = params.modelRun === true || params.promptMode === "none"; + if (isRawModelRun && log.isEnabled("debug")) { + log.debug( + `raw model run enabled: modelRun=${params.modelRun === true} promptMode=${params.promptMode ?? "unset"}`, + ); + } + const activeContextEngine = isRawModelRun ? undefined : params.contextEngine; const agentDir = params.agentDir ?? resolveOpenClawAgentDir(); const diagnosticTrace = freezeDiagnosticTraceContext( createDiagnosticTraceContextFromActiveScope(), @@ -684,7 +691,7 @@ export async function runEmbeddedAttempt( }); }; const toolsRaw = - params.disableTools || params.modelRun + params.disableTools || isRawModelRun ? [] : (() => { const allTools = createOpenClawCodingTools({ @@ -769,7 +776,9 @@ export async function runEmbeddedAttempt( contextFiles: resolvedContextFiles, shouldRecordCompletedBootstrapTurn, } = await resolveAttemptBootstrapContext({ - contextInjectionMode, + // modelRun is a provider probe, not an agent turn. Keep AGENTS/BOOTSTRAP + // context out even when the gateway is exercising the embedded runtime. + contextInjectionMode: isRawModelRun ? "never" : contextInjectionMode, bootstrapContextMode: params.bootstrapContextMode, bootstrapContextRunKind: params.bootstrapContextRunKind ?? "default", bootstrapMode, @@ -864,10 +873,10 @@ export async function runEmbeddedAttempt( modelApi: params.model.api, model: params.model, }); - const clientTools = toolsEnabled ? params.clientTools : undefined; + const clientTools = toolsEnabled && !isRawModelRun ? 
params.clientTools : undefined; const bundleMcpEnabled = shouldCreateBundleMcpRuntimeForAttempt({ toolsEnabled, - disableTools: params.disableTools, + disableTools: params.disableTools || isRawModelRun, toolsAllow: params.toolsAllow, }); const bundleMcpSessionRuntime = bundleMcpEnabled @@ -887,17 +896,18 @@ export async function runEmbeddedAttempt( ], }) : undefined; - const bundleLspRuntime = toolsEnabled - ? await createBundleLspToolRuntime({ - workspaceDir: effectiveWorkspace, - cfg: params.config, - reservedToolNames: [ - ...tools.map((tool) => tool.name), - ...(clientTools?.map((tool) => tool.function.name) ?? []), - ...(bundleMcpRuntime?.tools.map((tool) => tool.name) ?? []), - ], - }) - : undefined; + const bundleLspRuntime = + toolsEnabled && !isRawModelRun + ? await createBundleLspToolRuntime({ + workspaceDir: effectiveWorkspace, + cfg: params.config, + reservedToolNames: [ + ...tools.map((tool) => tool.name), + ...(clientTools?.map((tool) => tool.function.name) ?? []), + ...(bundleMcpRuntime?.tools.map((tool) => tool.name) ?? []), + ], + }) + : undefined; const filteredBundledTools = applyFinalEffectiveToolPolicy({ bundledTools: [...(bundleMcpRuntime?.tools ?? []), ...(bundleLspRuntime?.tools ?? [])], config: params.config, @@ -1062,7 +1072,7 @@ export async function runEmbeddedAttempt( const isDefaultAgent = sessionAgentId === defaultAgentId; const promptMode = params.promptMode ?? - (params.modelRun ? "none" : resolvePromptModeForSession(params.sessionKey)); + (isRawModelRun ? "none" : resolvePromptModeForSession(params.sessionKey)); // When toolsAllow is set, use minimal prompt and strip skills catalog const effectivePromptMode = params.toolsAllow?.length ? 
("minimal" as const) : promptMode; @@ -1148,27 +1158,29 @@ export async function runEmbeddedAttempt( userTime, userTimeFormat, contextFiles, - includeMemorySection: !params.contextEngine || params.contextEngine.info.id === "legacy", + includeMemorySection: !activeContextEngine || activeContextEngine.info.id === "legacy", memoryCitationsMode: params.config?.memory?.citations, promptContribution, }); - const appendPrompt = transformProviderSystemPrompt({ - provider: params.provider, - config: params.config, - workspaceDir: effectiveWorkspace, - context: { - config: params.config, - agentDir: params.agentDir, - workspaceDir: effectiveWorkspace, - provider: params.provider, - modelId: params.modelId, - promptMode: effectivePromptMode, - runtimeChannel, - runtimeCapabilities, - agentId: sessionAgentId, - systemPrompt: builtAppendPrompt, - }, - }); + const appendPrompt = isRawModelRun + ? "" + : transformProviderSystemPrompt({ + provider: params.provider, + config: params.config, + workspaceDir: effectiveWorkspace, + context: { + config: params.config, + agentDir: params.agentDir, + workspaceDir: effectiveWorkspace, + provider: params.provider, + modelId: params.modelId, + promptMode: effectivePromptMode, + runtimeChannel, + runtimeCapabilities, + agentId: sessionAgentId, + systemPrompt: builtAppendPrompt, + }, + }); const systemPromptReport = buildSystemPromptReport({ source: "run", generatedAt: Date.now(), @@ -1258,7 +1270,7 @@ export async function runEmbeddedAttempt( await runAttemptContextEngineBootstrap({ hadSessionFile, - contextEngine: params.contextEngine, + contextEngine: activeContextEngine, sessionId: params.sessionId, sessionKey: params.sessionKey, sessionFile: params.sessionFile, @@ -1298,7 +1310,7 @@ export async function runEmbeddedAttempt( }); applyPiAutoCompactionGuard({ settingsManager, - contextEngineInfo: params.contextEngine?.info, + contextEngineInfo: activeContextEngine?.info, }); // Sets compaction/pruning runtime state and returns extension 
factories @@ -1420,6 +1432,15 @@ export async function runEmbeddedAttempt( } session.setActiveToolsByName(sessionToolAllowlist); const activeSession = session; + if (isRawModelRun) { + // Raw model probes should measure exactly the requested prompt against + // the selected provider/model. Reset clears restored transcript state + // and queues; the empty system override prevents Pi from rebuilding the + // normal OpenClaw agent/tool prompt when `session.prompt()` starts. + activeSession.agent.reset(); + applySystemPromptOverrideToSession(activeSession, ""); + systemPromptText = ""; + } if (typeof activeSession.agent.convertToLlm === "function") { const baseConvertToLlm = activeSession.agent.convertToLlm.bind(activeSession.agent); activeSession.agent.convertToLlm = async (messages) => @@ -1433,7 +1454,7 @@ export async function runEmbeddedAttempt( queueYieldInterruptForSession = () => { queueSessionsYieldInterruptMessage(activeSession); }; - if (params.contextEngine?.info?.ownsCompaction !== true) { + if (!activeContextEngine || activeContextEngine.info.ownsCompaction !== true) { removeToolResultContextGuard = installToolResultContextGuard({ agent: activeSession.agent, contextWindowTokens: Math.max( @@ -1446,7 +1467,7 @@ export async function runEmbeddedAttempt( } else { removeToolResultContextGuard = installContextEngineLoopHook({ agent: activeSession.agent, - contextEngine: params.contextEngine, + contextEngine: activeContextEngine, sessionId: params.sessionId, sessionKey: params.sessionKey, sessionFile: params.sessionFile, @@ -1867,65 +1888,75 @@ export async function runEmbeddedAttempt( ); try { - const prior = await sanitizeSessionHistory({ - messages: activeSession.messages, - modelApi: params.model.api, - modelId: params.modelId, - provider: params.provider, - allowedToolNames, - config: params.config, - workspaceDir: effectiveWorkspace, - env: process.env, - model: params.model, - sessionManager, - sessionId: params.sessionId, - policy: transcriptPolicy, - 
}); - cacheTrace?.recordStage("session:sanitized", { messages: prior }); - const validated = await validateReplayTurns({ - messages: prior, - modelApi: params.model.api, - modelId: params.modelId, - provider: params.provider, - config: params.config, - workspaceDir: effectiveWorkspace, - env: process.env, - model: params.model, - sessionId: params.sessionId, - policy: transcriptPolicy, - }); - const heartbeatSummary = - params.config && sessionAgentId - ? resolveHeartbeatSummaryForAgent(params.config, sessionAgentId) - : undefined; - const heartbeatFiltered = filterHeartbeatPairs( - validated, - heartbeatSummary?.ackMaxChars, - heartbeatSummary?.prompt, - ); - const truncated = limitHistoryTurns( - heartbeatFiltered, - getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), - ); - // Re-run tool_use/tool_result pairing repair after truncation, since - // limitHistoryTurns can orphan tool_result blocks by removing the - // assistant message that contained the matching tool_use. - const limited = transcriptPolicy.repairToolUseResultPairing - ? sanitizeToolUseResultPairing(truncated, { - erroredAssistantResultPolicy: "drop", - ...(isOpenAIResponsesApi ? 
{ missingToolResultText: "aborted" } : {}), - }) - : truncated; - cacheTrace?.recordStage("session:limited", { messages: limited }); - if (limited.length > 0) { - activeSession.agent.state.messages = limited; + if (isRawModelRun) { + activeSession.agent.reset(); + applySystemPromptOverrideToSession(activeSession, ""); + systemPromptText = ""; + cacheTrace?.recordStage("session:raw-model-run", { + messages: activeSession.messages, + system: systemPromptText, + }); + } else { + const prior = await sanitizeSessionHistory({ + messages: activeSession.messages, + modelApi: params.model.api, + modelId: params.modelId, + provider: params.provider, + allowedToolNames, + config: params.config, + workspaceDir: effectiveWorkspace, + env: process.env, + model: params.model, + sessionManager, + sessionId: params.sessionId, + policy: transcriptPolicy, + }); + cacheTrace?.recordStage("session:sanitized", { messages: prior }); + const validated = await validateReplayTurns({ + messages: prior, + modelApi: params.model.api, + modelId: params.modelId, + provider: params.provider, + config: params.config, + workspaceDir: effectiveWorkspace, + env: process.env, + model: params.model, + sessionId: params.sessionId, + policy: transcriptPolicy, + }); + const heartbeatSummary = + params.config && sessionAgentId + ? resolveHeartbeatSummaryForAgent(params.config, sessionAgentId) + : undefined; + const heartbeatFiltered = filterHeartbeatPairs( + validated, + heartbeatSummary?.ackMaxChars, + heartbeatSummary?.prompt, + ); + const truncated = limitHistoryTurns( + heartbeatFiltered, + getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), + ); + // Re-run tool_use/tool_result pairing repair after truncation, since + // limitHistoryTurns can orphan tool_result blocks by removing the + // assistant message that contained the matching tool_use. + const limited = transcriptPolicy.repairToolUseResultPairing + ? 
sanitizeToolUseResultPairing(truncated, { + erroredAssistantResultPolicy: "drop", + ...(isOpenAIResponsesApi ? { missingToolResultText: "aborted" } : {}), + }) + : truncated; + cacheTrace?.recordStage("session:limited", { messages: limited }); + if (limited.length > 0) { + activeSession.agent.state.messages = limited; + } } - if (params.contextEngine) { + if (activeContextEngine) { try { unwindowedContextEngineMessagesForPrecheck = activeSession.messages.slice(); const assembled = await assembleAttemptContextEngine({ - contextEngine: params.contextEngine, + contextEngine: activeContextEngine, sessionId: params.sessionId, sessionKey: params.sessionKey, messages: activeSession.messages, @@ -2258,14 +2289,16 @@ export async function runEmbeddedAttempt( }; const promptBuildMessages = pruneProcessedHistoryImages(activeSession.messages) ?? activeSession.messages; - const hookResult = await resolvePromptBuildHookResult({ - config: params.config ?? getRuntimeConfig(), - prompt: params.prompt, - messages: promptBuildMessages, - hookCtx, - hookRunner, - legacyBeforeAgentStartResult: params.legacyBeforeAgentStartResult, - }); + const hookResult = isRawModelRun + ? undefined + : await resolvePromptBuildHookResult({ + config: params.config ?? getRuntimeConfig(), + prompt: params.prompt, + messages: promptBuildMessages, + hookCtx, + hookRunner, + legacyBeforeAgentStartResult: params.legacyBeforeAgentStartResult, + }); { if (hookResult?.prependContext) { effectivePrompt = `${hookResult.prependContext}\n\n${effectivePrompt}`; @@ -2368,7 +2401,7 @@ export async function runEmbeddedAttempt( }); // Repair orphaned trailing user messages so new prompts don't violate role ordering. - const leafEntry = sessionManager.getLeafEntry(); + const leafEntry = isRawModelRun ? 
null : sessionManager.getLeafEntry(); if (leafEntry?.type === "message" && leafEntry.message.role === "user") { const orphanPromptMerge = resolveMessageMergeStrategy().mergeOrphanedTrailingUserPrompt({ prompt: effectivePrompt, @@ -2537,7 +2570,7 @@ export async function runEmbeddedAttempt( ); } - if (hookRunner?.hasHooks("llm_input")) { + if (!isRawModelRun && hookRunner?.hasHooks("llm_input")) { hookRunner .runLlmInput( { @@ -2892,7 +2925,7 @@ export async function runEmbeddedAttempt( } // Let the active context engine run its post-turn lifecycle. - if (params.contextEngine) { + if (activeContextEngine) { const afterTurnRuntimeContext = buildAfterTurnRuntimeContextFromUsage({ attempt: params, workspaceDir: effectiveWorkspace, @@ -2902,7 +2935,7 @@ export async function runEmbeddedAttempt( promptCache, }); await finalizeAttemptContextEngineTurn({ - contextEngine: params.contextEngine, + contextEngine: activeContextEngine, promptError: Boolean(promptError), aborted, yieldAborted, diff --git a/src/commands/agent-command.test-mocks.ts b/src/commands/agent-command.test-mocks.ts index 528aaadba6d..a71a2c0516b 100644 --- a/src/commands/agent-command.test-mocks.ts +++ b/src/commands/agent-command.test-mocks.ts @@ -22,13 +22,24 @@ vi.mock("../cli/deps.js", () => ({ createDefaultDeps: vi.fn(() => ({})), })); +const acpManagerMock = vi.hoisted(() => ({ + current: { + resolveSession: vi.fn(() => null), + } as unknown, +})); + vi.mock("../acp/control-plane/manager.js", () => ({ __testing: { - resetAcpSessionManagerForTests: vi.fn(), + resetAcpSessionManagerForTests: vi.fn(() => { + acpManagerMock.current = { + resolveSession: vi.fn(() => null), + }; + }), + setAcpSessionManagerForTests: vi.fn((manager: unknown) => { + acpManagerMock.current = manager; + }), }, - getAcpSessionManager: vi.fn(() => ({ - resolveSession: vi.fn(() => null), - })), + getAcpSessionManager: vi.fn(() => acpManagerMock.current), })); vi.mock("../agents/pi-embedded.js", () => ({ diff --git 
a/src/commands/agent.test.ts b/src/commands/agent.test.ts index cf6e5d539cb..30e4e43b753 100644 --- a/src/commands/agent.test.ts +++ b/src/commands/agent.test.ts @@ -440,6 +440,60 @@ describe("agentCommand", () => { }); }); + it("bypasses ACP sessions for one-shot model runs", async () => { + await withTempHome(async (home) => { + const store = path.join(home, "sessions.json"); + const sessionKey = "agent:main:main"; + mockConfig(home, store, { models: {} }); + writeSessionStoreSeed(store, { + [sessionKey]: { + sessionId: "acp-backed-session", + updatedAt: Date.now(), + }, + }); + const runTurn = vi.fn(); + acpManagerTesting.setAcpSessionManagerForTests({ + resolveSession: vi.fn(() => ({ + kind: "ready", + sessionKey, + meta: { + backend: "acpx", + agent: "codex", + runtimeSessionName: "runtime-1", + mode: "persistent", + state: "idle", + lastActivityAt: Date.now(), + }, + })), + runTurn, + }); + + await agentCommand( + { + message: "Reply with exactly OPENCLAW-MODEL-OK", + sessionKey, + model: "openrouter/auto", + modelRun: true, + promptMode: "none", + }, + runtime, + ); + + expect(runTurn).not.toHaveBeenCalled(); + const callArgs = getLastEmbeddedCall(); + expect(callArgs).toEqual( + expect.objectContaining({ + provider: "openrouter", + model: "openrouter/auto", + prompt: "Reply with exactly OPENCLAW-MODEL-OK", + modelRun: true, + promptMode: "none", + disableTools: true, + }), + ); + }); + }); + it("passes resolved session-id resume files to embedded runs", async () => { await withTempHome(async (home) => { const resumeStore = path.join(home, "sessions-resume.json"); diff --git a/src/gateway/server-methods/agent.test.ts b/src/gateway/server-methods/agent.test.ts index 4d131fde4ca..25bfedc9bc5 100644 --- a/src/gateway/server-methods/agent.test.ts +++ b/src/gateway/server-methods/agent.test.ts @@ -833,6 +833,45 @@ describe("gateway agent handler", () => { resetTimeConfig(); }); + it("keeps model-run gateway prompts undecorated and forwards raw-run flags", async 
() => { + setupNewYorkTimeConfig("2026-01-29T01:30:00.000Z"); + primeMainAgentRun({ cfg: mocks.loadConfigReturn }); + + await invokeAgent( + { + message: "Reply exactly: pong", + agentId: "main", + provider: "ollama", + model: "llama3.2:latest", + modelRun: true, + promptMode: "none", + sessionKey: "agent:main:main", + idempotencyKey: "test-model-run-raw", + }, + { + reqId: "model-run-raw", + client: { connect: { scopes: ["operator.admin"] } } as AgentHandlerArgs["client"], + }, + ); + + await waitForAssertion(() => expect(mocks.agentCommand).toHaveBeenCalled()); + + const callArgs = mocks.agentCommand.mock.calls[0][0] as { + message?: string; + modelRun?: boolean; + promptMode?: string; + }; + expect(callArgs).toEqual( + expect.objectContaining({ + message: "Reply exactly: pong", + modelRun: true, + promptMode: "none", + }), + ); + + resetTimeConfig(); + }); + it.each([ { name: "passes senderIsOwner=false for write-scoped gateway callers", diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index d140ea99166..5691936fbc8 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -448,6 +448,7 @@ export const agentHandlers: GatewayRequestHandlers = { const allowModelOverride = resolveAllowModelOverrideFromClient(client); const canResetSession = resolveCanResetSessionFromClient(client); const requestedModelOverride = Boolean(request.provider || request.model); + const isRawModelRun = request.modelRun === true || request.promptMode === "none"; if (requestedModelOverride && !allowModelOverride) { respond( false, @@ -773,7 +774,7 @@ export const agentHandlers: GatewayRequestHandlers = { // Channel messages (Discord, Telegram, etc.) get timestamps via envelope // formatting in a separate code path — they never reach this handler. 
// See: https://github.com/openclaw/openclaw/issues/3658 - if (!skipTimestampInjection) { + if (!skipTimestampInjection && !isRawModelRun) { message = injectTimestamp(message, timestampOptsFromConfig(cfg)); }