diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index b1861d7c24b..f3cc2276b68 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -560,6 +560,7 @@ Periodic heartbeat runs. identifierPolicy: "strict", // strict | off | custom identifierInstructions: "Preserve deployment IDs, ticket IDs, and host:port pairs exactly.", // used when identifierPolicy=custom qualityGuard: { enabled: true, maxRetries: 1 }, + midTurnPrecheck: { enabled: false }, // optional Pi tool-loop pressure check postCompactionSections: ["Session Startup", "Red Lines"], // [] disables reinjection model: "openrouter/anthropic/claude-sonnet-4-6", // optional compaction-only model override truncateAfterCompaction: true, // rotate to a smaller successor JSONL after compaction @@ -585,6 +586,7 @@ Periodic heartbeat runs. - `identifierPolicy`: `strict` (default), `off`, or `custom`. `strict` prepends built-in opaque identifier retention guidance during compaction summarization. - `identifierInstructions`: optional custom identifier-preservation text used when `identifierPolicy=custom`. - `qualityGuard`: retry-on-malformed-output checks for safeguard summaries. Enabled by default in safeguard mode; set `enabled: false` to skip the audit. +- `midTurnPrecheck`: optional Pi tool-loop pressure check. When `enabled: true`, OpenClaw checks context pressure after tool results are appended and before the next model call. If the context no longer fits, it aborts the current attempt before submitting the prompt and reuses the existing precheck recovery path to truncate tool results or compact and retry. Works with both `default` and `safeguard` compaction modes. Default: disabled. - `postCompactionSections`: optional AGENTS.md H2/H3 section names to re-inject after compaction. Defaults to `["Session Startup", "Red Lines"]`; set `[]` to disable reinjection. When unset or explicitly set to that default pair, older `Every Session`/`Safety` headings are also accepted as a legacy fallback. - `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model. - `maxActiveTranscriptBytes`: optional byte threshold (`number` or strings like `"20mb"`) that triggers normal local compaction before a run when the active JSONL grows past the threshold. Requires `truncateAfterCompaction` so successful compaction can rotate to a smaller successor transcript. Disabled when unset or `0`. diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md index e8cedc70ccd..4e11fb71842 100644 --- a/docs/reference/session-management-compaction.md +++ b/docs/reference/session-management-compaction.md @@ -272,6 +272,20 @@ reopen cost, not raw archival: OpenClaw still runs normal semantic compaction, and it requires `truncateAfterCompaction` so the compacted summary can become a new successor transcript. +For embedded Pi runs, `agents.defaults.compaction.midTurnPrecheck.enabled: true` +adds an opt-in tool-loop guard. After a tool result is appended and before the +next model call, OpenClaw estimates the prompt pressure using the same preflight +budget logic used at turn start. If the context no longer fits, the guard does +not compact inside Pi's `transformContext` hook. It raises a structured +mid-turn precheck signal, stops the current prompt submission, and lets the +outer run loop use the existing recovery path: truncate oversized tool results +when that is enough, or trigger the configured compaction mode and retry. The +option is disabled by default and works with both `default` and `safeguard` +compaction modes, including provider-backed safeguard compaction. +This is independent of `maxActiveTranscriptBytes`: the byte-size guard runs +before a turn opens, while mid-turn precheck runs later in the embedded Pi tool +loop after new tool results have been appended. + --- ## Compaction settings (`reserveTokens`, `keepRecentTokens`) @@ -298,6 +312,11 @@ OpenClaw also enforces a safety floor for embedded runs: and keeps Pi's recent-tail cut point. Without an explicit keep budget, manual compaction remains a hard checkpoint and rebuilt context starts from the new summary. +- Set `agents.defaults.compaction.midTurnPrecheck.enabled: true` to run the + optional tool-loop precheck after new tool results and before the next model + call. This is a trigger only; summary generation still uses the configured + compaction path. It is independent of `maxActiveTranscriptBytes`, which is a + turn-start active-transcript byte-size guard. - Set `agents.defaults.compaction.maxActiveTranscriptBytes` to a byte value or string such as `"20mb"` to run local compaction before a turn when the active transcript gets large. This guard is active only when diff --git a/src/agents/pi-embedded-runner/context-truncation-notice.ts b/src/agents/pi-embedded-runner/context-truncation-notice.ts new file mode 100644 index 00000000000..3a817ed3712 --- /dev/null +++ b/src/agents/pi-embedded-runner/context-truncation-notice.ts @@ -0,0 +1,5 @@ +export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated"; + +export function formatContextLimitTruncationNotice(truncatedChars: number): string { + return `[... ${Math.max(1, Math.floor(truncatedChars))} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`; +} diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index 79711177d34..f0a799906a9 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -2,6 +2,7 @@ import fs from "node:fs/promises"; import path from "node:path"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../../../config/types.js"; import { buildMemorySystemPromptAddition } from "../../../context-engine/delegate.js"; import { clearMemoryPluginState, @@ -29,6 +30,7 @@ import { buildEmbeddedSubscriptionParams, cleanupEmbeddedAttemptResources, } from "./attempt.subscription-cleanup.js"; +import type { MidTurnPrecheckRequest } from "./midturn-precheck.js"; const hoisted = getHoisted(); const embeddedSessionId = "embedded-session"; @@ -37,6 +39,11 @@ const seedMessage = { role: "user", content: "seed", timestamp: 1 } as AgentMess const doneMessage = { role: "assistant", content: "done", timestamp: 2 } as unknown as AgentMessage; type AfterTurnPromptCacheCall = { runtimeContext?: { promptCache?: Record } }; type TrajectoryEvent = { type?: string; data?: Record }; +type ToolResultGuardInstallParams = { + midTurnPrecheck?: { + onMidTurnPrecheck?: (request: MidTurnPrecheckRequest) => void; + }; +}; function createTestContextEngine(params: Partial): AttemptContextEngine { return { @@ -770,3 +777,95 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { }); }); }); + +describe("runEmbeddedAttempt context engine mid-turn precheck integration", () => { + const sessionKey = "agent:main:guildchat:channel:midturn-precheck"; + const tempPaths: string[] = []; + + beforeEach(() => { + resetEmbeddedAttemptHarness(); + clearMemoryPluginState(); + }); + + afterEach(async () => { + await cleanupTempPaths(tempPaths); + clearMemoryPluginState(); + vi.restoreAllMocks(); + }); + + it("keeps mid-turn precheck out of the context-engine-owned compaction hook", async () => { + await createContextEngineAttemptRunner({ + contextEngine: { + ...createContextEngineBootstrapAndAssemble(), + info: { ownsCompaction: true }, + }, + sessionKey, + tempPaths, + attemptOverrides: { + config: { + agents: { + defaults: { + compaction: { + mode: "safeguard", + midTurnPrecheck: { enabled: true }, + }, + }, + }, + } as OpenClawConfig, + }, + }); + + expect(hoisted.installContextEngineLoopHookMock).toHaveBeenCalledWith( + expect.not.objectContaining({ midTurnPrecheck: expect.anything() }), + ); + }); + + it("recovers when Pi persists the mid-turn precheck as an assistant error", async () => { + hoisted.installToolResultContextGuardMock.mockImplementation((...args: unknown[]) => { + const params = args[0] as ToolResultGuardInstallParams; + params.midTurnPrecheck?.onMidTurnPrecheck?.({ + route: "compact_only", + estimatedPromptTokens: 9000, + promptBudgetBeforeReserve: 7000, + overflowTokens: 2000, + toolResultReducibleChars: 0, + effectiveReserveTokens: 1000, + }); + return () => {}; + }); + + const syntheticPiError = { + role: "assistant", + content: [{ type: "text", text: "" }], + stopReason: "error", + errorMessage: "Context overflow: prompt too large for the model (mid-turn precheck).", + timestamp: 3, + } as unknown as AgentMessage; + + const result = await createContextEngineAttemptRunner({ + contextEngine: createContextEngineBootstrapAndAssemble(), + sessionKey, + tempPaths, + attemptOverrides: { + config: { + agents: { + defaults: { + compaction: { + mode: "safeguard", + midTurnPrecheck: { enabled: true }, + }, + }, + }, + } as OpenClawConfig, + }, + sessionMessages: [seedMessage], + sessionPrompt: async (session) => { + session.messages = [...session.messages, syntheticPiError]; + }, + }); + + expect(result.promptErrorSource).toBe("precheck"); + expect(result.preflightRecovery).toEqual({ route: "compact_only" }); + expect(result.messagesSnapshot).toEqual([seedMessage]); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index 22b9a01c47a..0628f84ab03 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -63,6 +63,7 @@ type AttemptSpawnWorkspaceHoisted = { subscribeEmbeddedPiSessionMock: Mock; acquireSessionWriteLockMock: Mock; installToolResultContextGuardMock: UnknownMock; + installContextEngineLoopHookMock: UnknownMock; flushPendingToolResultsAfterIdleMock: AsyncUnknownMock; releaseWsSessionMock: UnknownMock; resolveBootstrapContextForRunMock: Mock<() => Promise>; @@ -117,6 +118,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => { const ensureGlobalUndiciStreamTimeoutsMock = vi.fn(); const buildEmbeddedMessageActionDiscoveryInputMock = vi.fn((params: unknown) => params); const installToolResultContextGuardMock = vi.fn(() => () => {}); + const installContextEngineLoopHookMock = vi.fn(() => () => {}); const flushPendingToolResultsAfterIdleMock = vi.fn(async () => {}); const releaseWsSessionMock = vi.fn(() => {}); const subscribeEmbeddedPiSessionMock = vi.fn(() => @@ -166,6 +168,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => { subscribeEmbeddedPiSessionMock, acquireSessionWriteLockMock, installToolResultContextGuardMock, + installContextEngineLoopHookMock, flushPendingToolResultsAfterIdleMock, releaseWsSessionMock, resolveBootstrapContextForRunMock, @@ -218,7 +221,7 @@ vi.mock("../../sandbox.js", () => ({ })); vi.mock("../../session-tool-result-guard-wrapper.js", () => ({ - guardSessionManager: () => hoisted.sessionManager, + guardSessionManager: (sessionManager: unknown) => sessionManager, })); vi.mock("../../pi-embedded-subscribe.js", () => ({ @@ -355,6 +358,8 @@ vi.mock("../tool-result-context-guard.js", async () => { `[... ${Math.max(1, Math.floor(truncatedChars))} more characters truncated]`, installToolResultContextGuard: (...args: unknown[]) => (hoisted.installToolResultContextGuardMock as (...args: unknown[]) => unknown)(...args), + installContextEngineLoopHook: (...args: unknown[]) => + (hoisted.installContextEngineLoopHookMock as (...args: unknown[]) => unknown)(...args), }; }); @@ -750,6 +755,7 @@ export function resetEmbeddedAttemptHarness( release: async () => {}, }); hoisted.installToolResultContextGuardMock.mockReset().mockReturnValue(() => {}); + hoisted.installContextEngineLoopHookMock.mockReset().mockReturnValue(() => {}); hoisted.flushPendingToolResultsAfterIdleMock.mockReset().mockResolvedValue(undefined); hoisted.releaseWsSessionMock.mockReset().mockReturnValue(undefined); hoisted.resolveBootstrapContextForRunMock.mockReset().mockResolvedValue({ diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 70d6030d07e..a0e133c3a28 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -318,6 +318,11 @@ import { detectAndLoadPromptImages } from "./images.js"; import { buildAttemptReplayMetadata } from "./incomplete-turn.js"; import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js"; import { resolveMessageMergeStrategy } from "./message-merge-strategy.js"; +import { + MID_TURN_PRECHECK_ERROR_MESSAGE, + isMidTurnPrecheckSignal, + type MidTurnPrecheckRequest, +} from "./midturn-precheck.js"; import { PREEMPTIVE_OVERFLOW_ERROR_TEXT, shouldPreemptivelyCompactBeforePrompt, @@ -494,6 +499,57 @@ export function normalizeMessagesForLlmBoundary(messages: AgentMessage[]): Agent return stripRuntimeContextCustomMessages(normalized); } +function isMidTurnPrecheckAssistantError(message: AgentMessage | undefined): boolean { + if (!message || message.role !== "assistant") { + return false; + } + const record = message as unknown as { stopReason?: unknown; errorMessage?: unknown }; + return record.stopReason === "error" && record.errorMessage === MID_TURN_PRECHECK_ERROR_MESSAGE; +} + +function removeTrailingMidTurnPrecheckAssistantError(params: { + activeSession: { agent: { state: { messages: AgentMessage[] } } }; + sessionManager: ReturnType; +}): void { + const messages = params.activeSession.agent.state.messages; + if (isMidTurnPrecheckAssistantError(messages.at(-1))) { + params.activeSession.agent.state.messages = messages.slice(0, -1); + } + + const mutableSessionManager = params.sessionManager as unknown as { + fileEntries?: Array<{ + type?: string; + id?: string; + parentId?: string | null; + message?: AgentMessage; + }>; + byId?: Map; + leafId?: string | null; + _rewriteFile?: () => void; + }; + const lastEntry = mutableSessionManager.fileEntries?.at(-1); + if (lastEntry?.type !== "message" || !isMidTurnPrecheckAssistantError(lastEntry.message)) { + if (isMidTurnPrecheckAssistantError(params.activeSession.agent.state.messages.at(-1))) { + log.warn( + "[context-overflow-midturn-precheck] removed synthetic assistant error from active session but could not locate matching persisted SessionManager entry", + ); + } + return; + } + if (typeof mutableSessionManager._rewriteFile !== "function") { + log.warn( + "[context-overflow-midturn-precheck] removed synthetic assistant error from active session but SessionManager rewrite hook is unavailable", + ); + return; + } + mutableSessionManager.fileEntries?.pop(); + if (lastEntry.id) { + mutableSessionManager.byId?.delete(lastEntry.id); + } + mutableSessionManager.leafId = lastEntry.parentId ?? null; + mutableSessionManager._rewriteFile(); +} + export function shouldCreateBundleMcpRuntimeForAttempt(params: { toolsEnabled: boolean; disableTools?: boolean; @@ -1470,6 +1526,21 @@ export async function runEmbeddedAttempt( queueYieldInterruptForSession = () => { queueSessionsYieldInterruptMessage(activeSession); }; + const contextTokenBudgetForGuard = Math.max( + 1, + Math.floor(params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS), + ); + const toolResultMaxCharsForGuard = resolveLiveToolResultMaxChars({ + contextWindowTokens: contextTokenBudgetForGuard, + cfg: params.config, + agentId: sessionAgentId, + }); + const midTurnPrecheckEnabled = + params.config?.agents?.defaults?.compaction?.midTurnPrecheck?.enabled === true; + let pendingMidTurnPrecheckRequest: MidTurnPrecheckRequest | null = null; + const onMidTurnPrecheck = (request: MidTurnPrecheckRequest) => { + pendingMidTurnPrecheckRequest = request; + }; if (!activeContextEngine || activeContextEngine.info.ownsCompaction !== true) { removeToolResultContextGuard = installToolResultContextGuard({ agent: activeSession.agent, @@ -1479,6 +1550,19 @@ export async function runEmbeddedAttempt( params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS, ), ), + ...(midTurnPrecheckEnabled + ? { + midTurnPrecheck: { + enabled: true, + contextTokenBudget: contextTokenBudgetForGuard, + reserveTokens: () => settingsManager.getCompactionReserveTokens(), + toolResultMaxChars: toolResultMaxCharsForGuard, + getSystemPrompt: () => systemPromptText, + getPrePromptMessageCount: () => prePromptMessageCount, + onMidTurnPrecheck, + }, + } + : {}), }); } else { removeToolResultContextGuard = installContextEngineLoopHook({ @@ -2271,8 +2355,67 @@ export async function runEmbeddedAttempt( // Hook runner was already obtained earlier before tool creation const hookAgentId = sessionAgentId; + const activeSessionManager = sessionManager; let preflightRecovery: EmbeddedRunAttemptResult["preflightRecovery"]; let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null; + const handleMidTurnPrecheckRequest = (request: MidTurnPrecheckRequest) => { + const logMidTurnPrecheck = (route: string, extra?: string) => { + log.warn( + `[context-overflow-midturn-precheck] sessionKey=${params.sessionKey ?? params.sessionId} ` + + `provider=${params.provider}/${params.modelId} route=${route} ` + + `estimatedPromptTokens=${request.estimatedPromptTokens} ` + + `promptBudgetBeforeReserve=${request.promptBudgetBeforeReserve} ` + + `overflowTokens=${request.overflowTokens} ` + + `toolResultReducibleChars=${request.toolResultReducibleChars} ` + + `effectiveReserveTokens=${request.effectiveReserveTokens} ` + + `prePromptMessageCount=${prePromptMessageCount} ` + + (extra ? `${extra} ` : "") + + `sessionFile=${params.sessionFile}`, + ); + }; + if (request.route === "truncate_tool_results_only") { + const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS; + const toolResultMaxChars = resolveLiveToolResultMaxChars({ + contextWindowTokens: contextTokenBudget, + cfg: params.config, + agentId: sessionAgentId, + }); + const truncationResult = truncateOversizedToolResultsInSessionManager({ + sessionManager: activeSessionManager, + contextWindowTokens: contextTokenBudget, + maxCharsOverride: toolResultMaxChars, + sessionFile: params.sessionFile, + sessionId: params.sessionId, + sessionKey: params.sessionKey, + }); + if (truncationResult.truncated) { + preflightRecovery = { + route: "truncate_tool_results_only", + handled: true, + truncatedCount: truncationResult.truncatedCount, + }; + const sessionContext = activeSessionManager.buildSessionContext(); + activeSession.agent.state.messages = sessionContext.messages; + logMidTurnPrecheck( + request.route, + `handled=true truncatedCount=${truncationResult.truncatedCount}`, + ); + } else { + preflightRecovery = { route: "compact_only" }; + promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT); + promptErrorSource = "precheck"; + logMidTurnPrecheck( + "compact_only", + `truncateFallbackReason=${truncationResult.reason ?? "unknown"}`, + ); + } + } else { + preflightRecovery = { route: request.route }; + promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT); + promptErrorSource = "precheck"; + logMidTurnPrecheck(request.route); + } + }; let skipPromptSubmission = false; try { const promptStartedAt = Date.now(); @@ -2782,6 +2925,8 @@ export async function runEmbeddedAttempt( if (yieldMessage) { await persistSessionsYieldContextMessage(activeSession, yieldMessage); } + } else if (isMidTurnPrecheckSignal(err)) { + handleMidTurnPrecheckRequest(err.request); } else { promptError = err; promptErrorSource = "prompt"; @@ -2792,6 +2937,20 @@ export async function runEmbeddedAttempt( ); } + if (pendingMidTurnPrecheckRequest) { + const request = pendingMidTurnPrecheckRequest; + pendingMidTurnPrecheckRequest = null; + removeTrailingMidTurnPrecheckAssistantError({ + activeSession, + sessionManager, + }); + if (!preflightRecovery && promptErrorSource !== "precheck") { + promptError = null; + promptErrorSource = null; + handleMidTurnPrecheckRequest(request); + } + } + // Capture snapshot before compaction wait so we have complete messages if timeout occurs // Check compaction state before and after to avoid race condition where compaction starts during capture // Use session state (not subscription) for snapshot decisions - need instantaneous compaction status diff --git a/src/agents/pi-embedded-runner/run/midturn-precheck.ts b/src/agents/pi-embedded-runner/run/midturn-precheck.ts new file mode 100644 index 00000000000..01869884746 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/midturn-precheck.ts @@ -0,0 +1,27 @@ +import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js"; + +export type MidTurnPrecheckRequest = { + route: Exclude; + estimatedPromptTokens: number; + promptBudgetBeforeReserve: number; + overflowTokens: number; + toolResultReducibleChars: number; + effectiveReserveTokens: number; +}; + +export const MID_TURN_PRECHECK_ERROR_MESSAGE = + "Context overflow: prompt too large for the model (mid-turn precheck)."; + +export class MidTurnPrecheckSignal extends Error { + readonly request: MidTurnPrecheckRequest; + + constructor(request: MidTurnPrecheckRequest) { + super(MID_TURN_PRECHECK_ERROR_MESSAGE); + this.name = "MidTurnPrecheckSignal"; + this.request = request; + } +} + +export function isMidTurnPrecheckSignal(error: unknown): error is MidTurnPrecheckSignal { + return error instanceof MidTurnPrecheckSignal; +} diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts index 20ee6d06438..4f1f641dcd5 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts @@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it, vi } from "vitest"; import type { ContextEngine } from "../../context-engine/types.js"; import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js"; +import { MidTurnPrecheckSignal } from "./run/midturn-precheck.js"; import { CONTEXT_LIMIT_TRUNCATION_NOTICE, formatContextLimitTruncationNotice, @@ -104,6 +105,36 @@ async function applyGuardToContext( return await agent.transformContext?.(contextForNextCall, new AbortController().signal); } +async function applyMidTurnPrecheckGuardToContext( + agent: { transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown }, + contextForNextCall: AgentMessage[], + options: { + contextWindowTokens?: number; + contextTokenBudget?: number; + reserveTokens?: number; + toolResultMaxChars?: number; + prePromptMessageCount?: number; + systemPrompt?: string; + } = {}, +) { + const contextWindowTokens = options.contextWindowTokens ?? options.contextTokenBudget ?? 20_000; + installToolResultContextGuard({ + agent, + contextWindowTokens, + midTurnPrecheck: { + enabled: true, + contextTokenBudget: options.contextTokenBudget ?? contextWindowTokens, + reserveTokens: () => options.reserveTokens ?? 10_000, + toolResultMaxChars: options.toolResultMaxChars, + getSystemPrompt: () => options.systemPrompt, + ...(options.prePromptMessageCount !== undefined + ? { getPrePromptMessageCount: () => options.prePromptMessageCount as number } + : {}), + }, + }); + return await agent.transformContext?.(contextForNextCall, new AbortController().signal); +} + function expectPiStyleTruncation(text: string): void { expect(text).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/); @@ -249,6 +280,66 @@ describe("installToolResultContextGuard", () => { expectPiStyleTruncation(getToolResultText(transformed[0])); }); + + it("raises a structured mid-turn precheck signal after a new tool result overflows", async () => { + const agent = makeGuardableAgent(); + const contextForNextCall = [ + makeUser("prompt already in history"), + makeToolResult("call_big", "x".repeat(80_000)), + ]; + + await expect( + applyMidTurnPrecheckGuardToContext(agent, contextForNextCall, { + contextWindowTokens: 200_000, + contextTokenBudget: 20_000, + reserveTokens: 12_000, + toolResultMaxChars: 16_000, + prePromptMessageCount: 1, + }), + ).rejects.toMatchObject({ + name: "MidTurnPrecheckSignal", + request: expect.objectContaining({ + route: "compact_then_truncate", + overflowTokens: expect.any(Number), + toolResultReducibleChars: expect.any(Number), + }), + }); + }); + + it("does not run mid-turn precheck when no new tool result was appended", async () => { + const agent = makeGuardableAgent(); + const contextForNextCall = [makeUser("u".repeat(80_000))]; + + const transformed = await applyMidTurnPrecheckGuardToContext(agent, contextForNextCall, { + contextWindowTokens: 200_000, + contextTokenBudget: 20_000, + reserveTokens: 12_000, + prePromptMessageCount: 0, + }); + + expect(transformed).toBe(contextForNextCall); + }); + + it("uses compact_only route when mid-turn overflow is not reducible by tool truncation", async () => { + const agent = makeGuardableAgent(); + const contextForNextCall = [ + makeUser("u".repeat(80_000)), + makeToolResult("call_small", "small output"), + ]; + + try { + await applyMidTurnPrecheckGuardToContext(agent, contextForNextCall, { + contextWindowTokens: 200_000, + contextTokenBudget: 20_000, + reserveTokens: 12_000, + prePromptMessageCount: 1, + }); + throw new Error("expected mid-turn precheck signal"); + } catch (err) { + expect(err).toBeInstanceOf(MidTurnPrecheckSignal); + expect((err as MidTurnPrecheckSignal).request.route).toBe("compact_only"); + } + }); }); type MockedEngine = ContextEngine & { diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts index 1ce238b35da..8331589ef25 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts @@ -1,5 +1,12 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ContextEngine, ContextEngineRuntimeContext } from "../../context-engine/types.js"; +import { + CONTEXT_LIMIT_TRUNCATION_NOTICE, + formatContextLimitTruncationNotice, +} from "./context-truncation-notice.js"; +import { log } from "./logger.js"; +import { MidTurnPrecheckSignal, type MidTurnPrecheckRequest } from "./run/midturn-precheck.js"; +import { shouldPreemptivelyCompactBeforePrompt } from "./run/preemptive-compaction.js"; import { CHARS_PER_TOKEN_ESTIMATE, TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE, @@ -15,7 +22,6 @@ import { const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5; const PREEMPTIVE_OVERFLOW_RATIO = 0.9; -export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated"; export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE = "Context overflow: estimated context size exceeds safe threshold during tool loop."; const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO = 4 / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE; @@ -31,9 +37,17 @@ type GuardableAgentRecord = { transformContext?: GuardableTransformContext; }; -export function formatContextLimitTruncationNotice(truncatedChars: number): string { - return `[... ${Math.max(1, Math.floor(truncatedChars))} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`; -} +type MidTurnPrecheckOptions = { + enabled?: boolean; + contextTokenBudget: number; + reserveTokens: () => number; + toolResultMaxChars?: number; + getSystemPrompt?: () => string | undefined; + getPrePromptMessageCount?: () => number; + onMidTurnPrecheck?: (request: MidTurnPrecheckRequest) => void; +}; + +export { CONTEXT_LIMIT_TRUNCATION_NOTICE, formatContextLimitTruncationNotice }; function truncateTextToBudget(text: string, maxChars: number): string { if (text.length <= maxChars) { @@ -184,6 +198,34 @@ function enforceToolResultLimitInPlace(params: { } } +function hasNewToolResultAfterFence(params: { + messages: AgentMessage[]; + prePromptMessageCount: number; +}): boolean { + for (const message of params.messages.slice(params.prePromptMessageCount)) { + if (isToolResultMessage(message)) { + return true; + } + } + return false; +} + +function toMidTurnPrecheckRequest( + result: ReturnType, +): MidTurnPrecheckRequest | null { + if (result.route === "fits") { + return null; + } + return { + route: result.route, + estimatedPromptTokens: result.estimatedPromptTokens, + promptBudgetBeforeReserve: result.promptBudgetBeforeReserve, + overflowTokens: result.overflowTokens, + toolResultReducibleChars: result.toolResultReducibleChars, + effectiveReserveTokens: result.effectiveReserveTokens, + }; +} + /** * Per-iteration `afterTurn` + `assemble` wrapper for sessions where * the context engine owns compaction. Lets the engine compact inside @@ -231,7 +273,6 @@ export function installContextEngineLoopHook(params: { if (!hasNewMessages) { return lastAssembledView ?? sourceMessages; } - try { if (typeof contextEngine.afterTurn === "function") { await contextEngine.afterTurn({ @@ -295,6 +336,7 @@ export function installContextEngineLoopHook(params: { export function installToolResultContextGuard(params: { agent: GuardableAgent; contextWindowTokens: number; + midTurnPrecheck?: MidTurnPrecheckOptions; }): () => void { const contextWindowTokens = Math.max(1, Math.floor(params.contextWindowTokens)); const maxContextChars = Math.max( @@ -312,6 +354,7 @@ export function installToolResultContextGuard(params: { // narrow runtime view to keep callsites type-safe while preserving behavior. const mutableAgent = params.agent as GuardableAgentRecord; const originalTransformContext = mutableAgent.transformContext; + let lastSeenLength: number | null = null; mutableAgent.transformContext = (async (messages: AgentMessage[], signal: AbortSignal) => { const transformed = originalTransformContext @@ -331,6 +374,50 @@ export function installToolResultContextGuard(params: { maxSingleToolResultChars, }); } + if (params.midTurnPrecheck?.enabled) { + const prePromptMessageCount = Math.max( + 0, + Math.min( + contextMessages.length, + lastSeenLength ?? + params.midTurnPrecheck.getPrePromptMessageCount?.() ?? + contextMessages.length, + ), + ); + lastSeenLength = prePromptMessageCount; + if ( + hasNewToolResultAfterFence({ + messages: contextMessages, + prePromptMessageCount, + }) + ) { + // Use the same post-truncation view Pi will send to the next model call. + // Recovery re-applies truncation to the persisted session manager, so + // this precheck is only a routing signal, not the source of truth. + const precheck = shouldPreemptivelyCompactBeforePrompt({ + messages: contextMessages, + systemPrompt: params.midTurnPrecheck.getSystemPrompt?.(), + // During a tool loop, the active user prompt is already part of messages. + prompt: "", + contextTokenBudget: params.midTurnPrecheck.contextTokenBudget, + reserveTokens: params.midTurnPrecheck.reserveTokens(), + toolResultMaxChars: params.midTurnPrecheck.toolResultMaxChars, + }); + const request = toMidTurnPrecheckRequest(precheck); + log.debug( + `[context-overflow-midturn-precheck] tool-result-guard check route=${precheck.route} ` + + `messages=${contextMessages.length} prePromptMessageCount=${prePromptMessageCount} ` + + `estimatedPromptTokens=${precheck.estimatedPromptTokens} ` + + `promptBudgetBeforeReserve=${precheck.promptBudgetBeforeReserve} ` + + `overflowTokens=${precheck.overflowTokens}`, + ); + if (request) { + params.midTurnPrecheck.onMidTurnPrecheck?.(request); + throw new MidTurnPrecheckSignal(request); + } + } + lastSeenLength = contextMessages.length; + } if ( exceedsPreemptiveOverflowThreshold({ messages: contextMessages, diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts index 38625993d2e..c8fdc29382a 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts @@ -7,8 +7,8 @@ import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js"; import { resolveAgentContextLimits } from "../agent-scope.js"; import { acquireSessionWriteLock } from "../session-write-lock.js"; +import { formatContextLimitTruncationNotice } from "./context-truncation-notice.js"; import { log } from "./logger.js"; -import { formatContextLimitTruncationNotice } from "./tool-result-context-guard.js"; import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js"; /** diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index cc47cc53f43..1853a4d4584 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -12,7 +12,7 @@ import type { } from "../plugins/types.js"; import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js"; import { normalizeOptionalString } from "../shared/string-coerce.js"; -import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/tool-result-context-guard.js"; +import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/context-truncation-notice.js"; import { DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS, truncateToolResultMessage, diff --git a/src/config/config.compaction-settings.test.ts b/src/config/config.compaction-settings.test.ts index a5a91e2daf4..7c12fa09bd9 100644 --- a/src/config/config.compaction-settings.test.ts +++ b/src/config/config.compaction-settings.test.ts @@ -26,6 +26,9 @@ describe("config compaction settings", () => { enabled: true, maxRetries: 2, }, + midTurnPrecheck: { + enabled: true, + }, memoryFlush: { enabled: false, model: "ollama/qwen3:8b", @@ -44,6 +47,7 @@ describe("config compaction settings", () => { expect(compaction?.identifierInstructions).toBe("Keep ticket IDs unchanged."); expect(compaction?.qualityGuard?.enabled).toBe(true); expect(compaction?.qualityGuard?.maxRetries).toBe(2); + expect(compaction?.midTurnPrecheck?.enabled).toBe(true); expect(compaction?.memoryFlush?.enabled).toBe(false); expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b"); expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234); diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 16e1f5d2e35..614c138c5ce 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -5025,6 +5025,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Quality-audit retry settings for safeguard compaction summaries. Safeguard mode enables this by default; set enabled: false to skip summary audits and regeneration.", }, + midTurnPrecheck: { + type: "object", + properties: { + enabled: { + type: "boolean", + title: "Compaction Mid-turn Precheck Enabled", + description: + "Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.", + }, + }, + additionalProperties: false, + title: "Compaction Mid-turn Precheck", + description: + "Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.", + }, postIndexSync: { type: "string", enum: ["off", "async", "await"], @@ -27251,6 +27266,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Maximum number of regeneration retries after a failed safeguard summary quality audit. Use small values to bound extra latency and token cost.", tags: ["performance"], }, + "agents.defaults.compaction.midTurnPrecheck": { + label: "Compaction Mid-turn Precheck", + help: "Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.", + tags: ["advanced"], + }, + "agents.defaults.compaction.midTurnPrecheck.enabled": { + label: "Compaction Mid-turn Precheck Enabled", + help: "Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.", + tags: ["advanced"], + }, "agents.defaults.compaction.postIndexSync": { label: "Compaction Post-Index Sync", help: 'Controls post-compaction session memory reindex mode: "off", "async", or "await" (default: "async"). Use "await" for strongest freshness, "async" for lower compaction latency, and "off" only when session-memory sync is handled elsewhere.', diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 6c0add4dcd3..79bc6e0d480 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -401,6 +401,8 @@ const TARGET_KEYS = [ "agents.defaults.compaction.qualityGuard", "agents.defaults.compaction.qualityGuard.enabled", "agents.defaults.compaction.qualityGuard.maxRetries", + "agents.defaults.compaction.midTurnPrecheck", + "agents.defaults.compaction.midTurnPrecheck.enabled", "agents.defaults.compaction.postCompactionSections", "agents.defaults.compaction.timeoutSeconds", "agents.defaults.compaction.model", @@ -821,6 +823,9 @@ describe("config help copy quality", () => { expect(/recent.*turn|verbatim/i.test(recentTurnsPreserve)).toBe(true); expect(/default:\s*3/i.test(recentTurnsPreserve)).toBe(true); + const midTurnPrecheck = FIELD_HELP["agents.defaults.compaction.midTurnPrecheck.enabled"]; + expect(/mid-turn|tool loop|default:\s*false/i.test(midTurnPrecheck)).toBe(true); + const postCompactionSections = FIELD_HELP["agents.defaults.compaction.postCompactionSections"]; expect(/Session Startup|Red Lines/i.test(postCompactionSections)).toBe(true); expect(/Every Session|Safety/i.test(postCompactionSections)).toBe(true); diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 1b60dfb0b04..54f8363bbf4 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1307,6 +1307,10 @@ export const FIELD_HELP: Record = { "Enables summary quality audits and regeneration retries for safeguard compaction. Default: true in safeguard mode.", "agents.defaults.compaction.qualityGuard.maxRetries": "Maximum number of regeneration retries after a failed safeguard summary quality audit. Use small values to bound extra latency and token cost.", + "agents.defaults.compaction.midTurnPrecheck": + "Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.", + "agents.defaults.compaction.midTurnPrecheck.enabled": + "Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.", "agents.defaults.compaction.postIndexSync": 'Controls post-compaction session memory reindex mode: "off", "async", or "await" (default: "async"). Use "await" for strongest freshness, "async" for lower compaction latency, and "off" only when session-memory sync is handled elsewhere.', "agents.defaults.compaction.postCompactionSections": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 89001dc9ac4..484d4830ca6 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -608,6 +608,8 @@ export const FIELD_LABELS: Record = { "agents.defaults.compaction.qualityGuard": "Compaction Quality Guard", "agents.defaults.compaction.qualityGuard.enabled": "Compaction Quality Guard Enabled", "agents.defaults.compaction.qualityGuard.maxRetries": "Compaction Quality Guard Max Retries", + "agents.defaults.compaction.midTurnPrecheck": "Compaction Mid-turn Precheck", + "agents.defaults.compaction.midTurnPrecheck.enabled": "Compaction Mid-turn Precheck Enabled", "agents.defaults.compaction.postIndexSync": "Compaction Post-Index Sync", "agents.defaults.compaction.postCompactionSections": "Post-Compaction Context Sections", "agents.defaults.compaction.timeoutSeconds": "Compaction Timeout (Seconds)", diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 211d82666f0..5265437ccaa 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -432,6 +432,14 @@ export type AgentCompactionQualityGuardConfig = { maxRetries?: number; }; +export type AgentCompactionMidTurnPrecheckConfig = { + /** + * Enable structured context pressure checks after tool results are appended + * and before the next Pi model call. Default: false. + */ + enabled?: boolean; +}; + export type AgentCompactionConfig = { /** Compaction summarization mode. */ mode?: AgentCompactionMode; @@ -453,6 +461,8 @@ export type AgentCompactionConfig = { identifierInstructions?: string; /** Optional quality-audit retries for safeguard compaction summaries. */ qualityGuard?: AgentCompactionQualityGuardConfig; + /** Mid-turn precheck for tool-loop context pressure. Default: disabled. */ + midTurnPrecheck?: AgentCompactionMidTurnPrecheckConfig; /** Post-compaction session memory index sync mode. */ postIndexSync?: AgentCompactionPostIndexSyncMode; /** Pre-compaction memory flush (agentic turn). Default: enabled. */ diff --git a/src/config/zod-schema.agent-defaults.test.ts b/src/config/zod-schema.agent-defaults.test.ts index f05ae64e927..06030ddab5a 100644 --- a/src/config/zod-schema.agent-defaults.test.ts +++ b/src/config/zod-schema.agent-defaults.test.ts @@ -103,6 +103,19 @@ describe("agent defaults schema", () => { expect(result.compaction?.maxActiveTranscriptBytes).toBe("20mb"); }); + it("accepts compaction.midTurnPrecheck.enabled", () => { + const result = AgentDefaultsSchema.parse({ + compaction: { + mode: "safeguard", + midTurnPrecheck: { + enabled: true, + }, + }, + })!; + + expect(result.compaction?.midTurnPrecheck?.enabled).toBe(true); + }); + it("accepts focused contextLimits on defaults and agent entries", () => { const defaults = AgentDefaultsSchema.parse({ contextLimits: { diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index 70c55de4b8b..76d04eb4cce 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -182,6 +182,12 @@ export const AgentDefaultsSchema = z }) .strict() .optional(), + midTurnPrecheck: z + .object({ + enabled: z.boolean().optional(), + }) + .strict() + .optional(), postIndexSync: z.enum(["off", "async", "await"]).optional(), postCompactionSections: z.array(z.string()).optional(), model: z.string().optional(),