From 42584964ac08d824c5ce12cb196e70920a3e2f49 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 1 May 2026 21:43:41 +0700 Subject: [PATCH] fix(context-engine): honor assembled prompt authority in precheck (#74255) Merged via squash. Prepared head SHA: 650b02380b7fbef42e540771a82d75962253e054 Co-authored-by: 100yenadmin <239388517+100yenadmin@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 2 + .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- docs/concepts/context-engine.md | 11 ++ ...mpt.spawn-workspace.context-engine.test.ts | 112 ++++++++++++++++++ .../attempt.spawn-workspace.test-support.ts | 19 +++ src/agents/pi-embedded-runner/run/attempt.ts | 18 ++- .../run/preemptive-compaction.test.ts | 2 +- src/context-engine/types.ts | 14 +++ 8 files changed, 177 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42cda82b7fc..7a10b3ade53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai - MCP/stdio: settle MCP stdio transport send() from the write callback instead of resolving immediately on buffer acceptance, so async write errors reject the promise instead of being lost. Refs #75438. - Process/exec: add stdin error listener in runCommandWithTimeout so EPIPE from a prematurely-exited child is swallowed instead of escaping to uncaughtException. Refs #75438. - Voice Call/realtime: add default-off fast memory/session context for `openclaw_agent_consult`, giving live calls a bounded answer-or-miss path before the full agent consult. Fixes #71849. Thanks @amzzzzzzz. + - Google Meet: interrupt Realtime provider output when local barge-in clears playback, so command-pair audio stops model speech instead of only restarting Chrome playback. Fixes #73850. (#73834) Thanks @shhtheonlyperson. 
- Gateway/config: cap oversized plugin-owned schemas in the full `config.schema` response so large installed plugin sets cannot balloon Gateway RSS or crash schema clients. Thanks @vincentkoc. - Plugins/update: skip ClawHub and marketplace plugin updates when the bundled version is newer than the recorded installed version, so `openclaw update` no longer overwrites working bundled plugins with older external packages. Fixes #75447. Thanks @amknight. @@ -145,6 +146,7 @@ Docs: https://docs.openclaw.ai - Pairing: surface unexpected allowlist filesystem stat errors instead of treating the allowlist as missing, so permission and I/O failures are visible during pairing authorization checks. (#63324) Thanks @franciscomaestre. - macOS app: reserve layout space for exec approval command details so the allow dialog no longer overlaps the command, context, and action buttons. (#75470) Thanks @ngutman. - Agents/failover: carry `sessionId`, `lane`, `provider`, `model`, and `profileId` attribution through `FailoverError` and `describeFailoverError`/`coerceToFailoverError` so structured error logs (e.g. `gateway.err.log` ingestion) can attribute exhausted-fallback wrapper errors to the originating session and last-attempted provider instead of dropping the metadata after the per-profile errors. Fixes #42713. (#73506) Thanks @wenxu007. +- Context Engine: treat assembled prompt as the default authority for preemptive overflow prechecks so engines that return a windowed, self-contained context no longer trigger false hard-fail compactions on huge raw history. Engines whose assembled view can hide overflow risk can opt back into the legacy behavior with `AssembleResult.promptAuthority: "preassembly_may_overflow"`. (#74255) Thanks @100yenadmin. 
## 2026.4.29 diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index f9f21541e3b..c03babba30d 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -c1446005a26262d6b817d72493471d11c618b98441fad2014f1cf422bfe64bc9 plugin-sdk-api-baseline.json -1b7d71eaabcae7d957396e7ff242598ef22b51851bc3fe1f4b58f2c2e5bf1459 plugin-sdk-api-baseline.jsonl +37787172adf7a55a32097599b4bf5729fc7138c8743c6f4c9d58fc8d01df72a1 plugin-sdk-api-baseline.json +0ec4957528477832085c638a5f7f691c878ba199f3e81f330f162c27cfd9ebf4 plugin-sdk-api-baseline.jsonl diff --git a/docs/concepts/context-engine.md b/docs/concepts/context-engine.md index 393896e1a42..fdd14f6434f 100644 --- a/docs/concepts/context-engine.md +++ b/docs/concepts/context-engine.md @@ -197,6 +197,17 @@ Required members: Prepended to the system prompt. + + Controls which token estimate the runner uses for preemptive overflow + prechecks. Defaults to `"assembled"`, which means only the assembled + prompt's estimate is checked — appropriate for engines that return a + windowed, self-contained context. Set to `"preassembly_may_overflow"` only + when your assembled view can hide overflow risk in the underlying + transcript; the runner then takes the maximum of the assembled estimate + and the pre-assembly (unwindowed) session-history estimate when deciding + whether to preemptively compact. Either way, the messages you return are + still what the model sees — `promptAuthority` only affects the precheck. + `compact` returns a `CompactResult`. 
When compaction rotates the active transcript, `result.sessionId` and `result.sessionFile` identify the successor diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index 74c65599e66..9aef361fd80 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -322,6 +322,118 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { ); }); + it("uses assembled context as the default precheck authority", async () => { + let sawPrompt = false; + const hugeHistory = "large raw history ".repeat(25_000); + + const result = await createContextEngineAttemptRunner({ + contextEngine: createTestContextEngine({ + assemble: async () => ({ + messages: [ + { role: "user", content: "small assembled context", timestamp: 1 }, + ] as AgentMessage[], + estimatedTokens: 8, + }), + }), + sessionKey, + tempPaths, + sessionMessages: [{ role: "user", content: hugeHistory, timestamp: 1 }] as AgentMessage[], + attemptOverrides: { + contextTokenBudget: 500, + }, + sessionPrompt: async (session) => { + sawPrompt = true; + session.messages = [ + ...session.messages, + { role: "assistant", content: "done", timestamp: 2 }, + ]; + }, + }); + + expect(sawPrompt).toBe(true); + expect(result.promptError).toBeNull(); + expect(result.promptErrorSource).toBeNull(); + expect(hoisted.preemptiveCompactionCalls.at(-1)).not.toHaveProperty("unwindowedMessages"); + }); + + it("honors context engines that opt into preassembly overflow authority", async () => { + let sawPrompt = false; + const hugeHistory = "large raw history ".repeat(25_000); + + const result = await createContextEngineAttemptRunner({ + contextEngine: createTestContextEngine({ + assemble: async () => ({ + messages: [ + { role: "user", content: "small assembled context", timestamp: 1 
}, + ] as AgentMessage[], + estimatedTokens: 8, + promptAuthority: "preassembly_may_overflow", + }), + }), + sessionKey, + tempPaths, + sessionMessages: [{ role: "user", content: hugeHistory, timestamp: 1 }] as AgentMessage[], + attemptOverrides: { + contextTokenBudget: 500, + }, + sessionPrompt: async (session) => { + sawPrompt = true; + session.messages = [ + ...session.messages, + { role: "assistant", content: "done", timestamp: 2 }, + ]; + }, + }); + + expect(sawPrompt).toBe(false); + expect(result.promptErrorSource).toBe("precheck"); + expect(result.preflightRecovery?.route).toBe("compact_only"); + expect(hoisted.preemptiveCompactionCalls.at(-1)).toHaveProperty("unwindowedMessages"); + }); + + it("snapshots pre-assembly messages before assemble even when the engine windows in place", async () => { + const hugeHistory = "large raw history ".repeat(25_000); + const preassemblyMarker = { role: "user", content: hugeHistory, timestamp: 1 } as AgentMessage; + + await createContextEngineAttemptRunner({ + contextEngine: createTestContextEngine({ + assemble: async ({ messages }: { messages: AgentMessage[] }) => { + // Simulate an engine that windows the input array IN PLACE. + // The assemble contract does not require immutability, so the + // runner must have already snapshotted before calling us. 
+ messages.length = 0; + messages.push({ role: "user", content: "windowed", timestamp: 2 } as AgentMessage); + return { + messages: [ + { role: "user", content: "small assembled context", timestamp: 1 }, + ] as AgentMessage[], + estimatedTokens: 8, + promptAuthority: "preassembly_may_overflow", + }; + }, + }), + sessionKey, + tempPaths, + sessionMessages: [preassemblyMarker], + attemptOverrides: { + contextTokenBudget: 500, + }, + sessionPrompt: async (session) => { + session.messages = [ + ...session.messages, + { role: "assistant", content: "done", timestamp: 3 }, + ]; + }, + }); + + const lastCall = hoisted.preemptiveCompactionCalls.at(-1); + expect(lastCall).toHaveProperty("unwindowedMessages"); + const unwindowed = (lastCall as { unwindowedMessages?: AgentMessage[] }).unwindowedMessages; + // The snapshot must reflect the true pre-assembly state, not the in-place + // windowed array that assemble mutated. + expect(unwindowed).toEqual([preassemblyMarker]); + }); + it("keeps gateway model runs independent from agent context and session history", async () => { const bootstrap = vi.fn(async () => ({ bootstrapped: true })); const assemble = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => ({ diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index f79a2ea6b51..cd8d3877040 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -26,6 +26,8 @@ type SubscribeEmbeddedPiSessionFn = typeof import("../../pi-embedded-subscribe.js").subscribeEmbeddedPiSession; type AcquireSessionWriteLockFn = typeof import("../../session-write-lock.js").acquireSessionWriteLock; +type ShouldPreemptivelyCompactBeforePromptFn = + typeof import("./preemptive-compaction.js").shouldPreemptivelyCompactBeforePrompt; type SubscriptionMock = ReturnType; type 
UnknownMock = Mock<(...args: unknown[]) => unknown>; @@ -78,6 +80,7 @@ type AttemptSpawnWorkspaceHoisted = { (sessionKey: string | undefined, config: unknown) => number | undefined >; limitHistoryTurnsMock: Mock<<T>(messages: T, limit: number | undefined) => T>; + preemptiveCompactionCalls: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0][]; sessionManager: SessionManagerMocks; }; @@ -148,6 +151,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => { const limitHistoryTurnsMock = vi.fn<<T>(messages: T, limit: number | undefined) => T>( (messages) => messages, ); + const preemptiveCompactionCalls: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0][] = []; const sessionManager = { getLeafEntry: vi.fn(() => null), branch: vi.fn(), @@ -181,6 +185,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => { runContextEngineMaintenanceMock, getDmHistoryLimitFromSessionKeyMock, limitHistoryTurnsMock, + preemptiveCompactionCalls, sessionManager, }; }); @@ -583,6 +588,19 @@ vi.mock("../compaction-runtime-context.js", () => ({ buildEmbeddedCompactionRuntimeContext: () => ({}), })); +vi.mock("./preemptive-compaction.js", async (importOriginal) => { + const actual = await importOriginal<typeof import("./preemptive-compaction.js")>(); + return { + ...actual, + shouldPreemptivelyCompactBeforePrompt: ( + params: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0], + ) => { + hoisted.preemptiveCompactionCalls.push(params); + return actual.shouldPreemptivelyCompactBeforePrompt(params); + }, + }; +}); + vi.mock("../compaction-safety-timeout.js", () => ({ resolveCompactionTimeoutMs: () => undefined, })); @@ -770,6 +788,7 @@ export function resetEmbeddedAttemptHarness( hoisted.runContextEngineMaintenanceMock.mockReset().mockResolvedValue(undefined); hoisted.getDmHistoryLimitFromSessionKeyMock.mockReset().mockReturnValue(undefined); hoisted.limitHistoryTurnsMock.mockReset().mockImplementation((messages) => messages); + hoisted.preemptiveCompactionCalls.length = 0; hoisted.sessionManager.getLeafEntry.mockReset().mockReturnValue(null); hoisted.sessionManager.branch.mockReset(); hoisted.sessionManager.resetLeaf.mockReset(); 
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index a473e3f45e3..5548b319b5c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -10,6 +10,7 @@ import { import { isAcpRuntimeSpawnAvailable } from "../../../acp/runtime/availability.js"; import { filterHeartbeatPairs } from "../../../auto-reply/heartbeat-filter.js"; import { getRuntimeConfig } from "../../../config/config.js"; +import type { AssembleResult } from "../../../context-engine/types.js"; import { emitTrustedDiagnosticEvent } from "../../../infra/diagnostic-events.js"; import { createChildDiagnosticTraceContext, @@ -1527,6 +1528,8 @@ export async function runEmbeddedAttempt( } let prePromptMessageCount = activeSession.messages.length; let unwindowedContextEngineMessagesForPrecheck: AgentMessage[] | undefined; + let contextEnginePromptAuthority: NonNullable<AssembleResult["promptAuthority"]> = + "assembled"; abortSessionForYield = () => { yieldAbortSettled = Promise.resolve(activeSession.abort()); }; @@ -2071,7 +2074,11 @@ export async function runEmbeddedAttempt( if (activeContextEngine) { try { - unwindowedContextEngineMessagesForPrecheck = activeSession.messages.slice(); + // Snapshot before assemble: the assemble contract does not require + // the input array to be treated immutably, so an engine that windows + // history in place would otherwise leave the precheck reading + // already-windowed messages instead of the true pre-assembly state. + const preassemblyContextEngineMessagesForPrecheck = activeSession.messages.slice(); const assembled = await assembleAttemptContextEngine({ contextEngine: activeContextEngine, sessionId: params.sessionId, @@ -2089,6 +2096,11 @@ export async function runEmbeddedAttempt( if (assembled.messages !== activeSession.messages) { activeSession.agent.state.messages = assembled.messages; } + contextEnginePromptAuthority = assembled.promptAuthority ?? 
"assembled"; + if (contextEnginePromptAuthority === "preassembly_may_overflow") { + unwindowedContextEngineMessagesForPrecheck = + preassemblyContextEngineMessagesForPrecheck; + } if (assembled.systemPromptAddition) { systemPromptText = prependSystemPromptAddition({ systemPrompt: systemPromptText, @@ -2760,7 +2772,9 @@ export async function runEmbeddedAttempt( const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({ messages: activeSession.messages, - unwindowedMessages: unwindowedContextEngineMessagesForPrecheck, + ...(contextEnginePromptAuthority === "preassembly_may_overflow" + ? { unwindowedMessages: unwindowedContextEngineMessagesForPrecheck } + : {}), systemPrompt: systemPromptText, prompt: effectivePrompt, contextTokenBudget, diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts index 2cc4cb218da..67bdb2cf383 100644 --- a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts @@ -93,7 +93,7 @@ describe("preemptive-compaction", () => { expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve); }); - it("uses the larger unwindowed message estimate when context engine assembly windows history", () => { + it("uses the larger unwindowed message estimate when explicitly provided", () => { const result = shouldPreemptivelyCompactBeforePrompt({ messages: [makeAssistantHistory("small assembled window")], unwindowedMessages: [makeAssistantHistory(verboseHistory.repeat(4))], diff --git a/src/context-engine/types.ts b/src/context-engine/types.ts index 8a7d6e8b6f0..fa2e1860ed2 100644 --- a/src/context-engine/types.ts +++ b/src/context-engine/types.ts @@ -8,6 +8,20 @@ export type AssembleResult = { messages: AgentMessage[]; /** Estimated total tokens in assembled context */ estimatedTokens: number; + /** + * Controls which token estimate the runner treats as 
authoritative for + * preemptive overflow prechecks. The returned `messages` are always the + * prompt sent to the model; this only affects the precheck's token comparison. + * + * - "assembled": the precheck uses only the assembled prompt's estimate. + * - "preassembly_may_overflow": the precheck takes the maximum of the + * assembled estimate and the pre-assembly (unwindowed) session-history + * estimate. Engines opt into this when their assembled view can hide an + * overflow that would still affect the underlying transcript. + * + * Defaults to "assembled". + */ + promptAuthority?: "assembled" | "preassembly_may_overflow"; /** Optional context-engine-provided instructions prepended to the runtime system prompt */ systemPromptAddition?: string; };