From 5a36a5f9085a9a44251eea3434cfd05ece872da7 Mon Sep 17 00:00:00 2001
From: Eva <eva@100yen.org>
Date: Fri, 1 May 2026 18:29:46 +0700
Subject: [PATCH] fix(context-engine): snapshot pre-assembly messages before
 assemble

Address PR #74255 review feedback:

- Snapshot activeSession.messages before calling assembleAttemptContextEngine
  so engines that window history in place (allowed by the assemble contract)
  cannot leave the precheck reading already-windowed messages instead of
  the true pre-assembly state. Add a regression that wires up an in-place
  windowing engine and asserts unwindowedMessages still reflects the
  pre-assembly transcript. (Codex P2)

- Clarify the AssembleResult.promptAuthority docstring to spell out the
  two precheck modes (assembled-only vs max(assembled, preassembly))
  so engine authors do not misimplement the opt-in. (Copilot)

- Document promptAuthority in docs/concepts/context-engine.md, regenerate
  the plugin-sdk API baseline, and add a CHANGELOG Unreleased Fixes entry
  for the public contract addition. (Codex P2/P3)
---
 CHANGELOG.md                                  |  2 ++
 .../.generated/plugin-sdk-api-baseline.sha256 |  4 +-
 docs/concepts/context-engine.md               | 11 +++++
 ...mpt.spawn-workspace.context-engine.test.ts | 43 +++++++++++++++++++
 src/agents/pi-embedded-runner/run/attempt.ts  |  8 +++-
 src/context-engine/types.ts                   | 14 ++++--
 6 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42cda82b7fc..c97b58d2477 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -70,6 +70,8 @@ Docs: https://docs.openclaw.ai
 - MCP/stdio: settle MCP stdio transport send() from the write callback instead of resolving immediately on buffer acceptance, so async write errors reject the promise instead of being lost. Refs #75438.
 - Process/exec: add stdin error listener in runCommandWithTimeout so EPIPE from a prematurely-exited child is swallowed instead of escaping to uncaughtException. Refs #75438.
 - Voice Call/realtime: add default-off fast memory/session context for `openclaw_agent_consult`, giving live calls a bounded answer-or-miss path before the full agent consult. Fixes #71849. Thanks @amzzzzzzz.
+- Context Engine: treat assembled prompt as the default authority for preemptive overflow prechecks so engines that return a windowed, self-contained context no longer trigger false hard-fail compactions on huge raw history. Engines whose assembled view can hide overflow risk can opt back into the legacy behavior with `AssembleResult.promptAuthority: "preassembly_may_overflow"`. (#74255)
+
 - Google Meet: interrupt Realtime provider output when local barge-in clears playback, so command-pair audio stops model speech instead of only restarting Chrome playback. Fixes #73850. (#73834) Thanks @shhtheonlyperson.
 - Gateway/config: cap oversized plugin-owned schemas in the full `config.schema` response so large installed plugin sets cannot balloon Gateway RSS or crash schema clients. Thanks @vincentkoc.
 - Plugins/update: skip ClawHub and marketplace plugin updates when the bundled version is newer than the recorded installed version, so `openclaw update` no longer overwrites working bundled plugins with older external packages. Fixes #75447. Thanks @amknight.
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index f9f21541e3b..c03babba30d 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-c1446005a26262d6b817d72493471d11c618b98441fad2014f1cf422bfe64bc9  plugin-sdk-api-baseline.json
-1b7d71eaabcae7d957396e7ff242598ef22b51851bc3fe1f4b58f2c2e5bf1459  plugin-sdk-api-baseline.jsonl
+37787172adf7a55a32097599b4bf5729fc7138c8743c6f4c9d58fc8d01df72a1  plugin-sdk-api-baseline.json
+0ec4957528477832085c638a5f7f691c878ba199f3e81f330f162c27cfd9ebf4  plugin-sdk-api-baseline.jsonl
diff --git a/docs/concepts/context-engine.md b/docs/concepts/context-engine.md
index 393896e1a42..fdd14f6434f 100644
--- a/docs/concepts/context-engine.md
+++ b/docs/concepts/context-engine.md
@@ -197,6 +197,17 @@ Required members:
 <ParamField path="systemPromptAddition" type="string">
   Prepended to the system prompt.
 </ParamField>
+<ParamField path="promptAuthority" type='"assembled" | "preassembly_may_overflow"'>
+  Controls which token estimate the runner uses for preemptive overflow
+  prechecks. Defaults to `"assembled"`, which means only the assembled
+  prompt's estimate is checked — appropriate for engines that return a
+  windowed, self-contained context. Set to `"preassembly_may_overflow"` only
+  when your assembled view can hide overflow risk in the underlying
+  transcript; the runner then takes the maximum of the assembled estimate
+  and the pre-assembly (unwindowed) session-history estimate when deciding
+  whether to preemptively compact. Either way, the messages you return are
+  still what the model sees — `promptAuthority` only affects the precheck.
+</ParamField>
 
 `compact` returns a `CompactResult`. When compaction rotates the active
 transcript, `result.sessionId` and `result.sessionFile` identify the successor
diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
index cc0a4731af5..9aef361fd80 100644
--- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
@@ -391,6 +391,49 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
     expect(hoisted.preemptiveCompactionCalls.at(-1)).toHaveProperty("unwindowedMessages");
   });
 
+  it("snapshots pre-assembly messages before assemble even when the engine windows in place", async () => {
+    const hugeHistory = "large raw history ".repeat(25_000);
+    const preassemblyMarker = { role: "user", content: hugeHistory, timestamp: 1 } as AgentMessage;
+
+    await createContextEngineAttemptRunner({
+      contextEngine: createTestContextEngine({
+        assemble: async ({ messages }: { messages: AgentMessage[] }) => {
+          // Simulate an engine that windows its input array IN PLACE. The assemble
+          // contract does not require immutability, so the runner must snapshot
+          // first. NOTE(review): assumes the runner passes the live session array — confirm.
+          messages.length = 0;
+          messages.push({ role: "user", content: "windowed", timestamp: 2 } as AgentMessage);
+          return {
+            messages: [
+              { role: "user", content: "small assembled context", timestamp: 1 },
+            ] as AgentMessage[],
+            estimatedTokens: 8,
+            promptAuthority: "preassembly_may_overflow",
+          };
+        },
+      }),
+      sessionKey,
+      tempPaths,
+      sessionMessages: [preassemblyMarker],
+      attemptOverrides: {
+        contextTokenBudget: 500,
+      },
+      sessionPrompt: async (session) => {
+        session.messages = [
+          ...session.messages,
+          { role: "assistant", content: "done", timestamp: 3 },
+        ];
+      },
+    });
+
+    const lastCall = hoisted.preemptiveCompactionCalls.at(-1);
+    expect(lastCall).toHaveProperty("unwindowedMessages");
+    const unwindowed = (lastCall as { unwindowedMessages?: AgentMessage[] }).unwindowedMessages;
+    // The snapshot must still equal the seeded pre-assembly transcript, not the
+    // single "windowed" message that assemble wrote into its input array.
+    expect(unwindowed).toEqual([preassemblyMarker]);
+  });
+
   it("keeps gateway model runs independent from agent context and session history", async () => {
     const bootstrap = vi.fn(async () => ({ bootstrapped: true }));
     const assemble = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => ({
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index f0d105a16a1..5548b319b5c 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -2074,7 +2074,11 @@ export async function runEmbeddedAttempt(
 
         if (activeContextEngine) {
           try {
-            const preassemblyContextEngineMessagesForPrecheck = activeSession.messages;
+            // Snapshot before assemble: the assemble contract does not require
+            // the input array to be treated immutably, so an engine that windows
+            // history in place would otherwise leave the precheck reading
+            // already-windowed messages instead of the true pre-assembly state.
+            const preassemblyContextEngineMessagesForPrecheck = activeSession.messages.slice();
             const assembled = await assembleAttemptContextEngine({
               contextEngine: activeContextEngine,
               sessionId: params.sessionId,
@@ -2095,7 +2099,7 @@ export async function runEmbeddedAttempt(
             contextEnginePromptAuthority = assembled.promptAuthority ?? "assembled";
             if (contextEnginePromptAuthority === "preassembly_may_overflow") {
               unwindowedContextEngineMessagesForPrecheck =
-                preassemblyContextEngineMessagesForPrecheck.slice();
+                preassemblyContextEngineMessagesForPrecheck;
             }
             if (assembled.systemPromptAddition) {
               systemPromptText = prependSystemPromptAddition({
diff --git a/src/context-engine/types.ts b/src/context-engine/types.ts
index a540ab84ca5..fa2e1860ed2 100644
--- a/src/context-engine/types.ts
+++ b/src/context-engine/types.ts
@@ -9,10 +9,16 @@ export type AssembleResult = {
   /** Estimated total tokens in assembled context */
   estimatedTokens: number;
   /**
-   * Declares which message set overflow prechecks should treat as authoritative.
-   * "assembled" means the returned messages are already windowed and complete;
-   * "preassembly_may_overflow" asks the runner to also check pre-assembly
-   * session history because the context engine may hide an overflow risk.
+   * Controls which token estimate the runner treats as authoritative for
+   * preemptive overflow prechecks. The returned `messages` are always the
+   * prompt sent to the model; this only affects the precheck's token comparison.
+   *
+   * - "assembled": the precheck uses only the assembled prompt's estimate.
+   * - "preassembly_may_overflow": the precheck takes the maximum of the
+   *   assembled estimate and the pre-assembly (unwindowed) session-history
+   *   estimate. Engines opt into this when their assembled view can hide an
+   *   overflow that would still affect the underlying transcript.
+   *
    * Defaults to "assembled".
    */
   promptAuthority?: "assembled" | "preassembly_may_overflow";