diff --git a/CHANGELOG.md b/CHANGELOG.md
index 42cda82b7fc..7a10b3ade53 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai
- MCP/stdio: settle MCP stdio transport send() from the write callback instead of resolving immediately on buffer acceptance, so async write errors reject the promise instead of being lost. Refs #75438.
- Process/exec: add stdin error listener in runCommandWithTimeout so EPIPE from a prematurely-exited child is swallowed instead of escaping to uncaughtException. Refs #75438.
- Voice Call/realtime: add default-off fast memory/session context for `openclaw_agent_consult`, giving live calls a bounded answer-or-miss path before the full agent consult. Fixes #71849. Thanks @amzzzzzzz.
+
- Google Meet: interrupt Realtime provider output when local barge-in clears playback, so command-pair audio stops model speech instead of only restarting Chrome playback. Fixes #73850. (#73834) Thanks @shhtheonlyperson.
- Gateway/config: cap oversized plugin-owned schemas in the full `config.schema` response so large installed plugin sets cannot balloon Gateway RSS or crash schema clients. Thanks @vincentkoc.
- Plugins/update: skip ClawHub and marketplace plugin updates when the bundled version is newer than the recorded installed version, so `openclaw update` no longer overwrites working bundled plugins with older external packages. Fixes #75447. Thanks @amknight.
@@ -145,6 +146,7 @@ Docs: https://docs.openclaw.ai
- Pairing: surface unexpected allowlist filesystem stat errors instead of treating the allowlist as missing, so permission and I/O failures are visible during pairing authorization checks. (#63324) Thanks @franciscomaestre.
- macOS app: reserve layout space for exec approval command details so the allow dialog no longer overlaps the command, context, and action buttons. (#75470) Thanks @ngutman.
- Agents/failover: carry `sessionId`, `lane`, `provider`, `model`, and `profileId` attribution through `FailoverError` and `describeFailoverError`/`coerceToFailoverError` so structured error logs (e.g. `gateway.err.log` ingestion) can attribute exhausted-fallback wrapper errors to the originating session and last-attempted provider instead of dropping the metadata after the per-profile errors. Fixes #42713. (#73506) Thanks @wenxu007.
+- Context Engine: treat assembled prompt as the default authority for preemptive overflow prechecks so engines that return a windowed, self-contained context no longer trigger false hard-fail compactions on huge raw history. Engines whose assembled view can hide overflow risk can opt back into the legacy behavior with `AssembleResult.promptAuthority: "preassembly_may_overflow"`. (#74255) Thanks @100yenadmin.
## 2026.4.29
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index f9f21541e3b..c03babba30d 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-c1446005a26262d6b817d72493471d11c618b98441fad2014f1cf422bfe64bc9 plugin-sdk-api-baseline.json
-1b7d71eaabcae7d957396e7ff242598ef22b51851bc3fe1f4b58f2c2e5bf1459 plugin-sdk-api-baseline.jsonl
+37787172adf7a55a32097599b4bf5729fc7138c8743c6f4c9d58fc8d01df72a1 plugin-sdk-api-baseline.json
+0ec4957528477832085c638a5f7f691c878ba199f3e81f330f162c27cfd9ebf4 plugin-sdk-api-baseline.jsonl
diff --git a/docs/concepts/context-engine.md b/docs/concepts/context-engine.md
index 393896e1a42..fdd14f6434f 100644
--- a/docs/concepts/context-engine.md
+++ b/docs/concepts/context-engine.md
@@ -197,6 +197,17 @@ Required members:
Prepended to the system prompt.
+
+ `promptAuthority` controls which token estimate the runner uses for
+ preemptive overflow prechecks. It defaults to `"assembled"`, which means
+ only the assembled prompt's estimate is checked — appropriate for engines
+ that return a windowed, self-contained context. Set it to
+ `"preassembly_may_overflow"` only when your assembled view can hide
+ overflow risk in the underlying transcript; the runner then takes the
+ maximum of the assembled estimate and the pre-assembly (unwindowed)
+ session-history estimate when deciding whether to preemptively compact.
+ Either way, the messages you return are still what the model sees — `promptAuthority` only affects the precheck.
+
`compact` returns a `CompactResult`. When compaction rotates the active
transcript, `result.sessionId` and `result.sessionFile` identify the successor
diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
index 74c65599e66..9aef361fd80 100644
--- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts
@@ -322,6 +322,118 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
);
});
+ it("uses assembled context as the default precheck authority", async () => { // engine returns no promptAuthority here
+ let sawPrompt = false;
+ const hugeHistory = "large raw history ".repeat(25_000); // 18 chars x 25,000 = 450,000 chars of raw history
+
+ const result = await createContextEngineAttemptRunner({
+ contextEngine: createTestContextEngine({
+ assemble: async () => ({
+ messages: [
+ { role: "user", content: "small assembled context", timestamp: 1 },
+ ] as AgentMessage[],
+ estimatedTokens: 8,
+ }),
+ }),
+ sessionKey,
+ tempPaths,
+ sessionMessages: [{ role: "user", content: hugeHistory, timestamp: 1 }] as AgentMessage[], // pre-assembly session state
+ attemptOverrides: {
+ contextTokenBudget: 500, // small budget; the raw history above is intended to exceed it
+ },
+ sessionPrompt: async (session) => { // harness hook: records that it ran and appends an assistant reply
+ sawPrompt = true;
+ session.messages = [
+ ...session.messages,
+ { role: "assistant", content: "done", timestamp: 2 },
+ ];
+ },
+ });
+
+ expect(sawPrompt).toBe(true); // the prompt hook was reached
+ expect(result.promptError).toBeNull();
+ expect(result.promptErrorSource).toBeNull();
+ expect(hoisted.preemptiveCompactionCalls.at(-1)).not.toHaveProperty("unwindowedMessages"); // key must be absent, not merely undefined
+ });
+
+ it("honors context engines that opt into preassembly overflow authority", async () => { // same fixture as the default-authority test, plus the opt-in flag
+ let sawPrompt = false;
+ const hugeHistory = "large raw history ".repeat(25_000); // 18 chars x 25,000 = 450,000 chars of raw history
+
+ const result = await createContextEngineAttemptRunner({
+ contextEngine: createTestContextEngine({
+ assemble: async () => ({
+ messages: [
+ { role: "user", content: "small assembled context", timestamp: 1 },
+ ] as AgentMessage[],
+ estimatedTokens: 8,
+ promptAuthority: "preassembly_may_overflow", // the only difference from the default-authority fixture
+ }),
+ }),
+ sessionKey,
+ tempPaths,
+ sessionMessages: [{ role: "user", content: hugeHistory, timestamp: 1 }] as AgentMessage[], // pre-assembly session state
+ attemptOverrides: {
+ contextTokenBudget: 500, // small budget; the raw history above is intended to exceed it
+ },
+ sessionPrompt: async (session) => { // harness hook: must NOT run in this scenario (asserted below)
+ sawPrompt = true;
+ session.messages = [
+ ...session.messages,
+ { role: "assistant", content: "done", timestamp: 2 },
+ ];
+ },
+ });
+
+ expect(sawPrompt).toBe(false); // the prompt hook was never reached
+ expect(result.promptErrorSource).toBe("precheck"); // failure is attributed to the precheck
+ expect(result.preflightRecovery?.route).toBe("compact_only");
+ expect(hoisted.preemptiveCompactionCalls.at(-1)).toHaveProperty("unwindowedMessages"); // pre-assembly messages were forwarded
+ });
+
+ it("snapshots pre-assembly messages before assemble even when the engine windows in place", async () => {
+ const hugeHistory = "large raw history ".repeat(25_000); // 18 chars x 25,000 = 450,000 chars of raw history
+ const preassemblyMarker = { role: "user", content: hugeHistory, timestamp: 1 } as AgentMessage; // compared against the snapshot at the end
+
+ await createContextEngineAttemptRunner({
+ contextEngine: createTestContextEngine({
+ assemble: async ({ messages }: { messages: AgentMessage[] }) => {
+ // Simulate an engine that windows the input array IN PLACE.
+ // The assemble contract does not require immutability, so the
+ // runner must have already snapshotted before calling us.
+ messages.length = 0; // empties the array the runner passed in
+ messages.push({ role: "user", content: "windowed", timestamp: 2 } as AgentMessage);
+ return {
+ messages: [
+ { role: "user", content: "small assembled context", timestamp: 1 },
+ ] as AgentMessage[],
+ estimatedTokens: 8,
+ promptAuthority: "preassembly_may_overflow", // opt in so the snapshot is forwarded to the precheck
+ };
+ },
+ }),
+ sessionKey,
+ tempPaths,
+ sessionMessages: [preassemblyMarker],
+ attemptOverrides: {
+ contextTokenBudget: 500,
+ },
+ sessionPrompt: async (session) => { // harness hook: appends an assistant reply if it runs
+ session.messages = [
+ ...session.messages,
+ { role: "assistant", content: "done", timestamp: 3 },
+ ];
+ },
+ });
+
+ const lastCall = hoisted.preemptiveCompactionCalls.at(-1); // params of the most recent recorded precheck call
+ expect(lastCall).toHaveProperty("unwindowedMessages");
+ const unwindowed = (lastCall as { unwindowedMessages?: AgentMessage[] }).unwindowedMessages;
+ // The snapshot must reflect the true pre-assembly state, not the in-place
+ // windowed array that assemble mutated.
+ expect(unwindowed).toEqual([preassemblyMarker]);
+ });
+
it("keeps gateway model runs independent from agent context and session history", async () => {
const bootstrap = vi.fn(async () => ({ bootstrapped: true }));
const assemble = vi.fn(async ({ messages }: { messages: AgentMessage[] }) => ({
diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts
index f79a2ea6b51..cd8d3877040 100644
--- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts
@@ -26,6 +26,8 @@ type SubscribeEmbeddedPiSessionFn =
typeof import("../../pi-embedded-subscribe.js").subscribeEmbeddedPiSession;
type AcquireSessionWriteLockFn =
typeof import("../../session-write-lock.js").acquireSessionWriteLock;
+type ShouldPreemptivelyCompactBeforePromptFn =
+ typeof import("./preemptive-compaction.js").shouldPreemptivelyCompactBeforePrompt; // type of the real module export
type SubscriptionMock = ReturnType;
type UnknownMock = Mock<(...args: unknown[]) => unknown>;
@@ -78,6 +80,7 @@ type AttemptSpawnWorkspaceHoisted = {
(sessionKey: string | undefined, config: unknown) => number | undefined
>;
limitHistoryTurnsMock: Mock<(messages: T, limit: number | undefined) => T>;
+ preemptiveCompactionCalls: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0][]; // first-argument params of each recorded precheck call
sessionManager: SessionManagerMocks;
};
@@ -148,6 +151,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
const limitHistoryTurnsMock = vi.fn<(messages: T, limit: number | undefined) => T>(
(messages) => messages,
);
+ const preemptiveCompactionCalls: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0][] = []; // shared log of precheck call params
const sessionManager = {
getLeafEntry: vi.fn(() => null),
branch: vi.fn(),
@@ -181,6 +185,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
runContextEngineMaintenanceMock,
getDmHistoryLimitFromSessionKeyMock,
limitHistoryTurnsMock,
+ preemptiveCompactionCalls,
sessionManager,
};
});
@@ -583,6 +588,19 @@ vi.mock("../compaction-runtime-context.js", () => ({
buildEmbeddedCompactionRuntimeContext: () => ({}),
}));
+vi.mock("./preemptive-compaction.js", async (importOriginal) => { // partial mock: keep every real export, wrap only the precheck
+  const actual = await importOriginal<typeof import("./preemptive-compaction.js")>(); // typed so the spread and the call below type-check
+  return {
+    ...actual,
+    shouldPreemptivelyCompactBeforePrompt: (
+      params: Parameters<ShouldPreemptivelyCompactBeforePromptFn>[0],
+    ) => {
+      hoisted.preemptiveCompactionCalls.push(params); // record the params so tests can inspect what was passed
+      return actual.shouldPreemptivelyCompactBeforePrompt(params); // delegate so the real decision is returned unchanged
+    },
+  };
+});
+
vi.mock("../compaction-safety-timeout.js", () => ({
resolveCompactionTimeoutMs: () => undefined,
}));
@@ -770,6 +788,7 @@ export function resetEmbeddedAttemptHarness(
hoisted.runContextEngineMaintenanceMock.mockReset().mockResolvedValue(undefined);
hoisted.getDmHistoryLimitFromSessionKeyMock.mockReset().mockReturnValue(undefined);
hoisted.limitHistoryTurnsMock.mockReset().mockImplementation((messages) => messages);
+ hoisted.preemptiveCompactionCalls.length = 0;
hoisted.sessionManager.getLeafEntry.mockReset().mockReturnValue(null);
hoisted.sessionManager.branch.mockReset();
hoisted.sessionManager.resetLeaf.mockReset();
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index a473e3f45e3..5548b319b5c 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -10,6 +10,7 @@ import {
import { isAcpRuntimeSpawnAvailable } from "../../../acp/runtime/availability.js";
import { filterHeartbeatPairs } from "../../../auto-reply/heartbeat-filter.js";
import { getRuntimeConfig } from "../../../config/config.js";
+import type { AssembleResult } from "../../../context-engine/types.js";
import { emitTrustedDiagnosticEvent } from "../../../infra/diagnostic-events.js";
import {
createChildDiagnosticTraceContext,
@@ -1527,6 +1528,8 @@ export async function runEmbeddedAttempt(
}
let prePromptMessageCount = activeSession.messages.length;
let unwindowedContextEngineMessagesForPrecheck: AgentMessage[] | undefined;
+ let contextEnginePromptAuthority: NonNullable<AssembleResult["promptAuthority"]> =
+   "assembled"; // initial value; same fallback used when assemble() returns no promptAuthority
abortSessionForYield = () => {
yieldAbortSettled = Promise.resolve(activeSession.abort());
};
@@ -2071,7 +2074,11 @@ export async function runEmbeddedAttempt(
if (activeContextEngine) {
try {
- unwindowedContextEngineMessagesForPrecheck = activeSession.messages.slice();
+ // Snapshot before assemble: the assemble contract does not require
+ // the input array to be treated immutably, so an engine that windows
+ // history in place would otherwise leave the precheck reading
+ // already-windowed messages instead of the true pre-assembly state.
+ const preassemblyContextEngineMessagesForPrecheck = activeSession.messages.slice();
const assembled = await assembleAttemptContextEngine({
contextEngine: activeContextEngine,
sessionId: params.sessionId,
@@ -2089,6 +2096,11 @@ export async function runEmbeddedAttempt(
if (assembled.messages !== activeSession.messages) {
activeSession.agent.state.messages = assembled.messages;
}
+ contextEnginePromptAuthority = assembled.promptAuthority ?? "assembled";
+ if (contextEnginePromptAuthority === "preassembly_may_overflow") {
+ unwindowedContextEngineMessagesForPrecheck =
+ preassemblyContextEngineMessagesForPrecheck;
+ }
if (assembled.systemPromptAddition) {
systemPromptText = prependSystemPromptAddition({
systemPrompt: systemPromptText,
@@ -2760,7 +2772,9 @@ export async function runEmbeddedAttempt(
const preemptiveCompaction = shouldPreemptivelyCompactBeforePrompt({
messages: activeSession.messages,
- unwindowedMessages: unwindowedContextEngineMessagesForPrecheck,
+ ...(contextEnginePromptAuthority === "preassembly_may_overflow"
+ ? { unwindowedMessages: unwindowedContextEngineMessagesForPrecheck }
+ : {}),
systemPrompt: systemPromptText,
prompt: effectivePrompt,
contextTokenBudget,
diff --git a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts
index 2cc4cb218da..67bdb2cf383 100644
--- a/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts
+++ b/src/agents/pi-embedded-runner/run/preemptive-compaction.test.ts
@@ -93,7 +93,7 @@ describe("preemptive-compaction", () => {
expect(result.estimatedPromptTokens).toBeLessThan(result.promptBudgetBeforeReserve);
});
- it("uses the larger unwindowed message estimate when context engine assembly windows history", () => {
+ it("uses the larger unwindowed message estimate when explicitly provided", () => {
const result = shouldPreemptivelyCompactBeforePrompt({
messages: [makeAssistantHistory("small assembled window")],
unwindowedMessages: [makeAssistantHistory(verboseHistory.repeat(4))],
diff --git a/src/context-engine/types.ts b/src/context-engine/types.ts
index 8a7d6e8b6f0..fa2e1860ed2 100644
--- a/src/context-engine/types.ts
+++ b/src/context-engine/types.ts
@@ -8,6 +8,20 @@ export type AssembleResult = {
messages: AgentMessage[];
/** Estimated total tokens in assembled context */
estimatedTokens: number;
+ /**
+ * Controls which token estimate the runner treats as authoritative for
+ * preemptive overflow prechecks. The returned `messages` are always the
+ * prompt sent to the model; this only affects the precheck's token comparison.
+ *
+ * - "assembled": the precheck uses only the assembled prompt's estimate.
+ * - "preassembly_may_overflow": the precheck takes the maximum of the
+ * assembled estimate and the pre-assembly (unwindowed) session-history
+ * estimate. Engines opt into this when their assembled view can hide an
+ * overflow that would still affect the underlying transcript.
+ *
+ * Defaults to "assembled".
+ */
+ promptAuthority?: "assembled" | "preassembly_may_overflow";
/** Optional context-engine-provided instructions prepended to the runtime system prompt */
systemPromptAddition?: string;
};