fix: stabilize context engine prompt cache touches (#67767)

* fix: stabilize context engine prompt cache touches

* fix(changelog): document context-engine prompt cache touch stabilization
This commit is contained in:
Josh Lehman
2026-04-16 11:53:42 -07:00
committed by GitHub
parent ac717a92e8
commit a327b6750d
6 changed files with 273 additions and 39 deletions

View File

@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
- BlueBubbles/inbound: restore inbound image attachment downloads on Node 22+ by stripping incompatible bundled-undici dispatchers from the non-SSRF fetch path, accept `updated-message` webhooks carrying attachments, use event-type-aware dedup keys so attachment follow-ups are not rejected as duplicates, and retry attachment fetch from the BB API when the initial webhook arrives with an empty array. (#64105, #61861, #65430, #67510) Thanks @omarshahine.
- Agents/skills: sort prompt-facing `available_skills` entries by skill name after merging sources so `skills.load.extraDirs` order no longer changes prompt-cache prefixes. (#64198) Thanks @Bartok9.
- Agents/OpenAI Responses: add `models.providers.*.models.*.compat.supportsPromptCacheKey` so OpenAI-compatible proxies that forward `prompt_cache_key` can keep prompt caching enabled while incompatible endpoints can still force stripping. (#67427) Thanks @damselem.
- Agents/context engines: keep loop-hook and final `afterTurn` prompt-cache touch metadata aligned with the current assistant turn so cache-aware context engines retain accurate cache TTL state during tool loops. (#67767) Thanks @jalehman.
- Memory/dreaming: strip AI-facing inbound metadata envelopes from session-corpus user turns before normalization so REM topic extraction sees the user's actual message text, including array-shaped split envelopes. (#66548) Thanks @zqchris.
## 2026.4.15-beta.1

View File

@@ -2,7 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai";
import type { MemoryCitationsMode } from "../../../config/types.memory.js";
import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js";
import type { NormalizedUsage } from "../../usage.js";
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
import type { PromptCacheChange } from "../prompt-cache-observability.js";
import type { EmbeddedRunAttemptResult } from "./types.js";
@@ -103,6 +103,61 @@ export function findCurrentAttemptAssistantMessage(params: {
.find((message): message is AssistantMessage => message.role === "assistant");
}
/**
 * Coerce a raw timestamp value into epoch milliseconds.
 * Accepts a finite number (already epoch ms) or a `Date.parse`-able string;
 * anything else — including NaN/Infinity and unparseable strings — yields null.
 */
function parsePromptCacheTouchTimestamp(value: unknown): number | null {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value !== "string") {
    return null;
  }
  const epochMs = Date.parse(value);
  return Number.isFinite(epochMs) ? epochMs : null;
}
/**
 * Resolve the effective prompt-cache touch timestamp for the current assistant turn.
 *
 * When the last call reported cache metrics (a read or a write, even 0), the
 * live assistant timestamp wins so cache TTL state tracks the current turn;
 * otherwise the previously persisted touch timestamp is kept.
 */
export function resolvePromptCacheTouchTimestamp(params: {
  lastCallUsage?: NormalizedUsage;
  assistantTimestamp?: unknown;
  fallbackLastCacheTouchAt?: number | null;
}): number | null {
  const usage = params.lastCallUsage;
  // A call only "touches" the cache when the provider reported cache metrics.
  const touchedCache =
    typeof usage?.cacheRead === "number" || typeof usage?.cacheWrite === "number";
  if (!touchedCache) {
    return params.fallbackLastCacheTouchAt ?? null;
  }
  // Prefer the live turn's timestamp; fall back to persisted state, then null.
  const liveTouchAt = parsePromptCacheTouchTimestamp(params.assistantTimestamp);
  return liveTouchAt ?? params.fallbackLastCacheTouchAt ?? null;
}
export function buildLoopPromptCacheInfo(params: {
messagesSnapshot: AgentMessage[];
prePromptMessageCount: number;
retention?: "none" | "short" | "long";
fallbackLastCacheTouchAt?: number | null;
}): EmbeddedRunAttemptResult["promptCache"] {
const currentAttemptAssistant = findCurrentAttemptAssistantMessage({
messagesSnapshot: params.messagesSnapshot,
prePromptMessageCount: params.prePromptMessageCount,
});
const lastCallUsage = normalizeUsage(currentAttemptAssistant?.usage);
return buildContextEnginePromptCacheInfo({
retention: params.retention,
lastCallUsage,
lastCacheTouchAt: resolvePromptCacheTouchTimestamp({
lastCallUsage,
assistantTimestamp: currentAttemptAssistant?.timestamp,
fallbackLastCacheTouchAt: params.fallbackLastCacheTouchAt,
}),
});
}
export async function runAttemptContextEngineBootstrap(params: {
hadSessionFile: boolean;
contextEngine?: AttemptContextEngine;
@@ -207,51 +262,50 @@ export async function finalizeAttemptContextEngineTurn(params: {
let postTurnFinalizationSucceeded = true;
if (typeof params.contextEngine.afterTurn === "function") {
try {
await params.contextEngine.afterTurn({
sessionId: params.sessionIdUsed,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
messages: params.messagesSnapshot,
prePromptMessageCount: params.prePromptMessageCount,
tokenBudget: params.tokenBudget,
runtimeContext: params.runtimeContext,
});
} catch (afterTurnErr) {
postTurnFinalizationSucceeded = false;
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
}
} else {
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
if (newMessages.length > 0) {
if (typeof params.contextEngine.ingestBatch === "function") {
try {
await params.contextEngine.afterTurn({
sessionId: params.sessionIdUsed,
sessionKey: params.sessionKey,
sessionFile: params.sessionFile,
messages: params.messagesSnapshot,
prePromptMessageCount: params.prePromptMessageCount,
tokenBudget: params.tokenBudget,
runtimeContext: params.runtimeContext,
});
} catch (afterTurnErr) {
postTurnFinalizationSucceeded = false;
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
}
} else {
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
if (newMessages.length > 0) {
if (typeof params.contextEngine.ingestBatch === "function") {
try {
await params.contextEngine.ingestBatch({
sessionId: params.sessionIdUsed,
sessionKey: params.sessionKey,
messages: newMessages,
});
} catch (ingestErr) {
postTurnFinalizationSucceeded = false;
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
}
} else {
for (const msg of newMessages) {
try {
await params.contextEngine.ingestBatch({
await params.contextEngine.ingest?.({
sessionId: params.sessionIdUsed,
sessionKey: params.sessionKey,
messages: newMessages,
message: msg,
});
} catch (ingestErr) {
postTurnFinalizationSucceeded = false;
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
}
} else {
for (const msg of newMessages) {
try {
await params.contextEngine.ingest?.({
sessionId: params.sessionIdUsed,
sessionKey: params.sessionKey,
message: msg,
});
} catch (ingestErr) {
postTurnFinalizationSucceeded = false;
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
}
}
}
}
}
}
if (
!params.promptError &&

View File

@@ -7,10 +7,12 @@ import {
} from "../../../plugins/memory-state.js";
import {
type AttemptContextEngine,
buildLoopPromptCacheInfo,
assembleAttemptContextEngine,
buildContextEnginePromptCacheInfo,
findCurrentAttemptAssistantMessage,
finalizeAttemptContextEngineTurn,
resolvePromptCacheTouchTimestamp,
runAttemptContextEngineBootstrap,
} from "./attempt.context-engine-helpers.js";
import {
@@ -367,6 +369,88 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
expect(promptCache).toEqual({ retention: "short" });
});
it("derives live loop prompt-cache info from the current attempt assistant", () => {
  // Assistant after the prompt fence carries cache metrics plus a timestamp,
  // so the live timestamp should win over the persisted fallback (123).
  const assistantWithCacheUsage = {
    role: "assistant",
    content: "tool use",
    timestamp: "2026-04-16T16:49:59.536Z",
    usage: {
      input: 1,
      output: 2,
      cacheRead: 39036,
      cacheWrite: 59934,
      total: 98973,
    },
  } as unknown as AgentMessage;
  const promptCacheInfo = buildLoopPromptCacheInfo({
    messagesSnapshot: [seedMessage, assistantWithCacheUsage],
    prePromptMessageCount: 1,
    retention: "short",
    fallbackLastCacheTouchAt: 123,
  });
  expect(promptCacheInfo).toEqual(
    expect.objectContaining({
      retention: "short",
      lastCallUsage: expect.objectContaining({
        cacheRead: 39036,
        cacheWrite: 59934,
        total: 98973,
      }),
      lastCacheTouchAt: Date.parse("2026-04-16T16:49:59.536Z"),
    }),
  );
});
it("falls back to the persisted cache touch when loop usage has no cache metrics", () => {
  // No cacheRead/cacheWrite on the assistant usage: the persisted touch (123)
  // must be retained instead of the assistant's own timestamp.
  const assistantWithoutCacheMetrics = {
    role: "assistant",
    content: "tool use",
    timestamp: "2026-04-16T16:49:59.536Z",
    usage: {
      input: 1,
      output: 2,
      total: 3,
    },
  } as unknown as AgentMessage;
  const promptCacheInfo = buildLoopPromptCacheInfo({
    messagesSnapshot: [seedMessage, assistantWithoutCacheMetrics],
    prePromptMessageCount: 1,
    retention: "short",
    fallbackLastCacheTouchAt: 123,
  });
  expect(promptCacheInfo).toEqual(
    expect.objectContaining({
      retention: "short",
      lastCallUsage: expect.objectContaining({
        total: 3,
      }),
      lastCacheTouchAt: 123,
    }),
  );
});
it("derives a live cache touch timestamp for final afterTurn usage snapshots", () => {
  const assistantTimestamp = "2026-04-16T17:04:46.974Z";
  // cacheWrite of 0 still counts as reported cache metrics, so the live
  // assistant timestamp should win over the fallback of 123.
  const usageWithCacheMetrics = {
    input: 1,
    output: 2,
    cacheRead: 39036,
    cacheWrite: 0,
    total: 39039,
  };
  const resolved = resolvePromptCacheTouchTimestamp({
    lastCallUsage: usageWithCacheMetrics,
    assistantTimestamp,
    fallbackLastCacheTouchAt: 123,
  });
  expect(resolved).toBe(Date.parse(assistantTimestamp));
});
it("threads prompt-cache break observations into afterTurn", async () => {
const afterTurn = vi.fn(async (_params: AfterTurnPromptCacheCall) => {});

View File

@@ -182,9 +182,11 @@ import { mapThinkingLevel } from "../utils.js";
import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js";
import {
assembleAttemptContextEngine,
buildLoopPromptCacheInfo,
buildContextEnginePromptCacheInfo,
findCurrentAttemptAssistantMessage,
finalizeAttemptContextEngineTurn,
resolvePromptCacheTouchTimestamp,
resolveAttemptBootstrapContext,
runAttemptContextEngineBootstrap,
} from "./attempt.context-engine-helpers.js";
@@ -1071,6 +1073,24 @@ export async function runEmbeddedAttempt(
tokenBudget: params.contextTokenBudget,
modelId: params.modelId,
getPrePromptMessageCount: () => prePromptMessageCount,
getRuntimeContext: ({ messages, prePromptMessageCount: loopPrePromptMessageCount }) =>
buildAfterTurnRuntimeContext({
attempt: params,
workspaceDir: effectiveWorkspace,
agentDir,
tokenBudget: params.contextTokenBudget,
promptCache:
promptCache ??
buildLoopPromptCacheInfo({
messagesSnapshot: messages,
prePromptMessageCount: loopPrePromptMessageCount,
retention: effectivePromptCacheRetention,
fallbackLastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
provider: params.provider,
modelId: params.modelId,
}),
}),
}),
});
}
const cacheTrace = createCacheTrace({
@@ -2235,13 +2255,18 @@ export async function runEmbeddedAttempt(
changes: cacheBreak?.changes ?? promptCacheChangesForTurn,
}
: undefined;
const fallbackLastCacheTouchAt = readLastCacheTtlTimestamp(sessionManager, {
provider: params.provider,
modelId: params.modelId,
});
promptCache = buildContextEnginePromptCacheInfo({
retention: effectivePromptCacheRetention,
lastCallUsage,
observation: promptCacheObservation,
lastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
provider: params.provider,
modelId: params.modelId,
lastCacheTouchAt: resolvePromptCacheTouchTimestamp({
lastCallUsage,
assistantTimestamp: currentAttemptAssistant?.timestamp,
fallbackLastCacheTouchAt,
}),
});

View File

@@ -29,6 +29,15 @@ function makeToolResult(id: string, text: string, toolName = "grep"): AgentMessa
});
}
/**
 * Build an assistant AgentMessage for tests.
 * Fields in `extras` are spread last so they can override the defaults
 * (e.g. supply a fixed `timestamp` or a `usage` snapshot).
 */
function makeAssistant(text: string, extras: Record<string, unknown> = {}): AgentMessage {
  const defaults = {
    role: "assistant",
    content: text,
    timestamp: Date.now(),
  };
  return castAgentMessage({ ...defaults, ...extras });
}
/** Shorthand for a tool result attributed to the "read" tool. */
function makeReadToolResult(callId: string, resultText: string): AgentMessage {
  return makeToolResult(callId, resultText, "read");
}
@@ -319,6 +328,10 @@ describe("installContextEngineLoopHook", () => {
agent: ReturnType<typeof makeGuardableAgent>,
engine: MockedEngine,
prePromptCount?: number,
getRuntimeContext?: (params: {
messages: AgentMessage[];
prePromptMessageCount: number;
}) => Record<string, unknown> | undefined,
): () => void {
return installContextEngineLoopHook({
agent,
@@ -329,6 +342,7 @@ describe("installContextEngineLoopHook", () => {
tokenBudget,
modelId,
...(prePromptCount !== undefined ? { getPrePromptMessageCount: () => prePromptCount } : {}),
...(getRuntimeContext ? { getRuntimeContext } : {}),
});
}
@@ -361,6 +375,54 @@ describe("installContextEngineLoopHook", () => {
expect(engine.assemble).toHaveBeenCalledTimes(1);
});
it("passes runtimeContext through loop-hook afterTurn calls", async () => {
  const agent = makeGuardableAgent();
  const engine = makeMockEngine();
  // The hook's getRuntimeContext callback supplies provider/model/prompt-cache
  // state; the loop hook must thread it into afterTurn unchanged.
  installHook(agent, engine, 1, () => ({
    provider: "anthropic",
    modelId: modelId,
    promptCache: {
      retention: "short",
      lastCacheTouchAt: 123,
    },
  }));
  const loopMessages = [makeUser("first"), makeToolResult("call_1", "result")];
  await callTransform(agent, loopMessages);
  expect(engine.afterTurn).toHaveBeenCalledTimes(1);
  const firstAfterTurnArgs = engine.afterTurn.mock.calls[0]?.[0];
  expect(firstAfterTurnArgs).toMatchObject({
    prePromptMessageCount: 1,
    runtimeContext: {
      provider: "anthropic",
      modelId,
      promptCache: {
        retention: "short",
        lastCacheTouchAt: 123,
      },
    },
  });
});
it("passes loop messages and the prompt fence into the runtimeContext callback", async () => {
  const agent = makeGuardableAgent();
  const engine = makeMockEngine();
  const getRuntimeContext = vi.fn(() => ({ provider: "anthropic" }));
  installHook(agent, engine, 1, getRuntimeContext);
  // Include an assistant carrying cache usage so the callback receives a
  // realistic mid-loop snapshot alongside the prompt fence index.
  const loopMessages = [
    makeUser("first"),
    makeAssistant("tool use", { usage: { cacheRead: 40, total: 50 }, timestamp: 456 }),
    makeToolResult("call_1", "result"),
  ];
  await callTransform(agent, loopMessages);
  expect(getRuntimeContext).toHaveBeenCalledWith({
    messages: loopMessages,
    prePromptMessageCount: 1,
  });
});
it("calls afterTurn and assemble when new messages are appended after the first call", async () => {
const agent = makeGuardableAgent();
const engine = makeMockEngine();

View File

@@ -1,5 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ContextEngine } from "../../context-engine/types.js";
import type { ContextEngine, ContextEngineRuntimeContext } from "../../context-engine/types.js";
import {
CHARS_PER_TOKEN_ESTIMATE,
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
@@ -198,6 +198,10 @@ export function installContextEngineLoopHook(params: {
tokenBudget?: number;
modelId: string;
getPrePromptMessageCount?: () => number;
getRuntimeContext?: (params: {
messages: AgentMessage[];
prePromptMessageCount: number;
}) => ContextEngineRuntimeContext | undefined;
}): () => void {
const { contextEngine, sessionId, sessionKey, sessionFile, tokenBudget, modelId } = params;
const mutableAgent = params.agent as GuardableAgentRecord;
@@ -237,6 +241,10 @@ export function installContextEngineLoopHook(params: {
messages: sourceMessages,
prePromptMessageCount,
tokenBudget,
runtimeContext: params.getRuntimeContext?.({
messages: sourceMessages,
prePromptMessageCount,
}),
});
} else {
const newMessages = sourceMessages.slice(prePromptMessageCount);