mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:10:45 +00:00
fix: stabilize context engine prompt cache touches (#67767)
* fix: stabilize context engine prompt cache touches
* fix(changelog): document context-engine prompt cache touch stabilization
This commit is contained in:
@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
|
||||
- BlueBubbles/inbound: restore inbound image attachment downloads on Node 22+ by stripping incompatible bundled-undici dispatchers from the non-SSRF fetch path, accept `updated-message` webhooks carrying attachments, use event-type-aware dedup keys so attachment follow-ups are not rejected as duplicates, and retry attachment fetch from the BB API when the initial webhook arrives with an empty array. (#64105, #61861, #65430, #67510) Thanks @omarshahine.
|
||||
- Agents/skills: sort prompt-facing `available_skills` entries by skill name after merging sources so `skills.load.extraDirs` order no longer changes prompt-cache prefixes. (#64198) Thanks @Bartok9.
|
||||
- Agents/OpenAI Responses: add `models.providers.*.models.*.compat.supportsPromptCacheKey` so OpenAI-compatible proxies that forward `prompt_cache_key` can keep prompt caching enabled while incompatible endpoints can still force stripping. (#67427) Thanks @damselem.
|
||||
- Agents/context engines: keep loop-hook and final `afterTurn` prompt-cache touch metadata aligned with the current assistant turn so cache-aware context engines retain accurate cache TTL state during tool loops. (#67767) Thanks @jalehman.
|
||||
- Memory/dreaming: strip AI-facing inbound metadata envelopes from session-corpus user turns before normalization so REM topic extraction sees the user's actual message text, including array-shaped split envelopes. (#66548) Thanks @zqchris.
|
||||
|
||||
## 2026.4.15-beta.1
|
||||
|
||||
@@ -2,7 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import type { MemoryCitationsMode } from "../../../config/types.memory.js";
|
||||
import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js";
|
||||
import type { NormalizedUsage } from "../../usage.js";
|
||||
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
|
||||
import type { PromptCacheChange } from "../prompt-cache-observability.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./types.js";
|
||||
|
||||
@@ -103,6 +103,61 @@ export function findCurrentAttemptAssistantMessage(params: {
|
||||
.find((message): message is AssistantMessage => message.role === "assistant");
|
||||
}
|
||||
|
||||
function parsePromptCacheTouchTimestamp(value: unknown): number | null {
|
||||
if (typeof value === "number" && Number.isFinite(value)) {
|
||||
return value;
|
||||
}
|
||||
if (typeof value === "string") {
|
||||
const parsed = Date.parse(value);
|
||||
if (Number.isFinite(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Resolve the effective prompt-cache touch timestamp for the current assistant turn. */
|
||||
export function resolvePromptCacheTouchTimestamp(params: {
|
||||
lastCallUsage?: NormalizedUsage;
|
||||
assistantTimestamp?: unknown;
|
||||
fallbackLastCacheTouchAt?: number | null;
|
||||
}): number | null {
|
||||
const hasCacheUsage =
|
||||
typeof params.lastCallUsage?.cacheRead === "number" ||
|
||||
typeof params.lastCallUsage?.cacheWrite === "number";
|
||||
if (!hasCacheUsage) {
|
||||
return params.fallbackLastCacheTouchAt ?? null;
|
||||
}
|
||||
return (
|
||||
parsePromptCacheTouchTimestamp(params.assistantTimestamp) ??
|
||||
params.fallbackLastCacheTouchAt ??
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
export function buildLoopPromptCacheInfo(params: {
|
||||
messagesSnapshot: AgentMessage[];
|
||||
prePromptMessageCount: number;
|
||||
retention?: "none" | "short" | "long";
|
||||
fallbackLastCacheTouchAt?: number | null;
|
||||
}): EmbeddedRunAttemptResult["promptCache"] {
|
||||
const currentAttemptAssistant = findCurrentAttemptAssistantMessage({
|
||||
messagesSnapshot: params.messagesSnapshot,
|
||||
prePromptMessageCount: params.prePromptMessageCount,
|
||||
});
|
||||
const lastCallUsage = normalizeUsage(currentAttemptAssistant?.usage);
|
||||
|
||||
return buildContextEnginePromptCacheInfo({
|
||||
retention: params.retention,
|
||||
lastCallUsage,
|
||||
lastCacheTouchAt: resolvePromptCacheTouchTimestamp({
|
||||
lastCallUsage,
|
||||
assistantTimestamp: currentAttemptAssistant?.timestamp,
|
||||
fallbackLastCacheTouchAt: params.fallbackLastCacheTouchAt,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
export async function runAttemptContextEngineBootstrap(params: {
|
||||
hadSessionFile: boolean;
|
||||
contextEngine?: AttemptContextEngine;
|
||||
@@ -207,51 +262,50 @@ export async function finalizeAttemptContextEngineTurn(params: {
|
||||
let postTurnFinalizationSucceeded = true;
|
||||
|
||||
if (typeof params.contextEngine.afterTurn === "function") {
|
||||
try {
|
||||
await params.contextEngine.afterTurn({
|
||||
sessionId: params.sessionIdUsed,
|
||||
sessionKey: params.sessionKey,
|
||||
sessionFile: params.sessionFile,
|
||||
messages: params.messagesSnapshot,
|
||||
prePromptMessageCount: params.prePromptMessageCount,
|
||||
tokenBudget: params.tokenBudget,
|
||||
runtimeContext: params.runtimeContext,
|
||||
});
|
||||
} catch (afterTurnErr) {
|
||||
postTurnFinalizationSucceeded = false;
|
||||
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
|
||||
}
|
||||
} else {
|
||||
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
|
||||
if (newMessages.length > 0) {
|
||||
if (typeof params.contextEngine.ingestBatch === "function") {
|
||||
try {
|
||||
await params.contextEngine.afterTurn({
|
||||
sessionId: params.sessionIdUsed,
|
||||
sessionKey: params.sessionKey,
|
||||
sessionFile: params.sessionFile,
|
||||
messages: params.messagesSnapshot,
|
||||
prePromptMessageCount: params.prePromptMessageCount,
|
||||
tokenBudget: params.tokenBudget,
|
||||
runtimeContext: params.runtimeContext,
|
||||
});
|
||||
} catch (afterTurnErr) {
|
||||
postTurnFinalizationSucceeded = false;
|
||||
params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`);
|
||||
}
|
||||
} else {
|
||||
const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount);
|
||||
if (newMessages.length > 0) {
|
||||
if (typeof params.contextEngine.ingestBatch === "function") {
|
||||
try {
|
||||
await params.contextEngine.ingestBatch({
|
||||
sessionId: params.sessionIdUsed,
|
||||
sessionKey: params.sessionKey,
|
||||
messages: newMessages,
|
||||
});
|
||||
} catch (ingestErr) {
|
||||
postTurnFinalizationSucceeded = false;
|
||||
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
|
||||
}
|
||||
} else {
|
||||
for (const msg of newMessages) {
|
||||
try {
|
||||
await params.contextEngine.ingestBatch({
|
||||
await params.contextEngine.ingest?.({
|
||||
sessionId: params.sessionIdUsed,
|
||||
sessionKey: params.sessionKey,
|
||||
messages: newMessages,
|
||||
message: msg,
|
||||
});
|
||||
} catch (ingestErr) {
|
||||
postTurnFinalizationSucceeded = false;
|
||||
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
|
||||
}
|
||||
} else {
|
||||
for (const msg of newMessages) {
|
||||
try {
|
||||
await params.contextEngine.ingest?.({
|
||||
sessionId: params.sessionIdUsed,
|
||||
sessionKey: params.sessionKey,
|
||||
message: msg,
|
||||
});
|
||||
} catch (ingestErr) {
|
||||
postTurnFinalizationSucceeded = false;
|
||||
params.warn(`context engine ingest failed: ${String(ingestErr)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (
|
||||
!params.promptError &&
|
||||
|
||||
@@ -7,10 +7,12 @@ import {
|
||||
} from "../../../plugins/memory-state.js";
|
||||
import {
|
||||
type AttemptContextEngine,
|
||||
buildLoopPromptCacheInfo,
|
||||
assembleAttemptContextEngine,
|
||||
buildContextEnginePromptCacheInfo,
|
||||
findCurrentAttemptAssistantMessage,
|
||||
finalizeAttemptContextEngineTurn,
|
||||
resolvePromptCacheTouchTimestamp,
|
||||
runAttemptContextEngineBootstrap,
|
||||
} from "./attempt.context-engine-helpers.js";
|
||||
import {
|
||||
@@ -367,6 +369,88 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
|
||||
expect(promptCache).toEqual({ retention: "short" });
|
||||
});
|
||||
|
||||
it("derives live loop prompt-cache info from the current attempt assistant", () => {
|
||||
const toolUseAssistant = {
|
||||
role: "assistant",
|
||||
content: "tool use",
|
||||
timestamp: "2026-04-16T16:49:59.536Z",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 2,
|
||||
cacheRead: 39036,
|
||||
cacheWrite: 59934,
|
||||
total: 98973,
|
||||
},
|
||||
} as unknown as AgentMessage;
|
||||
|
||||
expect(
|
||||
buildLoopPromptCacheInfo({
|
||||
messagesSnapshot: [seedMessage, toolUseAssistant],
|
||||
prePromptMessageCount: 1,
|
||||
retention: "short",
|
||||
fallbackLastCacheTouchAt: 123,
|
||||
}),
|
||||
).toEqual(
|
||||
expect.objectContaining({
|
||||
retention: "short",
|
||||
lastCallUsage: expect.objectContaining({
|
||||
cacheRead: 39036,
|
||||
cacheWrite: 59934,
|
||||
total: 98973,
|
||||
}),
|
||||
lastCacheTouchAt: Date.parse("2026-04-16T16:49:59.536Z"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("falls back to the persisted cache touch when loop usage has no cache metrics", () => {
|
||||
const toolUseAssistant = {
|
||||
role: "assistant",
|
||||
content: "tool use",
|
||||
timestamp: "2026-04-16T16:49:59.536Z",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 2,
|
||||
total: 3,
|
||||
},
|
||||
} as unknown as AgentMessage;
|
||||
|
||||
expect(
|
||||
buildLoopPromptCacheInfo({
|
||||
messagesSnapshot: [seedMessage, toolUseAssistant],
|
||||
prePromptMessageCount: 1,
|
||||
retention: "short",
|
||||
fallbackLastCacheTouchAt: 123,
|
||||
}),
|
||||
).toEqual(
|
||||
expect.objectContaining({
|
||||
retention: "short",
|
||||
lastCallUsage: expect.objectContaining({
|
||||
total: 3,
|
||||
}),
|
||||
lastCacheTouchAt: 123,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("derives a live cache touch timestamp for final afterTurn usage snapshots", () => {
|
||||
const lastCallUsage = {
|
||||
input: 1,
|
||||
output: 2,
|
||||
cacheRead: 39036,
|
||||
cacheWrite: 0,
|
||||
total: 39039,
|
||||
};
|
||||
|
||||
expect(
|
||||
resolvePromptCacheTouchTimestamp({
|
||||
lastCallUsage,
|
||||
assistantTimestamp: "2026-04-16T17:04:46.974Z",
|
||||
fallbackLastCacheTouchAt: 123,
|
||||
}),
|
||||
).toBe(Date.parse("2026-04-16T17:04:46.974Z"));
|
||||
});
|
||||
|
||||
it("threads prompt-cache break observations into afterTurn", async () => {
|
||||
const afterTurn = vi.fn(async (_params: AfterTurnPromptCacheCall) => {});
|
||||
|
||||
|
||||
@@ -182,9 +182,11 @@ import { mapThinkingLevel } from "../utils.js";
|
||||
import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js";
|
||||
import {
|
||||
assembleAttemptContextEngine,
|
||||
buildLoopPromptCacheInfo,
|
||||
buildContextEnginePromptCacheInfo,
|
||||
findCurrentAttemptAssistantMessage,
|
||||
finalizeAttemptContextEngineTurn,
|
||||
resolvePromptCacheTouchTimestamp,
|
||||
resolveAttemptBootstrapContext,
|
||||
runAttemptContextEngineBootstrap,
|
||||
} from "./attempt.context-engine-helpers.js";
|
||||
@@ -1071,6 +1073,24 @@ export async function runEmbeddedAttempt(
|
||||
tokenBudget: params.contextTokenBudget,
|
||||
modelId: params.modelId,
|
||||
getPrePromptMessageCount: () => prePromptMessageCount,
|
||||
getRuntimeContext: ({ messages, prePromptMessageCount: loopPrePromptMessageCount }) =>
|
||||
buildAfterTurnRuntimeContext({
|
||||
attempt: params,
|
||||
workspaceDir: effectiveWorkspace,
|
||||
agentDir,
|
||||
tokenBudget: params.contextTokenBudget,
|
||||
promptCache:
|
||||
promptCache ??
|
||||
buildLoopPromptCacheInfo({
|
||||
messagesSnapshot: messages,
|
||||
prePromptMessageCount: loopPrePromptMessageCount,
|
||||
retention: effectivePromptCacheRetention,
|
||||
fallbackLastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
});
|
||||
}
|
||||
const cacheTrace = createCacheTrace({
|
||||
@@ -2235,13 +2255,18 @@ export async function runEmbeddedAttempt(
|
||||
changes: cacheBreak?.changes ?? promptCacheChangesForTurn,
|
||||
}
|
||||
: undefined;
|
||||
const fallbackLastCacheTouchAt = readLastCacheTtlTimestamp(sessionManager, {
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
});
|
||||
promptCache = buildContextEnginePromptCacheInfo({
|
||||
retention: effectivePromptCacheRetention,
|
||||
lastCallUsage,
|
||||
observation: promptCacheObservation,
|
||||
lastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, {
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
lastCacheTouchAt: resolvePromptCacheTouchTimestamp({
|
||||
lastCallUsage,
|
||||
assistantTimestamp: currentAttemptAssistant?.timestamp,
|
||||
fallbackLastCacheTouchAt,
|
||||
}),
|
||||
});
|
||||
|
||||
|
||||
@@ -29,6 +29,15 @@ function makeToolResult(id: string, text: string, toolName = "grep"): AgentMessa
|
||||
});
|
||||
}
|
||||
|
||||
function makeAssistant(text: string, extras: Record<string, unknown> = {}): AgentMessage {
|
||||
return castAgentMessage({
|
||||
role: "assistant",
|
||||
content: text,
|
||||
timestamp: Date.now(),
|
||||
...extras,
|
||||
});
|
||||
}
|
||||
|
||||
function makeReadToolResult(id: string, text: string): AgentMessage {
|
||||
return makeToolResult(id, text, "read");
|
||||
}
|
||||
@@ -319,6 +328,10 @@ describe("installContextEngineLoopHook", () => {
|
||||
agent: ReturnType<typeof makeGuardableAgent>,
|
||||
engine: MockedEngine,
|
||||
prePromptCount?: number,
|
||||
getRuntimeContext?: (params: {
|
||||
messages: AgentMessage[];
|
||||
prePromptMessageCount: number;
|
||||
}) => Record<string, unknown> | undefined,
|
||||
): () => void {
|
||||
return installContextEngineLoopHook({
|
||||
agent,
|
||||
@@ -329,6 +342,7 @@ describe("installContextEngineLoopHook", () => {
|
||||
tokenBudget,
|
||||
modelId,
|
||||
...(prePromptCount !== undefined ? { getPrePromptMessageCount: () => prePromptCount } : {}),
|
||||
...(getRuntimeContext ? { getRuntimeContext } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -361,6 +375,54 @@ describe("installContextEngineLoopHook", () => {
|
||||
expect(engine.assemble).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("passes runtimeContext through loop-hook afterTurn calls", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
const engine = makeMockEngine();
|
||||
installHook(agent, engine, 1, () => ({
|
||||
provider: "anthropic",
|
||||
modelId: modelId,
|
||||
promptCache: {
|
||||
retention: "short",
|
||||
lastCacheTouchAt: 123,
|
||||
},
|
||||
}));
|
||||
|
||||
const messages = [makeUser("first"), makeToolResult("call_1", "result")];
|
||||
await callTransform(agent, messages);
|
||||
|
||||
expect(engine.afterTurn).toHaveBeenCalledTimes(1);
|
||||
expect(engine.afterTurn.mock.calls[0]?.[0]).toMatchObject({
|
||||
prePromptMessageCount: 1,
|
||||
runtimeContext: {
|
||||
provider: "anthropic",
|
||||
modelId,
|
||||
promptCache: {
|
||||
retention: "short",
|
||||
lastCacheTouchAt: 123,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("passes loop messages and the prompt fence into the runtimeContext callback", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
const engine = makeMockEngine();
|
||||
const getRuntimeContext = vi.fn(() => ({ provider: "anthropic" }));
|
||||
installHook(agent, engine, 1, getRuntimeContext);
|
||||
|
||||
const messages = [
|
||||
makeUser("first"),
|
||||
makeAssistant("tool use", { usage: { cacheRead: 40, total: 50 }, timestamp: 456 }),
|
||||
makeToolResult("call_1", "result"),
|
||||
];
|
||||
await callTransform(agent, messages);
|
||||
|
||||
expect(getRuntimeContext).toHaveBeenCalledWith({
|
||||
messages,
|
||||
prePromptMessageCount: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it("calls afterTurn and assemble when new messages are appended after the first call", async () => {
|
||||
const agent = makeGuardableAgent();
|
||||
const engine = makeMockEngine();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ContextEngine } from "../../context-engine/types.js";
|
||||
import type { ContextEngine, ContextEngineRuntimeContext } from "../../context-engine/types.js";
|
||||
import {
|
||||
CHARS_PER_TOKEN_ESTIMATE,
|
||||
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
|
||||
@@ -198,6 +198,10 @@ export function installContextEngineLoopHook(params: {
|
||||
tokenBudget?: number;
|
||||
modelId: string;
|
||||
getPrePromptMessageCount?: () => number;
|
||||
getRuntimeContext?: (params: {
|
||||
messages: AgentMessage[];
|
||||
prePromptMessageCount: number;
|
||||
}) => ContextEngineRuntimeContext | undefined;
|
||||
}): () => void {
|
||||
const { contextEngine, sessionId, sessionKey, sessionFile, tokenBudget, modelId } = params;
|
||||
const mutableAgent = params.agent as GuardableAgentRecord;
|
||||
@@ -237,6 +241,10 @@ export function installContextEngineLoopHook(params: {
|
||||
messages: sourceMessages,
|
||||
prePromptMessageCount,
|
||||
tokenBudget,
|
||||
runtimeContext: params.getRuntimeContext?.({
|
||||
messages: sourceMessages,
|
||||
prePromptMessageCount,
|
||||
}),
|
||||
});
|
||||
} else {
|
||||
const newMessages = sourceMessages.slice(prePromptMessageCount);
|
||||
|
||||
Reference in New Issue
Block a user