diff --git a/CHANGELOG.md b/CHANGELOG.md index 384885bf0f9..1b53619da87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai - BlueBubbles/inbound: restore inbound image attachment downloads on Node 22+ by stripping incompatible bundled-undici dispatchers from the non-SSRF fetch path, accept `updated-message` webhooks carrying attachments, use event-type-aware dedup keys so attachment follow-ups are not rejected as duplicates, and retry attachment fetch from the BB API when the initial webhook arrives with an empty array. (#64105, #61861, #65430, #67510) Thanks @omarshahine. - Agents/skills: sort prompt-facing `available_skills` entries by skill name after merging sources so `skills.load.extraDirs` order no longer changes prompt-cache prefixes. (#64198) Thanks @Bartok9. - Agents/OpenAI Responses: add `models.providers.*.models.*.compat.supportsPromptCacheKey` so OpenAI-compatible proxies that forward `prompt_cache_key` can keep prompt caching enabled while incompatible endpoints can still force stripping. (#67427) Thanks @damselem. +- Agents/context engines: keep loop-hook and final `afterTurn` prompt-cache touch metadata aligned with the current assistant turn so cache-aware context engines retain accurate cache TTL state during tool loops. (#67767) Thanks @jalehman. - Memory/dreaming: strip AI-facing inbound metadata envelopes from session-corpus user turns before normalization so REM topic extraction sees the user's actual message text, including array-shaped split envelopes. (#66548) Thanks @zqchris. 
## 2026.4.15-beta.1 diff --git a/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts b/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts index 6b89298d904..7e95d6c0951 100644 --- a/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts +++ b/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts @@ -2,7 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { AssistantMessage } from "@mariozechner/pi-ai"; import type { MemoryCitationsMode } from "../../../config/types.memory.js"; import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js"; -import type { NormalizedUsage } from "../../usage.js"; +import { normalizeUsage, type NormalizedUsage } from "../../usage.js"; import type { PromptCacheChange } from "../prompt-cache-observability.js"; import type { EmbeddedRunAttemptResult } from "./types.js"; @@ -103,6 +103,61 @@ export function findCurrentAttemptAssistantMessage(params: { .find((message): message is AssistantMessage => message.role === "assistant"); } +function parsePromptCacheTouchTimestamp(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) { + return value; + } + if (typeof value === "string") { + const parsed = Date.parse(value); + if (Number.isFinite(parsed)) { + return parsed; + } + } + return null; +} + +/** Resolve the effective prompt-cache touch timestamp for the current assistant turn. */ +export function resolvePromptCacheTouchTimestamp(params: { + lastCallUsage?: NormalizedUsage; + assistantTimestamp?: unknown; + fallbackLastCacheTouchAt?: number | null; +}): number | null { + const hasCacheUsage = + typeof params.lastCallUsage?.cacheRead === "number" || + typeof params.lastCallUsage?.cacheWrite === "number"; + if (!hasCacheUsage) { + return params.fallbackLastCacheTouchAt ?? null; + } + return ( + parsePromptCacheTouchTimestamp(params.assistantTimestamp) ?? 
+ params.fallbackLastCacheTouchAt ?? + null + ); +} + +export function buildLoopPromptCacheInfo(params: { + messagesSnapshot: AgentMessage[]; + prePromptMessageCount: number; + retention?: "none" | "short" | "long"; + fallbackLastCacheTouchAt?: number | null; +}): EmbeddedRunAttemptResult["promptCache"] { + const currentAttemptAssistant = findCurrentAttemptAssistantMessage({ + messagesSnapshot: params.messagesSnapshot, + prePromptMessageCount: params.prePromptMessageCount, + }); + const lastCallUsage = normalizeUsage(currentAttemptAssistant?.usage); + + return buildContextEnginePromptCacheInfo({ + retention: params.retention, + lastCallUsage, + lastCacheTouchAt: resolvePromptCacheTouchTimestamp({ + lastCallUsage, + assistantTimestamp: currentAttemptAssistant?.timestamp, + fallbackLastCacheTouchAt: params.fallbackLastCacheTouchAt, + }), + }); +} + export async function runAttemptContextEngineBootstrap(params: { hadSessionFile: boolean; contextEngine?: AttemptContextEngine; @@ -207,51 +262,50 @@ export async function finalizeAttemptContextEngineTurn(params: { let postTurnFinalizationSucceeded = true; if (typeof params.contextEngine.afterTurn === "function") { - try { - await params.contextEngine.afterTurn({ - sessionId: params.sessionIdUsed, - sessionKey: params.sessionKey, - sessionFile: params.sessionFile, - messages: params.messagesSnapshot, - prePromptMessageCount: params.prePromptMessageCount, - tokenBudget: params.tokenBudget, - runtimeContext: params.runtimeContext, - }); - } catch (afterTurnErr) { - postTurnFinalizationSucceeded = false; - params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`); - } - } else { - const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount); - if (newMessages.length > 0) { - if (typeof params.contextEngine.ingestBatch === "function") { + try { + await params.contextEngine.afterTurn({ + sessionId: params.sessionIdUsed, + sessionKey: params.sessionKey, + sessionFile: params.sessionFile, + 
messages: params.messagesSnapshot, + prePromptMessageCount: params.prePromptMessageCount, + tokenBudget: params.tokenBudget, + runtimeContext: params.runtimeContext, + }); + } catch (afterTurnErr) { + postTurnFinalizationSucceeded = false; + params.warn(`context engine afterTurn failed: ${String(afterTurnErr)}`); + } + } else { + const newMessages = params.messagesSnapshot.slice(params.prePromptMessageCount); + if (newMessages.length > 0) { + if (typeof params.contextEngine.ingestBatch === "function") { + try { + await params.contextEngine.ingestBatch({ + sessionId: params.sessionIdUsed, + sessionKey: params.sessionKey, + messages: newMessages, + }); + } catch (ingestErr) { + postTurnFinalizationSucceeded = false; + params.warn(`context engine ingest failed: ${String(ingestErr)}`); + } + } else { + for (const msg of newMessages) { try { - await params.contextEngine.ingestBatch({ + await params.contextEngine.ingest?.({ sessionId: params.sessionIdUsed, sessionKey: params.sessionKey, - messages: newMessages, + message: msg, }); } catch (ingestErr) { postTurnFinalizationSucceeded = false; params.warn(`context engine ingest failed: ${String(ingestErr)}`); } - } else { - for (const msg of newMessages) { - try { - await params.contextEngine.ingest?.({ - sessionId: params.sessionIdUsed, - sessionKey: params.sessionKey, - message: msg, - }); - } catch (ingestErr) { - postTurnFinalizationSucceeded = false; - params.warn(`context engine ingest failed: ${String(ingestErr)}`); - } - } } } } - + } if ( !params.promptError && diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts index a2817885639..4aa12036413 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.context-engine.test.ts @@ -7,10 +7,12 @@ import { } from "../../../plugins/memory-state.js"; 
import { type AttemptContextEngine, + buildLoopPromptCacheInfo, assembleAttemptContextEngine, buildContextEnginePromptCacheInfo, findCurrentAttemptAssistantMessage, finalizeAttemptContextEngineTurn, + resolvePromptCacheTouchTimestamp, runAttemptContextEngineBootstrap, } from "./attempt.context-engine-helpers.js"; import { @@ -367,6 +369,88 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => { expect(promptCache).toEqual({ retention: "short" }); }); + it("derives live loop prompt-cache info from the current attempt assistant", () => { + const toolUseAssistant = { + role: "assistant", + content: "tool use", + timestamp: "2026-04-16T16:49:59.536Z", + usage: { + input: 1, + output: 2, + cacheRead: 39036, + cacheWrite: 59934, + total: 98973, + }, + } as unknown as AgentMessage; + + expect( + buildLoopPromptCacheInfo({ + messagesSnapshot: [seedMessage, toolUseAssistant], + prePromptMessageCount: 1, + retention: "short", + fallbackLastCacheTouchAt: 123, + }), + ).toEqual( + expect.objectContaining({ + retention: "short", + lastCallUsage: expect.objectContaining({ + cacheRead: 39036, + cacheWrite: 59934, + total: 98973, + }), + lastCacheTouchAt: Date.parse("2026-04-16T16:49:59.536Z"), + }), + ); + }); + + it("falls back to the persisted cache touch when loop usage has no cache metrics", () => { + const toolUseAssistant = { + role: "assistant", + content: "tool use", + timestamp: "2026-04-16T16:49:59.536Z", + usage: { + input: 1, + output: 2, + total: 3, + }, + } as unknown as AgentMessage; + + expect( + buildLoopPromptCacheInfo({ + messagesSnapshot: [seedMessage, toolUseAssistant], + prePromptMessageCount: 1, + retention: "short", + fallbackLastCacheTouchAt: 123, + }), + ).toEqual( + expect.objectContaining({ + retention: "short", + lastCallUsage: expect.objectContaining({ + total: 3, + }), + lastCacheTouchAt: 123, + }), + ); + }); + + it("derives a live cache touch timestamp for final afterTurn usage snapshots", () => { + const lastCallUsage = { + 
input: 1, + output: 2, + cacheRead: 39036, + cacheWrite: 0, + total: 39039, + }; + + expect( + resolvePromptCacheTouchTimestamp({ + lastCallUsage, + assistantTimestamp: "2026-04-16T17:04:46.974Z", + fallbackLastCacheTouchAt: 123, + }), + ).toBe(Date.parse("2026-04-16T17:04:46.974Z")); + }); + it("threads prompt-cache break observations into afterTurn", async () => { const afterTurn = vi.fn(async (_params: AfterTurnPromptCacheCall) => {}); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 9598a01ec45..3c07668d5b6 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -182,9 +182,11 @@ import { mapThinkingLevel } from "../utils.js"; import { flushPendingToolResultsAfterIdle } from "../wait-for-idle-before-flush.js"; import { assembleAttemptContextEngine, + buildLoopPromptCacheInfo, buildContextEnginePromptCacheInfo, findCurrentAttemptAssistantMessage, finalizeAttemptContextEngineTurn, + resolvePromptCacheTouchTimestamp, resolveAttemptBootstrapContext, runAttemptContextEngineBootstrap, } from "./attempt.context-engine-helpers.js"; @@ -1071,6 +1073,24 @@ export async function runEmbeddedAttempt( tokenBudget: params.contextTokenBudget, modelId: params.modelId, getPrePromptMessageCount: () => prePromptMessageCount, + getRuntimeContext: ({ messages, prePromptMessageCount: loopPrePromptMessageCount }) => + buildAfterTurnRuntimeContext({ + attempt: params, + workspaceDir: effectiveWorkspace, + agentDir, + tokenBudget: params.contextTokenBudget, + promptCache: + promptCache ?? 
+ buildLoopPromptCacheInfo({ + messagesSnapshot: messages, + prePromptMessageCount: loopPrePromptMessageCount, + retention: effectivePromptCacheRetention, + fallbackLastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, { + provider: params.provider, + modelId: params.modelId, + }), + }), + }), }); } const cacheTrace = createCacheTrace({ @@ -2235,13 +2255,18 @@ export async function runEmbeddedAttempt( changes: cacheBreak?.changes ?? promptCacheChangesForTurn, } : undefined; + const fallbackLastCacheTouchAt = readLastCacheTtlTimestamp(sessionManager, { + provider: params.provider, + modelId: params.modelId, + }); promptCache = buildContextEnginePromptCacheInfo({ retention: effectivePromptCacheRetention, lastCallUsage, observation: promptCacheObservation, - lastCacheTouchAt: readLastCacheTtlTimestamp(sessionManager, { - provider: params.provider, - modelId: params.modelId, + lastCacheTouchAt: resolvePromptCacheTouchTimestamp({ + lastCallUsage, + assistantTimestamp: currentAttemptAssistant?.timestamp, + fallbackLastCacheTouchAt, }), }); diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts index 46df86ecb62..c52ad7f3a56 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts @@ -29,6 +29,15 @@ function makeToolResult(id: string, text: string, toolName = "grep"): AgentMessa }); } +function makeAssistant(text: string, extras: Record<string, unknown> = {}): AgentMessage { + return castAgentMessage({ + role: "assistant", + content: text, + timestamp: Date.now(), + ...extras, + }); +} + function makeReadToolResult(id: string, text: string): AgentMessage { return makeToolResult(id, text, "read"); } @@ -319,6 +328,10 @@ describe("installContextEngineLoopHook", () => { agent: ReturnType<typeof makeGuardableAgent>, engine: MockedEngine, prePromptCount?: number, + getRuntimeContext?: (params: { + messages: AgentMessage[]; + 
prePromptMessageCount: number; + }) => Record<string, unknown> | undefined, ): () => void { return installContextEngineLoopHook({ agent, @@ -329,6 +342,7 @@ tokenBudget, modelId, ...(prePromptCount !== undefined ? { getPrePromptMessageCount: () => prePromptCount } : {}), + ...(getRuntimeContext ? { getRuntimeContext } : {}), }); } @@ -361,6 +375,54 @@ expect(engine.assemble).toHaveBeenCalledTimes(1); }); + it("passes runtimeContext through loop-hook afterTurn calls", async () => { + const agent = makeGuardableAgent(); + const engine = makeMockEngine(); + installHook(agent, engine, 1, () => ({ + provider: "anthropic", + modelId: modelId, + promptCache: { + retention: "short", + lastCacheTouchAt: 123, + }, + })); + + const messages = [makeUser("first"), makeToolResult("call_1", "result")]; + await callTransform(agent, messages); + + expect(engine.afterTurn).toHaveBeenCalledTimes(1); + expect(engine.afterTurn.mock.calls[0]?.[0]).toMatchObject({ + prePromptMessageCount: 1, + runtimeContext: { + provider: "anthropic", + modelId, + promptCache: { + retention: "short", + lastCacheTouchAt: 123, + }, + }, + }); + }); + + it("passes loop messages and the prompt fence into the runtimeContext callback", async () => { + const agent = makeGuardableAgent(); + const engine = makeMockEngine(); + const getRuntimeContext = vi.fn(() => ({ provider: "anthropic" })); + installHook(agent, engine, 1, getRuntimeContext); + + const messages = [ + makeUser("first"), + makeAssistant("tool use", { usage: { cacheRead: 40, total: 50 }, timestamp: 456 }), + makeToolResult("call_1", "result"), + ]; + await callTransform(agent, messages); + + expect(getRuntimeContext).toHaveBeenCalledWith({ + messages, + prePromptMessageCount: 1, + }); + }); + it("calls afterTurn and assemble when new messages are appended after the first call", async () => { const agent = makeGuardableAgent(); const engine = makeMockEngine(); diff 
--git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts index 601097d58b7..1ce238b35da 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts @@ -1,5 +1,5 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; -import type { ContextEngine } from "../../context-engine/types.js"; +import type { ContextEngine, ContextEngineRuntimeContext } from "../../context-engine/types.js"; import { CHARS_PER_TOKEN_ESTIMATE, TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE, @@ -198,6 +198,10 @@ export function installContextEngineLoopHook(params: { tokenBudget?: number; modelId: string; getPrePromptMessageCount?: () => number; + getRuntimeContext?: (params: { + messages: AgentMessage[]; + prePromptMessageCount: number; + }) => ContextEngineRuntimeContext | undefined; }): () => void { const { contextEngine, sessionId, sessionKey, sessionFile, tokenBudget, modelId } = params; const mutableAgent = params.agent as GuardableAgentRecord; @@ -237,6 +241,10 @@ export function installContextEngineLoopHook(params: { messages: sourceMessages, prePromptMessageCount, tokenBudget, + runtimeContext: params.getRuntimeContext?.({ + messages: sourceMessages, + prePromptMessageCount, + }), }); } else { const newMessages = sourceMessages.slice(prePromptMessageCount);