mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 12:30:43 +00:00
* Context engine: plumb prompt cache runtime context Add a typed prompt-cache payload to the context-engine runtime context and populate it from the embedded runner's resolved retention, last-call usage, cache-break observation, and cache-touch metadata. Also pass the same payload through the retry compaction runtime context when a run attempt already has it. Regeneration-Prompt: | Expose OpenClaw prompt-cache telemetry to context engines in a narrow, additive way without changing compaction policy. Keep the public change on the OpenClaw side only: add a typed promptCache payload to the context-engine runtime context, thread it into afterTurn, and also into compact where the existing run loop already has the data cheaply available. Use OpenClaw's resolved cache retention, not raw config. Use last-call usage for the new payload, not accumulated retry or tool-loop totals. Reuse the existing prompt-cache observability result and tracked change causes instead of inventing a new heuristic. If cache-touch metadata is already available from the cache-TTL bookkeeping, include it; do not invent expiry timestamps for providers where OpenClaw cannot know them confidently. Keep the interface backward-compatible for engines that ignore the new field. Add focused tests around the existing attempt/context-engine helpers and the compaction runtime-context propagation path rather than broad new integration coverage. * Agents: fix prompt-cache afterTurn usage Regeneration-Prompt: | Fix PR #62179 so context-engine prompt-cache metadata uses only the current attempt's usage. The review comment pointed out that early exits could reuse a prior turn's assistant usage when no new assistant message was produced. Restrict the prompt-cache lastCallUsage lookup to assistant messages added after prePromptMessageCount, and fall back to current-attempt usage totals instead of stale snapshot history. Also repair the PR's new context-engine test typings and add a regression test for the stale prior-turn case. Two import-only fixes in doctor-state-integrity and config/talk were already broken on origin/main, but they blocked build/check and the gateway-watch regression harness, so include the minimum unblocking imports as well. * Agents: document prompt-cache context * Agents: address prompt-cache review feedback * Doctor: drop unused isRecord import
282 lines
8.8 KiB
TypeScript
282 lines
8.8 KiB
TypeScript
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
|
import type { MemoryCitationsMode } from "../config/types.memory.js";
|
|
|
|
// Result types
|
|
|
|
export type AssembleResult = {
|
|
/** Ordered messages to use as model context */
|
|
messages: AgentMessage[];
|
|
/** Estimated total tokens in assembled context */
|
|
estimatedTokens: number;
|
|
/** Optional context-engine-provided instructions prepended to the runtime system prompt */
|
|
systemPromptAddition?: string;
|
|
};
|
|
|
|
export type CompactResult = {
|
|
ok: boolean;
|
|
compacted: boolean;
|
|
reason?: string;
|
|
result?: {
|
|
summary?: string;
|
|
firstKeptEntryId?: string;
|
|
tokensBefore: number;
|
|
tokensAfter?: number;
|
|
details?: unknown;
|
|
};
|
|
};
|
|
|
|
export type IngestResult = {
|
|
/** Whether the message was ingested (false if duplicate or no-op) */
|
|
ingested: boolean;
|
|
};
|
|
|
|
export type IngestBatchResult = {
|
|
/** Number of messages ingested from the supplied batch */
|
|
ingestedCount: number;
|
|
};
|
|
|
|
export type BootstrapResult = {
|
|
/** Whether bootstrap ran and initialized the engine's store */
|
|
bootstrapped: boolean;
|
|
/** Number of historical messages imported (if applicable) */
|
|
importedMessages?: number;
|
|
/** Optional reason when bootstrap was skipped */
|
|
reason?: string;
|
|
};
|
|
|
|
export type ContextEngineInfo = {
|
|
id: string;
|
|
name: string;
|
|
version?: string;
|
|
/** True when the engine manages its own compaction lifecycle. */
|
|
ownsCompaction?: boolean;
|
|
};
|
|
|
|
export type SubagentSpawnPreparation = {
|
|
/** Roll back pre-spawn setup when subagent launch fails. */
|
|
rollback: () => void | Promise<void>;
|
|
};
|
|
|
|
export type SubagentEndReason = "deleted" | "completed" | "swept" | "released";
|
|
|
|
export type TranscriptRewriteReplacement = {
|
|
/** Existing transcript entry id to replace on the active branch. */
|
|
entryId: string;
|
|
/** Replacement message content for that entry. */
|
|
message: AgentMessage;
|
|
};
|
|
|
|
export type TranscriptRewriteRequest = {
|
|
/** Message entry replacements to apply in one branch-and-reappend pass. */
|
|
replacements: TranscriptRewriteReplacement[];
|
|
};
|
|
|
|
export type TranscriptRewriteResult = {
|
|
/** Whether the active branch changed. */
|
|
changed: boolean;
|
|
/** Estimated bytes removed from the active branch message payloads. */
|
|
bytesFreed: number;
|
|
/** Number of transcript message entries rewritten. */
|
|
rewrittenEntries: number;
|
|
/** Optional reason when no rewrite occurred. */
|
|
reason?: string;
|
|
};
|
|
|
|
export type ContextEngineMaintenanceResult = TranscriptRewriteResult;
|
|
|
|
export type ContextEnginePromptCacheRetention = "none" | "short" | "long" | "in_memory" | "24h";
|
|
|
|
export type ContextEnginePromptCacheUsage = {
|
|
input?: number;
|
|
output?: number;
|
|
cacheRead?: number;
|
|
cacheWrite?: number;
|
|
total?: number;
|
|
};
|
|
|
|
export type ContextEnginePromptCacheObservationChangeCode =
|
|
| "cacheRetention"
|
|
| "model"
|
|
| "streamStrategy"
|
|
| "systemPrompt"
|
|
| "tools"
|
|
| "transport";
|
|
|
|
export type ContextEnginePromptCacheObservationChange = {
|
|
code: ContextEnginePromptCacheObservationChangeCode;
|
|
detail: string;
|
|
};
|
|
|
|
export type ContextEnginePromptCacheObservation = {
|
|
broke: boolean;
|
|
previousCacheRead?: number;
|
|
cacheRead?: number;
|
|
changes?: ContextEnginePromptCacheObservationChange[];
|
|
};
|
|
|
|
export type ContextEnginePromptCacheInfo = {
|
|
/** Runtime-resolved retention for the actual provider/model/request path. */
|
|
retention?: ContextEnginePromptCacheRetention;
|
|
/** Usage from the most recent API call, not accumulated retry/tool-loop totals. */
|
|
lastCallUsage?: ContextEnginePromptCacheUsage;
|
|
/** Result from the runtime's prompt-cache observability heuristic. */
|
|
observation?: ContextEnginePromptCacheObservation;
|
|
/** Last known cache-touch timestamp from runtime-managed cache-TTL bookkeeping. */
|
|
lastCacheTouchAt?: number;
|
|
/** Known cache expiry time when the runtime can source it confidently. */
|
|
expiresAt?: number;
|
|
};
|
|
|
|
export type ContextEngineRuntimeContext = Record<string, unknown> & {
|
|
/** Optional prompt-cache telemetry for cache-aware engines. */
|
|
promptCache?: ContextEnginePromptCacheInfo;
|
|
/**
|
|
* Safe transcript rewrite helper implemented by the runtime.
|
|
*
|
|
* Engines decide what is safe to rewrite; the runtime owns how the session
|
|
* DAG is updated on disk.
|
|
*/
|
|
rewriteTranscriptEntries?: (
|
|
request: TranscriptRewriteRequest,
|
|
) => Promise<TranscriptRewriteResult>;
|
|
};
|
|
|
|
/**
|
|
* ContextEngine defines the pluggable contract for context management.
|
|
*
|
|
* Required methods define a generic lifecycle; optional methods allow engines
|
|
* to provide additional capabilities (retrieval, lineage, etc.).
|
|
*/
|
|
export interface ContextEngine {
|
|
/** Engine identifier and metadata */
|
|
readonly info: ContextEngineInfo;
|
|
|
|
/**
|
|
* Initialize engine state for a session, optionally importing historical context.
|
|
*/
|
|
bootstrap?(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
sessionFile: string;
|
|
}): Promise<BootstrapResult>;
|
|
|
|
/**
|
|
* Run transcript maintenance after bootstrap, successful turns, or compaction.
|
|
*
|
|
* Engines can use runtimeContext.rewriteTranscriptEntries() to request safe
|
|
* branch-and-reappend transcript rewrites without depending on Pi internals.
|
|
*/
|
|
maintain?(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
sessionFile: string;
|
|
runtimeContext?: ContextEngineRuntimeContext;
|
|
}): Promise<ContextEngineMaintenanceResult>;
|
|
|
|
/**
|
|
* Ingest a single message into the engine's store.
|
|
*/
|
|
ingest(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
message: AgentMessage;
|
|
/** True when the message belongs to a heartbeat run. */
|
|
isHeartbeat?: boolean;
|
|
}): Promise<IngestResult>;
|
|
|
|
/**
|
|
* Ingest a completed turn batch as a single unit.
|
|
*/
|
|
ingestBatch?(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
messages: AgentMessage[];
|
|
/** True when the batch belongs to a heartbeat run. */
|
|
isHeartbeat?: boolean;
|
|
}): Promise<IngestBatchResult>;
|
|
|
|
/**
|
|
* Execute optional post-turn lifecycle work after a run attempt completes.
|
|
* Engines can use this to persist canonical context and trigger background
|
|
* compaction decisions.
|
|
*/
|
|
afterTurn?(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
sessionFile: string;
|
|
messages: AgentMessage[];
|
|
/** Number of messages that existed before the prompt was sent. */
|
|
prePromptMessageCount: number;
|
|
/** Optional auto-compaction summary emitted by the runtime. */
|
|
autoCompactionSummary?: string;
|
|
/** True when this turn belongs to a heartbeat run. */
|
|
isHeartbeat?: boolean;
|
|
/** Optional model context token budget for proactive compaction. */
|
|
tokenBudget?: number;
|
|
/** Optional runtime-owned context for engines that need caller state. */
|
|
runtimeContext?: ContextEngineRuntimeContext;
|
|
}): Promise<void>;
|
|
|
|
/**
|
|
* Assemble model context under a token budget.
|
|
* Returns an ordered set of messages ready for the model.
|
|
*/
|
|
assemble(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
messages: AgentMessage[];
|
|
tokenBudget?: number;
|
|
/** Tool names available for this run so engines can align prompt guidance with runtime tool access. */
|
|
availableTools?: Set<string>;
|
|
/** Active memory citation mode when engines want to mirror memory prompt guidance. */
|
|
citationsMode?: MemoryCitationsMode;
|
|
/** Current model identifier (e.g. "claude-opus-4", "gpt-4o", "qwen2.5-7b").
|
|
* Allows context engine plugins to adapt formatting per model. */
|
|
model?: string;
|
|
/** The incoming user prompt for this turn (useful for retrieval-oriented engines). */
|
|
prompt?: string;
|
|
}): Promise<AssembleResult>;
|
|
|
|
/**
|
|
* Compact context to reduce token usage.
|
|
* May create summaries, prune old turns, etc.
|
|
*/
|
|
compact(params: {
|
|
sessionId: string;
|
|
sessionKey?: string;
|
|
sessionFile: string;
|
|
tokenBudget?: number;
|
|
/** Force compaction even below the default trigger threshold. */
|
|
force?: boolean;
|
|
/** Optional live token estimate from the caller's active context. */
|
|
currentTokenCount?: number;
|
|
/** Controls convergence target; defaults to budget. */
|
|
compactionTarget?: "budget" | "threshold";
|
|
customInstructions?: string;
|
|
/** Optional runtime-owned context for engines that need caller state. */
|
|
runtimeContext?: ContextEngineRuntimeContext;
|
|
}): Promise<CompactResult>;
|
|
|
|
/**
|
|
* Prepare context-engine-managed subagent state before the child run starts.
|
|
*
|
|
* Implementations can return a rollback handle that is invoked when spawn
|
|
* fails after preparation succeeds.
|
|
*/
|
|
prepareSubagentSpawn?(params: {
|
|
parentSessionKey: string;
|
|
childSessionKey: string;
|
|
ttlMs?: number;
|
|
}): Promise<SubagentSpawnPreparation | undefined>;
|
|
|
|
/**
|
|
* Notify the context engine that a subagent lifecycle ended.
|
|
*/
|
|
onSubagentEnded?(params: { childSessionKey: string; reason: SubagentEndReason }): Promise<void>;
|
|
|
|
/**
|
|
* Dispose of any resources held by the engine.
|
|
*/
|
|
dispose?(): Promise<void>;
|
|
}
|