diff --git a/src/auto-reply/get-reply-options.types.ts b/src/auto-reply/get-reply-options.types.ts index e2868521f9f..8f907a6b666 100644 --- a/src/auto-reply/get-reply-options.types.ts +++ b/src/auto-reply/get-reply-options.types.ts @@ -49,6 +49,8 @@ export type PartialReplyPayload = Pick & { export type GetReplyOptions = { /** Override run id for agent events (defaults to random UUID). */ runId?: string; + /** Stable provider prompt-cache affinity key; distinct from run id/idempotency. */ + promptCacheKey?: string; /** Abort signal for the underlying agent run. */ abortSignal?: AbortSignal; /** Optional inbound images (used for webchat attachments). */ diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 7d13bf3693b..37276f700d2 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -2231,6 +2231,7 @@ export async function runAgentTurnWithFallback(params: { hasRepliedRef: params.opts?.hasRepliedRef, provider, runId, + promptCacheKey: params.opts?.promptCacheKey, allowTransientCooldownProbe: runOptions?.allowTransientCooldownProbe, model, }); diff --git a/src/auto-reply/reply/agent-runner-run-params.ts b/src/auto-reply/reply/agent-runner-run-params.ts index f691e11d285..e38ae319bcb 100644 --- a/src/auto-reply/reply/agent-runner-run-params.ts +++ b/src/auto-reply/reply/agent-runner-run-params.ts @@ -55,6 +55,7 @@ export function buildEmbeddedRunBaseParams(params: { provider: string; model: string; runId: string; + promptCacheKey?: string; authProfile: ReturnType; allowTransientCooldownProbe?: boolean; isReasoningTagProvider?: ReasoningTagProviderResolver; @@ -99,6 +100,7 @@ export function buildEmbeddedRunBaseParams(params: { bashElevated: params.run.bashElevated, timeoutMs: params.run.timeoutMs, runId: params.runId, + promptCacheKey: params.promptCacheKey, allowTransientCooldownProbe: params.allowTransientCooldownProbe, }; } diff --git 
a/src/auto-reply/reply/agent-runner-utils.test.ts b/src/auto-reply/reply/agent-runner-utils.test.ts index 9847f21dd25..53eedd979d7 100644 --- a/src/auto-reply/reply/agent-runner-utils.test.ts +++ b/src/auto-reply/reply/agent-runner-utils.test.ts @@ -25,6 +25,7 @@ const { buildThreadingToolContext, buildEmbeddedRunBaseParams, buildEmbeddedRunContexts, + buildEmbeddedRunExecutionParams, resolveModelFallbackOptions, resolveEnforceFinalTag, resolveProviderScopedAuthProfile, @@ -138,6 +139,7 @@ describe("agent-runner-utils", () => { provider: "openai", model: "gpt-4.1-mini", runId: "run-1", + promptCacheKey: "webchat-cache-key", authProfile, }); @@ -160,6 +162,24 @@ describe("agent-runner-utils", () => { expect(resolved.bashElevated).toBe(run.bashElevated); expect(resolved.timeoutMs).toBe(run.timeoutMs); expect(resolved.runId).toBe("run-1"); + expect(resolved.promptCacheKey).toBe("webchat-cache-key"); + }); + + it("threads prompt cache affinity through embedded execution params", () => { + const run = makeRun(); + + const resolved = buildEmbeddedRunExecutionParams({ + run, + sessionCtx: { Provider: "webchat" }, + hasRepliedRef: undefined, + provider: "openai", + model: "gpt-4.1-mini", + runId: "run-1", + promptCacheKey: "stable-session-cache-key", + }); + + expect(resolved.runBaseParams.runId).toBe("run-1"); + expect(resolved.runBaseParams.promptCacheKey).toBe("stable-session-cache-key"); }); it("passes through recovered auto fallback provenance for embedded run params", () => { diff --git a/src/auto-reply/reply/agent-runner-utils.ts b/src/auto-reply/reply/agent-runner-utils.ts index bf28684ac80..572c214468f 100644 --- a/src/auto-reply/reply/agent-runner-utils.ts +++ b/src/auto-reply/reply/agent-runner-utils.ts @@ -273,6 +273,7 @@ export function buildEmbeddedRunExecutionParams(params: { provider: string; model: string; runId: string; + promptCacheKey?: string; allowTransientCooldownProbe?: boolean; }) { const { authProfile, embeddedContext, senderContext } = 
buildEmbeddedRunContexts(params); @@ -281,6 +282,7 @@ export function buildEmbeddedRunExecutionParams(params: { provider: params.provider, model: params.model, runId: params.runId, + promptCacheKey: params.promptCacheKey, authProfile, allowTransientCooldownProbe: params.allowTransientCooldownProbe, }); diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index a886176b922..52144eb6aec 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -333,6 +333,28 @@ function buildMediaOnlyTtsSupplementTranscriptMarker( return buildTtsSupplementTranscriptMarker(payload); } +/** Builds the prompt-cache key passed as replyOptions.promptCacheKey for webchat runs. Hashes (SHA-256) a NUL-joined tuple of: the literal v1 tag, the trimmed and lower-cased provider, the trimmed model (case preserved), normalizeAgentId(agentId), and sessionKey as given. Returns the openclaw-webchat- prefix followed by the first 32 hex characters of that digest, so identical inputs always yield the same key and none of the inputs appears verbatim in the result. */ function resolveWebchatPromptCacheKey(params: { + agentId: string; + model: string; + provider: string; + sessionKey: string; +}): string { + const digest = createHash("sha256") + .update( + [ + "v1", + params.provider.trim().toLowerCase(), + params.model.trim(), + normalizeAgentId(params.agentId), + params.sessionKey, + ].join("\0"), + "utf8", + ) + .digest("hex") + .slice(0, 32); + return `openclaw-webchat-${digest}`; +} + async function buildWebchatAssistantMediaMessage( payloads: ReplyPayload[], options?: { @@ -3633,6 +3655,16 @@ export const chatHandlers: GatewayRequestHandlers = { dispatcher, replyOptions: { runId: clientRunId, + ...(isOperatorUiClient(clientInfo) + ? { + promptCacheKey: resolveWebchatPromptCacheKey({ + agentId, + provider: resolvedSessionModel.provider, + model: resolvedSessionModel.model, + sessionKey: activeRunScopeKey, + }), + } + : {}), abortSignal: activeRunAbort.controller.signal, images: replyOptionImages, imageOrder: imageOrder.length > 0 ? 
imageOrder : undefined, diff --git a/src/gateway/server.chat.gateway-server-chat-b.test.ts b/src/gateway/server.chat.gateway-server-chat-b.test.ts index 93a1251df7a..eca4569c158 100644 --- a/src/gateway/server.chat.gateway-server-chat-b.test.ts +++ b/src/gateway/server.chat.gateway-server-chat-b.test.ts @@ -1004,6 +1004,17 @@ describe("gateway server chat", () => { }, ]); expect(dispatchInboundMessageMock).toHaveBeenCalledTimes(2); + const dispatchOptions = dispatchInboundMessageMock.mock.calls.map(([params]) => { + return (params as { replyOptions?: GetReplyOptions }).replyOptions; + }); + expect(dispatchOptions[0]?.runId).toBe("idem-sequential-a"); + expect(dispatchOptions[1]?.runId).toBe("idem-sequential-b"); + expect(dispatchOptions[0]?.promptCacheKey).toEqual( + expect.stringMatching(/^openclaw-webchat-[a-f0-9]{32}$/u), + ); + expect(dispatchOptions[1]?.promptCacheKey).toBe(dispatchOptions[0]?.promptCacheKey); + expect(dispatchOptions[0]?.promptCacheKey).not.toContain("main"); + expect(dispatchOptions[0]?.promptCacheKey).not.toContain("sess-main"); expect(context.addChatRun).toHaveBeenCalledTimes(2); } finally { dispatchInboundMessageMock.mockReset();