mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-11 09:11:13 +00:00
fix(cache): enable prompt cache retention for Anthropic Vertex AI (#60888)
* fix(cache): enable prompt cache retention for Anthropic Vertex AI
* fix(cache): add anthropic-vertex to isAnthropicFamilyCacheTtlEligible
* fix(cache): use hostname parsing for long-TTL endpoint eligibility
* docs(changelog): note anthropic vertex cache ttl fix

---------

Co-authored-by: affsantos <andreffsantos91@gmail.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
@@ -98,6 +98,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Mobile pairing/bootstrap: keep QR bootstrap handoff tokens bounded to the mobile-safe contract so node handoff stays unscoped and operator handoff drops mixed `node.*`, `operator.admin`, and `operator.pairing` scopes.
|
||||
- Gateway/auth: serialize async shared-secret auth attempts per client so concurrent Tailscale-capable failures cannot overrun the intended auth rate-limit budget. Thanks @Telecaster2147.
|
||||
- Doctor/config: compare normalized `talk` configs by deep structural equality instead of key-order-sensitive serialization so `openclaw doctor --fix` stops repeatedly reporting/applying no-op `talk.provider/providers` normalization. (#59911) Thanks @ejames-dev.
|
||||
- Providers/Anthropic Vertex: honor `cacheRetention: "long"` with the real 1-hour prompt-cache TTL on Vertex AI endpoints, and default `anthropic-vertex` cache retention like direct Anthropic. (#60888) Thanks @affsantos.
|
||||
- Gateway/device auth: reuse cached device-token scopes only for cached-token reconnects, while keeping explicit `deviceToken` scope requests and empty-cache fallbacks intact so reconnects preserve `operator.read` without breaking explicit auth flows. (#46032) Thanks @caicongyang.
|
||||
- Agents/scheduling: steer background-now work toward automatic completion wake and treat `process` polling as on-demand inspection or intervention instead of default completion handling. (#60877) Thanks @vincentkoc.
|
||||
- Google Gemini CLI auth: improve OAuth credential discovery across Windows nvm and Homebrew libexec installs, and align Code Assist metadata so Gemini login stops failing on packaged CLI layouts. (#40729) Thanks @hughcube.
|
||||
|
||||
@@ -137,6 +137,66 @@ describe("anthropic payload policy", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// Test: a policy resolved for provider "anthropic-vertex" with a regional
// `*-aiplatform.googleapis.com` base URL and `cacheRetention: "long"` must make
// `applyAnthropicPayloadPolicyToParams` tag every system text block and the
// user message with `cache_control: { type: "ephemeral", ttl: "1h" }`.
// The expectation also pins that the plain-string user content is rewritten
// into a single text content block carrying that cache marker.
it("applies 1h TTL for Vertex AI endpoints with long cache retention", () => {
|
||||
const policy = resolveAnthropicPayloadPolicy({
|
||||
provider: "anthropic-vertex",
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://us-east5-aiplatform.googleapis.com",
|
||||
cacheRetention: "long",
|
||||
enableCacheControl: true,
|
||||
});
|
||||
const payload: TestPayload = {
|
||||
system: [
|
||||
{ type: "text", text: "Follow policy." },
|
||||
{ type: "text", text: "Use tools carefully." },
|
||||
],
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
applyAnthropicPayloadPolicyToParams(payload, policy);
|
||||
|
||||
expect(payload.system).toEqual([
|
||||
{
|
||||
type: "text",
|
||||
text: "Follow policy.",
|
||||
cache_control: { type: "ephemeral", ttl: "1h" },
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "Use tools carefully.",
|
||||
cache_control: { type: "ephemeral", ttl: "1h" },
|
||||
},
|
||||
]);
|
||||
expect(payload.messages[0]).toEqual({
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral", ttl: "1h" } }],
|
||||
});
|
||||
});
|
||||
|
||||
// Test: with the same Vertex AI provider and base URL but
// `cacheRetention: "short"`, the system block must receive a bare
// `cache_control: { type: "ephemeral" }` marker with no `ttl` field.
it("applies 5m ephemeral cache for Vertex AI endpoints with short cache retention", () => {
|
||||
const policy = resolveAnthropicPayloadPolicy({
|
||||
provider: "anthropic-vertex",
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://us-east5-aiplatform.googleapis.com",
|
||||
cacheRetention: "short",
|
||||
enableCacheControl: true,
|
||||
});
|
||||
const payload: TestPayload = {
|
||||
system: [{ type: "text", text: "Follow policy." }],
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
applyAnthropicPayloadPolicyToParams(payload, policy);
|
||||
|
||||
expect(payload.system).toEqual([
|
||||
{
|
||||
type: "text",
|
||||
text: "Follow policy.",
|
||||
cache_control: { type: "ephemeral" },
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("strips the boundary even when cache retention is disabled", () => {
|
||||
const policy = resolveAnthropicPayloadPolicy({
|
||||
provider: "anthropic",
|
||||
|
||||
@@ -26,6 +26,29 @@ export type AnthropicPayloadPolicy = {
|
||||
serviceTier: AnthropicServiceTier | undefined;
|
||||
};
|
||||
|
||||
/**
 * Extracts a normalized hostname from a provider base URL.
 *
 * The hostname is lower-cased and a single trailing root dot (absolute FQDN
 * form, e.g. `api.anthropic.com.`) is removed so that equivalent DNS names
 * compare equal in allowlist checks.
 *
 * @param baseUrl - Base URL string to parse; must include a scheme.
 * @returns The normalized hostname, or `undefined` when the value cannot be
 *   parsed as a URL or has no host component.
 */
function resolveBaseUrlHostname(baseUrl: string): string | undefined {
  let rawHostname: string;
  try {
    rawHostname = new URL(baseUrl).hostname;
  } catch {
    // Not an absolute URL (missing scheme, empty string, garbage input).
    return undefined;
  }
  // The URL parser keeps a trailing root dot and does not lower-case opaque
  // hosts, so normalize both before the value is used for comparisons.
  const hostname = rawHostname.replace(/\.$/, "").toLowerCase();
  return hostname === "" ? undefined : hostname;
}

/**
 * Reports whether the endpoint behind `baseUrl` is eligible for the long
 * (1-hour) prompt-cache TTL.
 *
 * Eligibility is decided on the parsed hostname rather than a substring match,
 * so a look-alike such as `https://evil.example/api.anthropic.com` is rejected.
 * Accepted hosts are `api.anthropic.com`, `aiplatform.googleapis.com`, and any
 * host ending in `-aiplatform.googleapis.com` (e.g. `us-east5-aiplatform.googleapis.com`).
 *
 * @param baseUrl - Configured base URL, if any.
 * @returns `true` only when the hostname matches one of the accepted hosts.
 */
function isLongTtlEligibleEndpoint(baseUrl: string | undefined): boolean {
  if (typeof baseUrl !== "string") {
    return false;
  }
  const hostname = resolveBaseUrlHostname(baseUrl);
  if (!hostname) {
    return false;
  }
  return (
    hostname === "api.anthropic.com" ||
    hostname === "aiplatform.googleapis.com" ||
    hostname.endsWith("-aiplatform.googleapis.com")
  );
}
|
||||
|
||||
function resolveAnthropicEphemeralCacheControl(
|
||||
baseUrl: string | undefined,
|
||||
cacheRetention: AnthropicPayloadPolicyInput["cacheRetention"],
|
||||
@@ -35,10 +58,7 @@ function resolveAnthropicEphemeralCacheControl(
|
||||
if (retention === "none") {
|
||||
return undefined;
|
||||
}
|
||||
const ttl =
|
||||
retention === "long" && typeof baseUrl === "string" && baseUrl.includes("api.anthropic.com")
|
||||
? "1h"
|
||||
: undefined;
|
||||
const ttl = retention === "long" && isLongTtlEligibleEndpoint(baseUrl) ? "1h" : undefined;
|
||||
return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ export function isAnthropicFamilyCacheTtlEligible(params: {
|
||||
modelId: string;
|
||||
}): boolean {
|
||||
const normalizedProvider = params.provider.trim().toLowerCase();
|
||||
if (normalizedProvider === "anthropic") {
|
||||
if (normalizedProvider === "anthropic" || normalizedProvider === "anthropic-vertex") {
|
||||
return true;
|
||||
}
|
||||
if (normalizedProvider === "amazon-bedrock") {
|
||||
@@ -38,7 +38,7 @@ export function resolveAnthropicCacheRetentionFamily(params: {
|
||||
hasExplicitCacheConfig: boolean;
|
||||
}): AnthropicCacheRetentionFamily | undefined {
|
||||
const normalizedProvider = params.provider.trim().toLowerCase();
|
||||
if (normalizedProvider === "anthropic") {
|
||||
if (normalizedProvider === "anthropic" || normalizedProvider === "anthropic-vertex") {
|
||||
return "anthropic-direct";
|
||||
}
|
||||
if (
|
||||
|
||||
@@ -237,6 +237,39 @@ describe("cacheRetention default behavior", () => {
|
||||
),
|
||||
).toBe("long");
|
||||
});
|
||||
|
||||
// Test: when no explicit cache config is passed (first argument `undefined`),
// `resolveCacheRetention` for provider "anthropic-vertex" on the
// "anthropic-messages" API is expected to return "short".
it("defaults to 'short' for anthropic-vertex without explicit config", () => {
|
||||
expect(
|
||||
resolveCacheRetention(
|
||||
undefined,
|
||||
"anthropic-vertex",
|
||||
"anthropic-messages",
|
||||
"claude-sonnet-4-6",
|
||||
),
|
||||
).toBe("short");
|
||||
});
|
||||
|
||||
// Test: an explicit `{ cacheRetention: "long" }` config must be returned
// unchanged ("long") by `resolveCacheRetention` for provider "anthropic-vertex".
it("respects explicit 'long' for anthropic-vertex", () => {
|
||||
expect(
|
||||
resolveCacheRetention(
|
||||
{ cacheRetention: "long" },
|
||||
"anthropic-vertex",
|
||||
"anthropic-messages",
|
||||
"claude-sonnet-4-6",
|
||||
),
|
||||
).toBe("long");
|
||||
});
|
||||
|
||||
// Test: an explicit `{ cacheRetention: "none" }` config must be returned
// unchanged ("none") by `resolveCacheRetention` for provider "anthropic-vertex".
it("respects explicit 'none' for anthropic-vertex", () => {
|
||||
expect(
|
||||
resolveCacheRetention(
|
||||
{ cacheRetention: "none" },
|
||||
"anthropic-vertex",
|
||||
"anthropic-messages",
|
||||
"claude-sonnet-4-6",
|
||||
),
|
||||
).toBe("none");
|
||||
});
|
||||
});
|
||||
|
||||
describe("anthropic-family cache semantics", () => {
|
||||
|
||||
Reference in New Issue
Block a user