fix(cache): enable prompt cache retention for Anthropic Vertex AI (#60888)

* fix(cache): enable prompt cache retention for Anthropic Vertex AI

* fix(cache): add anthropic-vertex to isAnthropicFamilyCacheTtlEligible

* fix(cache): use hostname parsing for long-TTL endpoint eligibility

* docs(changelog): note anthropic vertex cache ttl fix

---------

Co-authored-by: affsantos <andreffsantos91@gmail.com>
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
André Santos
2026-04-05 09:07:02 +02:00
committed by GitHub
parent a6894a5238
commit eb0f367e00
5 changed files with 120 additions and 6 deletions

View File

@@ -98,6 +98,7 @@ Docs: https://docs.openclaw.ai
- Mobile pairing/bootstrap: keep QR bootstrap handoff tokens bounded to the mobile-safe contract so node handoff stays unscoped and operator handoff drops mixed `node.*`, `operator.admin`, and `operator.pairing` scopes.
- Gateway/auth: serialize async shared-secret auth attempts per client so concurrent Tailscale-capable failures cannot overrun the intended auth rate-limit budget. Thanks @Telecaster2147.
- Doctor/config: compare normalized `talk` configs by deep structural equality instead of key-order-sensitive serialization so `openclaw doctor --fix` stops repeatedly reporting/applying no-op `talk.provider/providers` normalization. (#59911) Thanks @ejames-dev.
- Providers/Anthropic Vertex: honor `cacheRetention: "long"` with the real 1-hour prompt-cache TTL on Vertex AI endpoints, and default `anthropic-vertex` cache retention like direct Anthropic. (#60888) Thanks @affsantos.
- Gateway/device auth: reuse cached device-token scopes only for cached-token reconnects, while keeping explicit `deviceToken` scope requests and empty-cache fallbacks intact so reconnects preserve `operator.read` without breaking explicit auth flows. (#46032) Thanks @caicongyang.
- Agents/scheduling: steer background-now work toward automatic completion wake and treat `process` polling as on-demand inspection or intervention instead of default completion handling. (#60877) Thanks @vincentkoc.
- Google Gemini CLI auth: improve OAuth credential discovery across Windows nvm and Homebrew libexec installs, and align Code Assist metadata so Gemini login stops failing on packaged CLI layouts. (#40729) Thanks @hughcube.

View File

@@ -137,6 +137,66 @@ describe("anthropic payload policy", () => {
]);
});
// Long retention against a regional Vertex AI base URL: every cache_control
// marker written into the payload is expected to carry ttl "1h".
it("applies 1h TTL for Vertex AI endpoints with long cache retention", () => {
  const oneHourMarker = { type: "ephemeral", ttl: "1h" };
  const vertexPolicy = resolveAnthropicPayloadPolicy({
    provider: "anthropic-vertex",
    api: "anthropic-messages",
    baseUrl: "https://us-east5-aiplatform.googleapis.com",
    cacheRetention: "long",
    enableCacheControl: true,
  });
  const request: TestPayload = {
    system: [
      { type: "text", text: "Follow policy." },
      { type: "text", text: "Use tools carefully." },
    ],
    messages: [{ role: "user", content: "Hello" }],
  };

  applyAnthropicPayloadPolicyToParams(request, vertexPolicy);

  // Both system blocks are tagged.
  expect(request.system).toEqual([
    { type: "text", text: "Follow policy.", cache_control: oneHourMarker },
    { type: "text", text: "Use tools carefully.", cache_control: oneHourMarker },
  ]);
  // The plain-string user content is expanded into a tagged text block.
  expect(request.messages[0]).toEqual({
    role: "user",
    content: [{ type: "text", text: "Hello", cache_control: oneHourMarker }],
  });
});
// Short retention against the same Vertex AI base URL: the cache_control marker
// is expected to be plain "ephemeral" with no ttl field.
it("applies 5m ephemeral cache for Vertex AI endpoints with short cache retention", () => {
  const vertexPolicy = resolveAnthropicPayloadPolicy({
    provider: "anthropic-vertex",
    api: "anthropic-messages",
    baseUrl: "https://us-east5-aiplatform.googleapis.com",
    cacheRetention: "short",
    enableCacheControl: true,
  });
  const request: TestPayload = {
    system: [{ type: "text", text: "Follow policy." }],
    messages: [{ role: "user", content: "Hello" }],
  };

  applyAnthropicPayloadPolicyToParams(request, vertexPolicy);

  const expectedSystem = [
    { type: "text", text: "Follow policy.", cache_control: { type: "ephemeral" } },
  ];
  expect(request.system).toEqual(expectedSystem);
});
it("strips the boundary even when cache retention is disabled", () => {
const policy = resolveAnthropicPayloadPolicy({
provider: "anthropic",

View File

@@ -26,6 +26,29 @@ export type AnthropicPayloadPolicy = {
serviceTier: AnthropicServiceTier | undefined;
};
/**
 * Extracts the hostname component of a base URL.
 *
 * @param baseUrl - URL string to parse.
 * @returns The hostname reported by `URL`, or `undefined` when `baseUrl`
 *   cannot be parsed as a URL.
 */
function resolveBaseUrlHostname(baseUrl: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(baseUrl);
  } catch {
    // Unparseable input is reported as "no hostname" rather than thrown.
    return undefined;
  }
  return parsed.hostname;
}
/**
 * Reports whether a base URL's hostname is one of the endpoints accepted for
 * the long cache TTL.
 *
 * Matching is done on the parsed hostname, so a recognised name appearing
 * elsewhere in the URL (path, query, or as a prefix of another domain) does
 * not count.
 *
 * @param baseUrl - Configured base URL; may be absent.
 * @returns `true` for `api.anthropic.com`, `aiplatform.googleapis.com`, or any
 *   hostname ending in `-aiplatform.googleapis.com`; `false` otherwise,
 *   including when `baseUrl` is missing or unparseable.
 */
function isLongTtlEligibleEndpoint(baseUrl: string | undefined): boolean {
  const host = typeof baseUrl === "string" ? resolveBaseUrlHostname(baseUrl) : undefined;
  if (host === undefined || host === "") {
    return false;
  }
  if (host === "api.anthropic.com" || host === "aiplatform.googleapis.com") {
    return true;
  }
  // Prefixed hosts of the form "<prefix>-aiplatform.googleapis.com".
  return host.endsWith("-aiplatform.googleapis.com");
}
function resolveAnthropicEphemeralCacheControl(
baseUrl: string | undefined,
cacheRetention: AnthropicPayloadPolicyInput["cacheRetention"],
@@ -35,10 +58,7 @@ function resolveAnthropicEphemeralCacheControl(
if (retention === "none") {
return undefined;
}
const ttl =
retention === "long" && typeof baseUrl === "string" && baseUrl.includes("api.anthropic.com")
? "1h"
: undefined;
const ttl = retention === "long" && isLongTtlEligibleEndpoint(baseUrl) ? "1h" : undefined;
return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
}

View File

@@ -22,7 +22,7 @@ export function isAnthropicFamilyCacheTtlEligible(params: {
modelId: string;
}): boolean {
const normalizedProvider = params.provider.trim().toLowerCase();
if (normalizedProvider === "anthropic") {
if (normalizedProvider === "anthropic" || normalizedProvider === "anthropic-vertex") {
return true;
}
if (normalizedProvider === "amazon-bedrock") {
@@ -38,7 +38,7 @@ export function resolveAnthropicCacheRetentionFamily(params: {
hasExplicitCacheConfig: boolean;
}): AnthropicCacheRetentionFamily | undefined {
const normalizedProvider = params.provider.trim().toLowerCase();
if (normalizedProvider === "anthropic") {
if (normalizedProvider === "anthropic" || normalizedProvider === "anthropic-vertex") {
return "anthropic-direct";
}
if (

View File

@@ -237,6 +237,39 @@ describe("cacheRetention default behavior", () => {
),
).toBe("long");
});
// With no extra params supplied, anthropic-vertex is expected to resolve to "short".
it("defaults to 'short' for anthropic-vertex without explicit config", () => {
  const retention = resolveCacheRetention(
    undefined,
    "anthropic-vertex",
    "anthropic-messages",
    "claude-sonnet-4-6",
  );
  expect(retention).toBe("short");
});
// An explicit cacheRetention of "long" is expected to pass through unchanged.
it("respects explicit 'long' for anthropic-vertex", () => {
  const retention = resolveCacheRetention(
    { cacheRetention: "long" },
    "anthropic-vertex",
    "anthropic-messages",
    "claude-sonnet-4-6",
  );
  expect(retention).toBe("long");
});
// An explicit cacheRetention of "none" is expected to pass through unchanged.
it("respects explicit 'none' for anthropic-vertex", () => {
  const retention = resolveCacheRetention(
    { cacheRetention: "none" },
    "anthropic-vertex",
    "anthropic-messages",
    "claude-sonnet-4-6",
  );
  expect(retention).toBe("none");
});
});
describe("anthropic-family cache semantics", () => {