From bdba4fa1bfe03fed7a1671b26983c746cfc070c6 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Sat, 25 Apr 2026 16:50:38 +0530 Subject: [PATCH] fix: isolate active memory auth health (#71539) * fix(agents): scope helper auth failures * fix(active-memory): isolate recall auth health * fix: isolate active memory auth health (#71539) * fix: avoid auth policy import cycle (#71539) --- CHANGELOG.md | 1 + extensions/active-memory/index.test.ts | 18 ++++++++ extensions/active-memory/index.ts | 1 + src/agents/pi-embedded-runner/run.ts | 24 +++++------ .../run/auth-profile-failure-policy.test.ts | 42 +++++++++++++++++++ .../run/auth-profile-failure-policy.ts | 14 +++++++ .../run/auth-profile-failure-policy.types.ts | 1 + src/agents/pi-embedded-runner/run/params.ts | 2 + 8 files changed, 89 insertions(+), 14 deletions(-) create mode 100644 src/agents/pi-embedded-runner/run/auth-profile-failure-policy.test.ts create mode 100644 src/agents/pi-embedded-runner/run/auth-profile-failure-policy.ts create mode 100644 src/agents/pi-embedded-runner/run/auth-profile-failure-policy.types.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 370968bebc0..4c775dbee0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,7 @@ Docs: https://docs.openclaw.ai - Agents/tool-result pruning: harden the tool-result character estimator and context-pruning loops against malformed `{ type: "text" }` blocks created by void or undefined tool handler results, serializing non-string text payloads for size accounting so they cannot bypass trimming as zero-sized. Fixes #34979. (#51267) Thanks @cgdusek, @alvinttang, and @coffeexcoin. - Daemon/service-env: add Nix Home Manager profile bin directories to generated gateway service PATHs on macOS and Linux, honoring `NIX_PROFILES` right-to-left precedence and falling back to `~/.nix-profile/bin` when unset. Fixes #44402. (#59935) Thanks @jerome-benoit. - Agents/heartbeat: stop injecting the heartbeat system prompt into non-heartbeat runs, preventing ordinary user replies from being suppressed as `HEARTBEAT_OK` acknowledgments. Fixes #69079. (#69278) Thanks @stainlu. +- Active Memory: keep silent recall sub-agent billing/auth failures out of shared auth-profile cooldown state, so a Claude CLI extra-usage rejection cannot disable normal Claude-backed turns. Fixes #71284. (#71539) Thanks @vishutdhar and @obviyus. ## 2026.4.25 (Unreleased) diff --git a/extensions/active-memory/index.test.ts b/extensions/active-memory/index.test.ts index b021f30bac7..b2ab36658e2 100644 --- a/extensions/active-memory/index.test.ts +++ b/extensions/active-memory/index.test.ts @@ -162,6 +162,24 @@ describe("active-memory plugin", () => { expect(api.on).toHaveBeenCalledWith("before_prompt_build", expect.any(Function)); }); + it("runs recall without recording shared auth-profile failures", async () => { + await hooks.before_prompt_build( + { prompt: "what wings should i order?", messages: [] }, + { + agentId: "main", + trigger: "user", + sessionKey: "agent:main:main", + messageProvider: "webchat", + }, + ); + + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + authProfileFailurePolicy: "local", + }), + ); + }); + it("registers a session-scoped active-memory toggle command", async () => { const command = registeredCommands["active-memory"]; const sessionKey = "agent:main:active-memory-toggle"; diff --git a/extensions/active-memory/index.ts b/extensions/active-memory/index.ts index 6961e54d328..79cda33ed55 100644 --- a/extensions/active-memory/index.ts +++ b/extensions/active-memory/index.ts @@ -1685,6 +1685,7 @@ async function runRecallSubagent(params: { thinkLevel: params.config.thinking, reasoningLevel: "off", silentExpected: true, + authProfileFailurePolicy: "local", cleanupBundleMcpOnRunEnd: true, abortSignal: params.abortSignal, }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 26a30eddf63..d7de8ec7481 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -90,6 +90,7 @@ import { resolveModelAsync } from "./model.js"; import { createEmbeddedRunReplayState, observeReplayMetadata } from "./replay-state.js"; import { handleAssistantFailover } from "./run/assistant-failover.js"; import { createEmbeddedRunAuthController } from "./run/auth-controller.js"; +import { resolveAuthProfileFailureReason } from "./run/auth-profile-failure-policy.js"; import { runEmbeddedAttemptWithBackend } from "./run/backend.js"; import { createFailoverDecisionLogger } from "./run/failover-observation.js"; import { mergeRetryFailoverReason, resolveRunFailoverDecision } from "./run/failover-policy.js"; @@ -669,16 +670,11 @@ export async function runEmbeddedPiAgent( modelId: failure.modelId, }); }; - const resolveAuthProfileFailureReason = ( - failoverReason: FailoverReason | null, - ): AuthProfileFailureReason | null => { - // Timeouts are transport/model-path failures, not auth health signals, - // so they should not persist auth-profile failure state. - if (!failoverReason || failoverReason === "timeout") { - return null; - } - return failoverReason; - }; + const resolveRunAuthProfileFailureReason = (failoverReason: FailoverReason | null) => + resolveAuthProfileFailureReason({ + failoverReason, + policy: params.authProfileFailurePolicy, + }); const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => { if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) { return; @@ -1485,7 +1481,7 @@ export async function runEmbeddedPiAgent( const promptFailoverReason = promptErrorDetails.reason ?? classifyFailoverReason(errorText, { provider }); const promptProfileFailureReason = - resolveAuthProfileFailureReason(promptFailoverReason); + resolveRunAuthProfileFailureReason(promptFailoverReason); await maybeMarkAuthProfileFailure({ profileId: lastProfileId, reason: promptProfileFailureReason, @@ -1630,7 +1626,7 @@ export async function runEmbeddedPiAgent( }, ); const assistantProfileFailureReason = - resolveAuthProfileFailureReason(assistantFailoverReason); + resolveRunAuthProfileFailureReason(assistantFailoverReason); const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError; const imageDimensionError = parseImageDimensionError( assistantForFailover?.errorMessage ?? "", @@ -2047,7 +2043,7 @@ export async function runEmbeddedPiAgent( if (lastProfileId) { await maybeMarkAuthProfileFailure({ profileId: lastProfileId, - reason: resolveAuthProfileFailureReason(assistantFailoverReason), + reason: resolveRunAuthProfileFailureReason(assistantFailoverReason), }); } return { @@ -2157,7 +2153,7 @@ export async function runEmbeddedPiAgent( if (lastProfileId) { await maybeMarkAuthProfileFailure({ profileId: lastProfileId, - reason: resolveAuthProfileFailureReason(assistantFailoverReason), + reason: resolveRunAuthProfileFailureReason(assistantFailoverReason), }); } diff --git a/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.test.ts b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.test.ts new file mode 100644 index 00000000000..f4994d1245b --- /dev/null +++ b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from "vitest"; +import { resolveAuthProfileFailureReason } from "./auth-profile-failure-policy.js"; + +describe("resolveAuthProfileFailureReason", () => { + it("records shared non-timeout provider failures", () => { + expect( + resolveAuthProfileFailureReason({ + failoverReason: "billing", + policy: "shared", + }), + ).toBe("billing"); + expect( + resolveAuthProfileFailureReason({ + failoverReason: "rate_limit", + policy: "shared", + }), + ).toBe("rate_limit"); + }); + + it("does not record local helper failures in shared auth state", () => { + expect( + resolveAuthProfileFailureReason({ + failoverReason: "billing", + policy: "local", + }), + ).toBeNull(); + expect( + resolveAuthProfileFailureReason({ + failoverReason: "auth", + policy: "local", + }), + ).toBeNull(); + }); + + it("does not persist transport timeouts as auth-profile health", () => { + expect( + resolveAuthProfileFailureReason({ + failoverReason: "timeout", + }), + ).toBeNull(); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.ts b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.ts new file mode 100644 index 00000000000..ddd199ba2bc --- /dev/null +++ b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.ts @@ -0,0 +1,14 @@ +import type { AuthProfileFailureReason } from "../../auth-profiles/types.js"; +import type { FailoverReason } from "../../pi-embedded-helpers/types.js"; +import type { AuthProfileFailurePolicy } from "./auth-profile-failure-policy.types.js"; + +export function resolveAuthProfileFailureReason(params: { + failoverReason: FailoverReason | null; + policy?: AuthProfileFailurePolicy; +}): AuthProfileFailureReason | null { + // Helper-local runs and transport timeouts should not poison shared provider auth health. + if (params.policy === "local" || !params.failoverReason || params.failoverReason === "timeout") { + return null; + } + return params.failoverReason; +} diff --git a/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.types.ts b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.types.ts new file mode 100644 index 00000000000..5a1387faa3c --- /dev/null +++ b/src/agents/pi-embedded-runner/run/auth-profile-failure-policy.types.ts @@ -0,0 +1 @@ +export type AuthProfileFailurePolicy = "shared" | "local"; diff --git a/src/agents/pi-embedded-runner/run/params.ts b/src/agents/pi-embedded-runner/run/params.ts index cfc13931a91..4e51811ad50 100644 --- a/src/agents/pi-embedded-runner/run/params.ts +++ b/src/agents/pi-embedded-runner/run/params.ts @@ -15,6 +15,7 @@ import type { ToolResultFormat, } from "../../pi-embedded-subscribe.shared-types.js"; import type { SkillSnapshot } from "../../skills.js"; +import type { AuthProfileFailurePolicy } from "./auth-profile-failure-policy.types.js"; export type { ClientToolDefinition } from "../../command/shared-types.js"; export type EmbeddedRunTrigger = "cron" | "heartbeat" | "manual" | "memory" | "overflow" | "user"; @@ -139,6 +140,7 @@ export type RunEmbeddedPiAgentParams = { ownerNumbers?: string[]; enforceFinalTag?: boolean; silentExpected?: boolean; + authProfileFailurePolicy?: AuthProfileFailurePolicy; /** * Allow a single run attempt even when all auth profiles are in cooldown, * but only for inferred transient cooldowns like `rate_limit` or `overloaded`.