mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Agents: infer auth-profile unavailable failover reason
This commit is contained in:
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Install/Discord Voice: make `@discordjs/opus` an optional dependency so `openclaw` install/update no longer hard-fails when native Opus builds fail, while keeping `opusscript` as the runtime fallback decoder for Discord voice flows. (#23737, #23733, #23703) Thanks @jeadland, @Sheetaa, and @Breakyman.
|
||||
- Agents/Exec: honor explicit agent context when resolving `tools.exec` defaults for runs with opaque/non-agent session keys, so per-agent `host/security/ask` policies are applied consistently. (#11832)
|
||||
- Agents/Auth profiles: infer `all profiles unavailable` failover reasons from active profile cooldown/disabled stats (instead of hardcoded `rate_limit`) so auth/billing OAuth outages surface accurately in fallback errors. (#23996) Thanks @DerpyNoodlez.
|
||||
- Security/Sessions: redact sensitive token patterns from `sessions_history` tool output and surface `contentRedacted` metadata when masking occurs. (#16928) Thanks @aether-ai-agent.
|
||||
- Sandbox/Docker: default sandbox container user to the workspace owner `uid:gid` when `agents.*.sandbox.docker.user` is unset, fixing non-root gateway file-tool permissions under capability-dropped containers. (#20979)
|
||||
- Doctor/Security: add an explicit warning that `approvals.exec.enabled=false` disables forwarding only, while enforcement remains driven by host-local `exec-approvals.json` policy. (#15047)
|
||||
|
||||
@@ -40,5 +40,6 @@ export {
|
||||
markAuthProfileCooldown,
|
||||
markAuthProfileFailure,
|
||||
markAuthProfileUsed,
|
||||
resolveProfilesUnavailableReason,
|
||||
resolveProfileUnusableUntilForDisplay,
|
||||
} from "./auth-profiles/usage.js";
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
clearExpiredCooldowns,
|
||||
isProfileInCooldown,
|
||||
markAuthProfileFailure,
|
||||
resolveProfilesUnavailableReason,
|
||||
resolveProfileUnusableUntil,
|
||||
} from "./usage.js";
|
||||
|
||||
@@ -85,6 +86,101 @@ describe("isProfileInCooldown", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveProfilesUnavailableReason", () => {
  // Distance (ms) used to place an unusable window in the future relative to `now`.
  const WINDOW_MS = 60_000;

  // Resolve the unavailable reason for `profileIds` against `store` at a fixed `now`.
  const reasonFor = (store: ReturnType<typeof makeStore>, now: number, profileIds: string[]) =>
    resolveProfilesUnavailableReason({ store, profileIds, now });

  it("prefers active disabledReason when profiles are disabled", () => {
    const now = Date.now();
    const disabledStore = makeStore({
      "anthropic:default": {
        disabledUntil: now + WINDOW_MS,
        disabledReason: "billing",
      },
    });

    const reason = reasonFor(disabledStore, now, ["anthropic:default"]);

    expect(reason).toBe("billing");
  });

  it("uses recorded non-rate-limit failure counts for active cooldown windows", () => {
    const now = Date.now();
    const cooledStore = makeStore({
      "anthropic:default": {
        cooldownUntil: now + WINDOW_MS,
        failureCounts: { auth: 3, rate_limit: 1 },
      },
    });

    const reason = reasonFor(cooledStore, now, ["anthropic:default"]);

    // auth (3) outweighs rate_limit (1).
    expect(reason).toBe("auth");
  });

  it("falls back to rate_limit when active cooldown has no reason history", () => {
    const now = Date.now();
    const cooledStore = makeStore({
      "anthropic:default": {
        cooldownUntil: now + WINDOW_MS,
      },
    });

    const reason = reasonFor(cooledStore, now, ["anthropic:default"]);

    expect(reason).toBe("rate_limit");
  });

  it("ignores expired windows and returns null when no profile is actively unavailable", () => {
    const now = Date.now();
    // Both windows ended before `now`, so neither profile should contribute.
    const expiredStore = makeStore({
      "anthropic:default": {
        cooldownUntil: now - 1_000,
        failureCounts: { auth: 5 },
      },
      "anthropic:backup": {
        disabledUntil: now - 500,
        disabledReason: "billing",
      },
    });

    const reason = reasonFor(expiredStore, now, ["anthropic:default", "anthropic:backup"]);

    expect(reason).toBeNull();
  });

  it("breaks ties by reason priority for equal active failure counts", () => {
    const now = Date.now();
    const tiedStore = makeStore({
      "anthropic:default": {
        cooldownUntil: now + WINDOW_MS,
        failureCounts: { timeout: 2, auth: 2 },
      },
    });

    const reason = reasonFor(tiedStore, now, ["anthropic:default"]);

    // Equal counts: the expectation is that auth wins over timeout.
    expect(reason).toBe("auth");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// clearExpiredCooldowns
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -3,6 +3,20 @@ import { normalizeProviderId } from "../model-selection.js";
|
||||
import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
|
||||
import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
|
||||
|
||||
// Known failure reasons. The position in this list is the tie-break rank used
// when two reasons end up with the same score (lower index wins).
const FAILURE_REASON_PRIORITY: AuthProfileFailureReason[] = [
  "auth",
  "billing",
  "format",
  "model_not_found",
  "timeout",
  "rate_limit",
  "unknown",
];

// Membership lookup for validating reasons.
const FAILURE_REASON_SET: Set<AuthProfileFailureReason> = new Set(FAILURE_REASON_PRIORITY);

// Reason -> index in FAILURE_REASON_PRIORITY.
const FAILURE_REASON_ORDER: Map<AuthProfileFailureReason, number> = new Map();
FAILURE_REASON_PRIORITY.forEach((reason, rank) => {
  FAILURE_REASON_ORDER.set(reason, rank);
});
|
||||
|
||||
export function resolveProfileUnusableUntil(
|
||||
stats: Pick<ProfileUsageStats, "cooldownUntil" | "disabledUntil">,
|
||||
): number | null {
|
||||
@@ -27,6 +41,85 @@ export function isProfileInCooldown(store: AuthProfileStore, profileId: string):
|
||||
return unusableUntil ? Date.now() < unusableUntil : false;
|
||||
}
|
||||
|
||||
/**
 * Report whether an "unusable until" timestamp describes a window that is
 * still open at `now`.
 *
 * @param until - End of the window; may be missing.
 * @param now - Reference time to compare against.
 * @returns `true` only when `until` is a finite, positive number strictly
 *   greater than `now`.
 */
function isActiveUnusableWindow(until: number | undefined, now: number): boolean {
  if (typeof until !== "number" || !Number.isFinite(until)) {
    return false;
  }
  if (until <= 0) {
    return false;
  }
  return until > now;
}
|
||||
|
||||
/**
 * Infer the most likely reason all candidate profiles are currently unavailable.
 *
 * Each profile contributes to a per-reason score:
 * - A profile with an active `disabledUntil` window and a recognized
 *   `disabledReason` adds 1_000 to that reason; its failure counts are ignored.
 * - Otherwise, a profile with an active `cooldownUntil` window adds every
 *   positive, finite `failureCounts` entry to the matching reason, or a single
 *   `rate_limit` point when it has no usable entry.
 * - Profiles with no stats, or with no active window, contribute nothing.
 *
 * The highest score wins; ties go to the reason listed earlier in
 * `FAILURE_REASON_PRIORITY`.
 *
 * @param params.store - Auth profile store whose `usageStats` are inspected.
 * @param params.profileIds - Candidate profile ids to consider.
 * @param params.now - Reference time for window checks; defaults to `Date.now()`.
 * @returns The inferred reason, or `null` when no profile contributed a score.
 */
export function resolveProfilesUnavailableReason(params: {
  store: AuthProfileStore;
  profileIds: string[];
  now?: number;
}): AuthProfileFailureReason | null {
  const now = params.now ?? Date.now();
  const scores = new Map<AuthProfileFailureReason, number>();

  // Adds `value` to `reason` and reports whether anything was actually recorded.
  const addScore = (reason: AuthProfileFailureReason, value: number): boolean => {
    if (!FAILURE_REASON_SET.has(reason) || !Number.isFinite(value) || value <= 0) {
      return false;
    }
    scores.set(reason, (scores.get(reason) ?? 0) + value);
    return true;
  };

  for (const profileId of params.profileIds) {
    const stats = params.store.usageStats?.[profileId];
    if (!stats) {
      continue;
    }

    const disabledActive = isActiveUnusableWindow(stats.disabledUntil, now);
    if (disabledActive && stats.disabledReason && FAILURE_REASON_SET.has(stats.disabledReason)) {
      // Disabled reasons are explicit and high-signal; weight heavily.
      addScore(stats.disabledReason, 1_000);
      continue;
    }

    if (!isActiveUnusableWindow(stats.cooldownUntil, now)) {
      continue;
    }

    let recordedReason = false;
    for (const [rawReason, rawCount] of Object.entries(stats.failureCounts ?? {})) {
      const count = typeof rawCount === "number" ? rawCount : 0;
      // Only count the entry as recorded when it really scored. A NaN/Infinity
      // count is rejected by addScore and must not suppress the fallback below.
      if (addScore(rawReason as AuthProfileFailureReason, count)) {
        recordedReason = true;
      }
    }
    if (!recordedReason) {
      // Active cooldown without usable history: assume a plain rate limit.
      addScore("rate_limit", 1);
    }
  }

  let best: AuthProfileFailureReason | null = null;
  let bestScore = -1;
  let bestPriority = Number.MAX_SAFE_INTEGER;
  for (const [reason, score] of scores) {
    const priority = FAILURE_REASON_ORDER.get(reason) ?? Number.MAX_SAFE_INTEGER;
    const wins = score > bestScore || (score === bestScore && priority < bestPriority);
    if (wins) {
      best = reason;
      bestScore = score;
      bestPriority = priority;
    }
  }
  return best;
}
|
||||
|
||||
/**
|
||||
* Return the soonest `unusableUntil` timestamp (ms epoch) among the given
|
||||
* profiles, or `null` when no profile has a recorded cooldown. Note: the
|
||||
|
||||
@@ -8,6 +8,7 @@ vi.mock("./auth-profiles.js", () => ({
|
||||
ensureAuthProfileStore: vi.fn(),
|
||||
getSoonestCooldownExpiry: vi.fn(),
|
||||
isProfileInCooldown: vi.fn(),
|
||||
resolveProfilesUnavailableReason: vi.fn(),
|
||||
resolveAuthProfileOrder: vi.fn(),
|
||||
}));
|
||||
|
||||
@@ -15,6 +16,7 @@ import {
|
||||
ensureAuthProfileStore,
|
||||
getSoonestCooldownExpiry,
|
||||
isProfileInCooldown,
|
||||
resolveProfilesUnavailableReason,
|
||||
resolveAuthProfileOrder,
|
||||
} from "./auth-profiles.js";
|
||||
import { _probeThrottleInternals, runWithModelFallback } from "./model-fallback.js";
|
||||
@@ -22,6 +24,7 @@ import { _probeThrottleInternals, runWithModelFallback } from "./model-fallback.
|
||||
const mockedEnsureAuthProfileStore = vi.mocked(ensureAuthProfileStore);
|
||||
const mockedGetSoonestCooldownExpiry = vi.mocked(getSoonestCooldownExpiry);
|
||||
const mockedIsProfileInCooldown = vi.mocked(isProfileInCooldown);
|
||||
const mockedResolveProfilesUnavailableReason = vi.mocked(resolveProfilesUnavailableReason);
|
||||
const mockedResolveAuthProfileOrder = vi.mocked(resolveAuthProfileOrder);
|
||||
|
||||
const makeCfg = makeModelFallbackCfg;
|
||||
@@ -98,6 +101,7 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
mockedIsProfileInCooldown.mockImplementation((_store, profileId: string) => {
|
||||
return profileId.startsWith("openai");
|
||||
});
|
||||
mockedResolveProfilesUnavailableReason.mockReturnValue("rate_limit");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -119,6 +123,22 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
expectFallbackUsed(result, run);
|
||||
});
|
||||
|
||||
it("uses inferred unavailable reason when skipping a cooldowned primary model", async () => {
  // Soonest cooldown expiry is reported as 30 minutes after NOW, and the
  // inferred reason for the unavailable profiles is mocked as "billing".
  const THIRTY_MINUTES_MS = 30 * 60 * 1000;
  mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
  mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + THIRTY_MINUTES_MS);
  const run = vi.fn().mockResolvedValue("ok");

  const outcome = await runPrimaryCandidate(makeCfg(), run);

  // Exactly one invocation, for anthropic/claude-haiku-3-5.
  expect(outcome.result).toBe("ok");
  expect(run).toHaveBeenCalledTimes(1);
  expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
  // The first recorded attempt carries the inferred reason.
  expect(outcome.attempts[0]?.reason).toBe("billing");
});
|
||||
|
||||
it("probes primary model when within 2-min margin of cooldown expiry", async () => {
|
||||
const cfg = makeCfg();
|
||||
// Cooldown expires in 1 minute — within 2-min probe margin
|
||||
|
||||
@@ -348,6 +348,49 @@ describe("runWithModelFallback", () => {
|
||||
expect(result.attempts[0]?.reason).toBe("rate_limit");
|
||||
});
|
||||
|
||||
it("propagates disabled reason when all profiles are unavailable", async () => {
  const now = Date.now();
  const provider = `disabled-test-${crypto.randomUUID()}`;
  const profileId = `${provider}:default`;
  const FIVE_MINUTES_MS = 5 * 60_000;

  // The single profile is disabled for billing; its rate_limit failure counts
  // are present so the test can check that the disabled reason is the one reported.
  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      [profileId]: { type: "api_key", provider, key: "test-key" },
    },
    usageStats: {
      [profileId]: {
        disabledUntil: now + FIVE_MINUTES_MS,
        disabledReason: "billing",
        failureCounts: { rate_limit: 4 },
      },
    },
  };

  // Any call that is not for the "fallback" provider fails the test.
  const run = vi.fn().mockImplementation(async (providerId, modelId) => {
    if (providerId !== "fallback") {
      throw new Error(`unexpected provider: ${providerId}/${modelId}`);
    }
    return "ok";
  });

  const outcome = await runWithStoredAuth({
    cfg: makeProviderFallbackCfg(provider),
    store,
    provider,
    run,
  });

  expect(outcome.result).toBe("ok");
  expect(run.mock.calls).toEqual([["fallback", "ok-model"]]);
  expect(outcome.attempts[0]?.reason).toBe("billing");
});
|
||||
|
||||
it("does not skip when any profile is available", async () => {
|
||||
const provider = `cooldown-mixed-${crypto.randomUUID()}`;
|
||||
const profileA = `${provider}:a`;
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
ensureAuthProfileStore,
|
||||
getSoonestCooldownExpiry,
|
||||
isProfileInCooldown,
|
||||
resolveProfilesUnavailableReason,
|
||||
resolveAuthProfileOrder,
|
||||
} from "./auth-profiles.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
|
||||
@@ -342,12 +343,18 @@ export async function runWithModelFallback<T>(params: {
|
||||
profileIds,
|
||||
});
|
||||
if (!shouldProbe) {
|
||||
const inferredReason =
|
||||
resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
profileIds,
|
||||
now,
|
||||
}) ?? "rate_limit";
|
||||
// Skip without attempting
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
|
||||
reason: "rate_limit",
|
||||
reason: inferredReason,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import path from "node:path";
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { AuthProfileFailureReason } from "./auth-profiles.js";
|
||||
import type { EmbeddedRunAttemptResult } from "./pi-embedded-runner/run/types.js";
|
||||
|
||||
const runEmbeddedAttemptMock = vi.fn<(params: unknown) => Promise<EmbeddedRunAttemptResult>>();
|
||||
@@ -112,7 +113,16 @@ const writeAuthStore = async (
|
||||
agentDir: string,
|
||||
opts?: {
|
||||
includeAnthropic?: boolean;
|
||||
usageStats?: Record<string, { lastUsed?: number; cooldownUntil?: number }>;
|
||||
usageStats?: Record<
|
||||
string,
|
||||
{
|
||||
lastUsed?: number;
|
||||
cooldownUntil?: number;
|
||||
disabledUntil?: number;
|
||||
disabledReason?: AuthProfileFailureReason;
|
||||
failureCounts?: Partial<Record<AuthProfileFailureReason, number>>;
|
||||
}
|
||||
>;
|
||||
},
|
||||
) => {
|
||||
const authPath = path.join(agentDir, "auth-profiles.json");
|
||||
@@ -184,7 +194,17 @@ async function runAutoPinnedOpenAiTurn(params: {
|
||||
/**
 * Read `auth-profiles.json` from `agentDir` and return its `usageStats` map.
 *
 * The parsed JSON is cast to the expected shape, not validated. The stats
 * shape lists the same fields as the `usageStats` option of `writeAuthStore`,
 * including `failureCounts`.
 *
 * @param agentDir - Directory that contains `auth-profiles.json`.
 * @returns The stored per-profile usage stats, or `{}` when the file has no
 *   `usageStats` entry.
 */
async function readUsageStats(agentDir: string) {
  const stored = JSON.parse(
    await fs.readFile(path.join(agentDir, "auth-profiles.json"), "utf-8"),
  ) as {
    usageStats?: Record<
      string,
      {
        lastUsed?: number;
        cooldownUntil?: number;
        disabledUntil?: number;
        disabledReason?: AuthProfileFailureReason;
        failureCounts?: Partial<Record<AuthProfileFailureReason, number>>;
      }
    >;
  };
  return stored.usageStats ?? {};
}
|
||||
|
||||
@@ -496,6 +516,50 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("fails over with disabled reason when all profiles are unavailable", async () => {
  await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
    // Both openai profiles are disabled for billing for the next hour; p1 also
    // carries rate_limit failure counts.
    const disabledUntil = now + 60 * 60 * 1000;
    await writeAuthStore(agentDir, {
      usageStats: {
        "openai:p1": {
          lastUsed: 1,
          disabledUntil,
          disabledReason: "billing",
          failureCounts: { rate_limit: 4 },
        },
        "openai:p2": {
          lastUsed: 2,
          disabledUntil,
          disabledReason: "billing",
        },
      },
    });

    const pendingRun = runEmbeddedPiAgent({
      sessionId: "session:test",
      sessionKey: "agent:test:disabled-failover",
      sessionFile: path.join(workspaceDir, "session.jsonl"),
      workspaceDir,
      agentDir,
      config: makeConfig({ fallbacks: ["openai/mock-2"] }),
      prompt: "hello",
      provider: "openai",
      model: "mock-1",
      authProfileIdSource: "auto",
      timeoutMs: 5_000,
      runId: "run:disabled-failover",
    });

    // The rejection must report the disabled reason, not the failure counts.
    await expect(pendingRun).rejects.toMatchObject({
      name: "FailoverError",
      reason: "billing",
      provider: "openai",
      model: "mock-1",
    });

    // No attempt should have been started.
    expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
it("fails over when auth is unavailable and fallbacks are configured", async () => {
|
||||
const previousOpenAiKey = process.env.OPENAI_API_KEY;
|
||||
delete process.env.OPENAI_API_KEY;
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
markAuthProfileFailure,
|
||||
markAuthProfileGood,
|
||||
markAuthProfileUsed,
|
||||
resolveProfilesUnavailableReason,
|
||||
} from "../auth-profiles.js";
|
||||
import {
|
||||
CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||||
@@ -364,9 +365,18 @@ export async function runEmbeddedPiAgent(
|
||||
const resolveAuthProfileFailoverReason = (params: {
|
||||
allInCooldown: boolean;
|
||||
message: string;
|
||||
profileIds?: Array<string | undefined>;
|
||||
}): FailoverReason => {
|
||||
if (params.allInCooldown) {
|
||||
return "rate_limit";
|
||||
const profileIds = (params.profileIds ?? profileCandidates).filter(
|
||||
(id): id is string => typeof id === "string" && id.length > 0,
|
||||
);
|
||||
return (
|
||||
resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
profileIds,
|
||||
}) ?? "rate_limit"
|
||||
);
|
||||
}
|
||||
const classified = classifyFailoverReason(params.message);
|
||||
return classified ?? "auth";
|
||||
@@ -385,6 +395,7 @@ export async function runEmbeddedPiAgent(
|
||||
const reason = resolveAuthProfileFailoverReason({
|
||||
allInCooldown: params.allInCooldown,
|
||||
message,
|
||||
profileIds: profileCandidates,
|
||||
});
|
||||
if (fallbackConfigured) {
|
||||
throw new FailoverError(message, {
|
||||
|
||||
Reference in New Issue
Block a user