Files
openclaw/src/agents/cli-runner.ts
Aaron Zhu 983909f826 fix(agents): classify generic provider errors for failover (#59325)
* fix(agents): classify generic provider errors for failover

Anthropic returns bare 'An unknown error occurred' during API instability
and OpenRouter wraps upstream failures as 'Provider returned error'. Neither
message was recognized by the failover classifier, so the error surfaced
directly to users instead of triggering the configured fallback chain.

Add both patterns to the serverError classifier so they are classified as
transient server errors (timeout) and trigger model failover.

Closes #49706
Closes #45834

* fix(agents): scope unknown-error failover by provider

* docs(changelog): note provider-scoped unknown-error failover

---------

Co-authored-by: Aaron Zhu <aaron@Aarons-MacBook-Air.local>
Co-authored-by: Altay <altay@uinaf.dev>
2026-04-04 18:11:46 +03:00

130 lines
4.7 KiB
TypeScript

import type { ImageContent } from "@mariozechner/pi-ai";
import type { ThinkLevel } from "../auto-reply/thinking.js";
import type { OpenClawConfig } from "../config/config.js";
import { executePreparedCliRun } from "./cli-runner/execute.js";
import { prepareCliRunContext } from "./cli-runner/prepare.js";
import type { RunCliAgentParams } from "./cli-runner/types.js";
import { FailoverError, resolveFailoverStatus } from "./failover-error.js";
import { classifyFailoverReason, isFailoverErrorMessage } from "./pi-embedded-helpers.js";
import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js";
/**
 * Executes one CLI agent run and packages the output as an `EmbeddedPiRunResult`.
 *
 * Flow, as implemented below:
 * 1. Build the run context with `prepareCliRunContext(params)`.
 * 2. Execute with the context's reusable CLI session ID.
 * 3. If that attempt throws a `FailoverError` whose reason is `"session_expired"`,
 *    and both a reusable session ID and `params.sessionKey` are present, execute
 *    once more without a session ID.
 * 4. Any other `FailoverError` is rethrown unchanged. A non-`FailoverError` whose
 *    message satisfies `isFailoverErrorMessage` is rethrown wrapped in a
 *    `FailoverError`; everything else is rethrown as-is.
 *
 * `context.preparedBackend.cleanup`, when defined, is awaited on every exit path.
 *
 * @param params - Run parameters; `provider`, `sessionId`, `sessionKey` and
 *   `authProfileId` are read directly here, the rest is consumed by
 *   `prepareCliRunContext`.
 * @returns The run result, including a `cliSessionBinding` when a CLI session ID
 *   is known for the run.
 * @throws FailoverError for errors already typed as failover errors or whose
 *   message is recognised by `isFailoverErrorMessage`.
 */
export async function runCliAgent(params: RunCliAgentParams): Promise<EmbeddedPiRunResult> {
  type CliRunOutput = Awaited<ReturnType<typeof executePreparedCliRun>>;

  const context = await prepareCliRunContext(params);

  // Packages a finished run. The session binding is attached only when a CLI
  // session ID is available, and each optional hash only when it is truthy.
  const toRunResult = (output: CliRunOutput, cliSessionId?: string): EmbeddedPiRunResult => {
    const trimmed = output.text?.trim();
    const sessionBinding = cliSessionId
      ? {
          cliSessionBinding: {
            sessionId: cliSessionId,
            ...(params.authProfileId ? { authProfileId: params.authProfileId } : {}),
            ...(context.extraSystemPromptHash
              ? { extraSystemPromptHash: context.extraSystemPromptHash }
              : {}),
            ...(context.preparedBackend.mcpConfigHash
              ? { mcpConfigHash: context.preparedBackend.mcpConfigHash }
              : {}),
          },
        }
      : {};

    return {
      // Empty or whitespace-only text yields no payloads at all.
      payloads: trimmed ? [{ text: trimmed }] : undefined,
      meta: {
        durationMs: Date.now() - context.started,
        systemPromptReport: context.systemPromptReport,
        agentMeta: {
          sessionId: cliSessionId ?? params.sessionId ?? "",
          provider: params.provider,
          model: context.modelId,
          usage: output.usage,
          ...sessionBinding,
        },
      },
    };
  };

  try {
    try {
      // First attempt: reuse the existing CLI session when the context has one.
      const reusableId = context.reusableCliSession.sessionId;
      const firstOutput = await executePreparedCliRun(context, reusableId);
      return toRunResult(firstOutput, firstOutput.sessionId ?? reusableId);
    } catch (err) {
      if (!(err instanceof FailoverError)) {
        // Untyped failure: promote it to a FailoverError only when its message
        // is recognised as a failover condition for this provider.
        const message = err instanceof Error ? err.message : String(err);
        if (!isFailoverErrorMessage(message, { provider: params.provider })) {
          throw err;
        }
        const reason = classifyFailoverReason(message, { provider: params.provider }) ?? "unknown";
        const status = resolveFailoverStatus(reason);
        throw new FailoverError(message, {
          reason,
          provider: params.provider,
          model: context.modelId,
          status,
        });
      }

      const canStartFreshSession =
        err.reason === "session_expired" &&
        context.reusableCliSession.sessionId &&
        params.sessionKey;
      if (!canStartFreshSession) {
        throw err;
      }

      // The stored session entry cannot be cleared from here (no session store
      // access), so the stale ID is simply dropped and a new session is started.
      const retryOutput = await executePreparedCliRun(context, undefined);
      return toRunResult(retryOutput, retryOutput.sessionId);
    }
  } finally {
    await context.preparedBackend.cleanup?.();
  }
}
/**
 * Convenience wrapper that forwards to `runCliAgent`.
 *
 * Differences from calling `runCliAgent` directly:
 * - `provider` falls back to `"claude-cli"` when null or undefined.
 * - `model` falls back to `"opus"` when null or undefined.
 * - `claudeSessionId` is passed on under the name `cliSessionId`.
 *
 * All other fields are forwarded unchanged.
 *
 * @param params - Run parameters; see the inline type for the accepted fields.
 * @returns The promise returned by `runCliAgent`.
 */
export async function runClaudeCliAgent(params: {
  sessionId: string;
  sessionKey?: string;
  agentId?: string;
  sessionFile: string;
  workspaceDir: string;
  config?: OpenClawConfig;
  prompt: string;
  provider?: string;
  model?: string;
  thinkLevel?: ThinkLevel;
  timeoutMs: number;
  runId: string;
  extraSystemPrompt?: string;
  ownerNumbers?: string[];
  claudeSessionId?: string;
  images?: ImageContent[];
}): Promise<EmbeddedPiRunResult> {
  const {
    sessionId,
    sessionKey,
    agentId,
    sessionFile,
    workspaceDir,
    config,
    prompt,
    provider,
    model,
    thinkLevel,
    timeoutMs,
    runId,
    extraSystemPrompt,
    ownerNumbers,
    claudeSessionId,
    images,
  } = params;

  return runCliAgent({
    sessionId,
    sessionKey,
    agentId,
    sessionFile,
    workspaceDir,
    config,
    prompt,
    provider: provider ?? "claude-cli",
    model: model ?? "opus",
    thinkLevel,
    timeoutMs,
    runId,
    extraSystemPrompt,
    ownerNumbers,
    cliSessionId: claudeSessionId,
    images,
  });
}