mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-11 08:40:43 +00:00
fix(failover): stop retrying assistant-prefill format failures
Summary:
- classify assistant-prefill provider rejections as format errors
- surface terminal format failover reasons instead of rotating profiles or falling back
- refresh shared Swift protocol output from current main
Verification:
- pnpm test src/agents/pi-embedded-runner/run/failover-policy.test.ts src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
- pnpm exec oxfmt --check --threads=1 CHANGELOG.md src/agents/pi-embedded-runner/run.ts src/agents/pi-embedded-runner/run/assistant-failover.ts src/agents/pi-embedded-runner/run/failover-policy.ts src/agents/pi-embedded-runner/run/failover-policy.test.ts src/agents/pi-embedded-helpers/failover-matches.ts src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
- fnm exec --using=24.13.0 pnpm lint --threads=8
- pnpm protocol:check
- GitHub CI on 678e92bcb2
This commit is contained in:
@@ -156,6 +156,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- QQBot: route gateway WebSocket connections through the ambient proxy agent so deployments with `https_proxy`, `HTTPS_PROXY`, or `HTTP_PROXY` can reach the QQ gateway. (#72961) Thanks @xialonglee.
|
||||
- Agents/subagents: treat `sessions_spawn` `model: "default"` as the default-model fallback and ignore ACP-only stream targets for native sub-agent spawns. Fixes #72078. (#72101) Thanks @xialonglee.
|
||||
- Agents/failover: stop retrying assistant-prefill format rejections across auth profiles or model fallbacks, surfacing the deterministic provider error instead of requeueing the lane. Fixes #79688. (#79728) Thanks @hclsys.
|
||||
- Memory/QMD: warn with a manual stale collection removal hint when QMD reports a path/pattern conflict but `collection list` lacks verifiable metadata, avoiding unsafe stderr-only rebinds. Refs #71783. (#72297) Thanks @MonkeyLeeT.
|
||||
- Models/auth: make `openclaw models status --check` and dashboard auth health honor effective auth profile order while keeping stale profiles visible. (#79685) Thanks @nimbleenigma.
|
||||
- Agents/failover: classify bare `stream_read_error` streaming failures as transient timeouts so configured model fallback runs instead of surfacing the raw transport error. Fixes #79689. (#79692) Thanks @hekunwang.
|
||||
|
||||
@@ -3406,21 +3406,25 @@ public struct TalkSessionSubmitToolResultParams: Codable, Sendable {
|
||||
public let sessionid: String
|
||||
public let callid: String
|
||||
public let result: AnyCodable
|
||||
public let options: [String: AnyCodable]?
|
||||
|
||||
public init(
|
||||
sessionid: String,
|
||||
callid: String,
|
||||
result: AnyCodable)
|
||||
result: AnyCodable,
|
||||
options: [String: AnyCodable]?)
|
||||
{
|
||||
self.sessionid = sessionid
|
||||
self.callid = callid
|
||||
self.result = result
|
||||
self.options = options
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case sessionid = "sessionId"
|
||||
case callid = "callId"
|
||||
case result
|
||||
case options
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1153,6 +1153,15 @@ describe("classifyFailoverReason provider messages", () => {
|
||||
),
|
||||
).toBe("timeout");
|
||||
expect(classifyFailoverReason("string should match pattern")).toBe("format");
|
||||
expect(
|
||||
classifyFailoverReason(
|
||||
"This model does not support assistant message prefill. The conversation must end with a user message.",
|
||||
),
|
||||
).toBe("format");
|
||||
expect(
|
||||
classifyFailoverReason("LLM request rejected: does not support assistant message prefill"),
|
||||
).toBe("format");
|
||||
expect(classifyFailoverReason("conversation must end with a user message")).toBe("format");
|
||||
expect(classifyFailoverReason("bad request")).toBeNull();
|
||||
expect(
|
||||
classifyFailoverReason(
|
||||
|
||||
@@ -218,6 +218,12 @@ const ERROR_PATTERNS = {
|
||||
"messages.1.content.1.tool_use.id",
|
||||
"invalid request format",
|
||||
/tool call id was.*must be/i,
|
||||
// Prefill-strict models (e.g. claude-opus-4-7) reject requests that end
|
||||
// with an assistant turn. The lane must not re-queue these — the same
|
||||
// payload will fail identically on every retry, causing an infinite loop
|
||||
// (#79688).
|
||||
"does not support assistant message prefill",
|
||||
"conversation must end with a user message",
|
||||
],
|
||||
} as const;
|
||||
|
||||
|
||||
@@ -2216,6 +2216,7 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const assistantFailoverDecision = resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
allowFormatRetry: cloudCodeAssistFormatError,
|
||||
aborted,
|
||||
externalAbort,
|
||||
fallbackConfigured,
|
||||
|
||||
@@ -181,6 +181,7 @@ export async function handleAssistantFailover(params: {
|
||||
|
||||
decision = resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
allowFormatRetry: params.cloudCodeAssistFormatError,
|
||||
aborted: params.aborted,
|
||||
externalAbort: params.externalAbort,
|
||||
fallbackConfigured: params.fallbackConfigured,
|
||||
|
||||
@@ -61,6 +61,41 @@ describe("resolveRunFailoverDecision", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces deterministic prompt format failures instead of rotating or falling back", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "prompt",
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: true,
|
||||
failoverReason: "format",
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "surface_error",
|
||||
reason: "format",
|
||||
});
|
||||
});
|
||||
|
||||
it("can still rotate explicitly retryable prompt format failures", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "prompt",
|
||||
allowFormatRetry: true,
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: true,
|
||||
failoverReason: "format",
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "rotate_profile",
|
||||
reason: "format",
|
||||
});
|
||||
});
|
||||
|
||||
it("treats classified assistant-side 429s as rotation candidates even without error stopReason", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
@@ -81,6 +116,47 @@ describe("resolveRunFailoverDecision", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces deterministic assistant format failures instead of rotating or falling back", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: true,
|
||||
failoverReason: "format",
|
||||
timedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "surface_error",
|
||||
reason: "format",
|
||||
});
|
||||
});
|
||||
|
||||
it("can still rotate explicitly retryable assistant format failures", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
stage: "assistant",
|
||||
allowFormatRetry: true,
|
||||
aborted: false,
|
||||
externalAbort: false,
|
||||
fallbackConfigured: true,
|
||||
failoverFailure: true,
|
||||
failoverReason: "format",
|
||||
timedOut: false,
|
||||
timedOutDuringCompaction: false,
|
||||
timedOutDuringToolExecution: false,
|
||||
profileRotated: false,
|
||||
}),
|
||||
).toEqual({
|
||||
action: "rotate_profile",
|
||||
reason: "format",
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back after assistant rotation is exhausted", () => {
|
||||
expect(
|
||||
resolveRunFailoverDecision({
|
||||
|
||||
@@ -39,6 +39,7 @@ type RetryLimitDecisionParams = {
|
||||
|
||||
type PromptDecisionParams = {
|
||||
stage: "prompt";
|
||||
allowFormatRetry?: boolean;
|
||||
aborted: boolean;
|
||||
externalAbort: boolean;
|
||||
fallbackConfigured: boolean;
|
||||
@@ -49,6 +50,7 @@ type PromptDecisionParams = {
|
||||
|
||||
type AssistantDecisionParams = {
|
||||
stage: "assistant";
|
||||
allowFormatRetry?: boolean;
|
||||
aborted: boolean;
|
||||
externalAbort: boolean;
|
||||
fallbackConfigured: boolean;
|
||||
@@ -75,11 +77,25 @@ function shouldEscalateRetryLimit(reason: FailoverReason | null): boolean {
|
||||
);
|
||||
}
|
||||
|
||||
// Reports whether a failure should be treated as a terminal "format" failure.
// True only when `failoverReason` is exactly "format" AND `allowFormatRetry`
// is not strictly `true`; an omitted or `false` `allowFormatRetry` therefore
// both count as terminal. Any other reason (including `null`) yields false.
function isTerminalFormatFailure(params: {
|
||||
allowFormatRetry?: boolean;
|
||||
failoverReason: FailoverReason | null;
|
||||
}): boolean {
|
||||
return params.failoverReason === "format" && params.allowFormatRetry !== true;
|
||||
}
|
||||
|
||||
function shouldRotatePrompt(params: PromptDecisionParams): boolean {
|
||||
return params.failoverFailure && params.failoverReason !== "timeout";
|
||||
return (
|
||||
params.failoverFailure &&
|
||||
params.failoverReason !== "timeout" &&
|
||||
!isTerminalFormatFailure(params)
|
||||
);
|
||||
}
|
||||
|
||||
function shouldRotateAssistant(params: AssistantDecisionParams): boolean {
|
||||
if (isTerminalFormatFailure(params)) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
(!params.aborted && (params.failoverFailure || params.failoverReason !== null)) ||
|
||||
(params.timedOut && !params.timedOutDuringCompaction && !params.timedOutDuringToolExecution)
|
||||
@@ -128,7 +144,7 @@ export function resolveRunFailoverDecision(params: RunFailoverDecisionParams): R
|
||||
reason: params.failoverReason,
|
||||
};
|
||||
}
|
||||
if (params.fallbackConfigured && params.failoverFailure) {
|
||||
if (params.fallbackConfigured && params.failoverFailure && !isTerminalFormatFailure(params)) {
|
||||
return {
|
||||
action: "fallback_model",
|
||||
reason: params.failoverReason ?? "unknown",
|
||||
@@ -146,6 +162,12 @@ export function resolveRunFailoverDecision(params: RunFailoverDecisionParams): R
|
||||
reason: params.failoverReason,
|
||||
};
|
||||
}
|
||||
if (isTerminalFormatFailure(params)) {
|
||||
return {
|
||||
action: "surface_error",
|
||||
reason: params.failoverReason,
|
||||
};
|
||||
}
|
||||
const assistantShouldRotate = shouldRotateAssistant(params);
|
||||
if (!params.profileRotated && assistantShouldRotate) {
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user