fix(agents): prevent cross-provider error context leak in fallback chain (#62907)

Merged via squash.

Prepared head SHA: 06a3a82816
Co-authored-by: stainlu <109842185+stainlu@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
stain lu
2026-04-12 02:15:26 +08:00
committed by GitHub
parent 97b60b992c
commit acd3697162
9 changed files with 238 additions and 45 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
- Auto-reply/WhatsApp: preserve inbound image attachment notes after media understanding so image edits keep the real saved media path instead of hallucinating a missing local path. (#64918) Thanks @ngutman.
- Telegram/sessions: keep topic-scoped session initialization on the canonical topic transcript path when inbound turns omit `MessageThreadId`, so one topic session no longer alternates between bare and topic-qualified transcript files. (#64869) Thanks @jalehman.
- Agents/failover: scope assistant-side fallback classification and surfaced provider errors to the current attempt instead of stale session history, so cross-provider fallback runs stop inheriting the previous provider's failure. (#62907) Thanks @stainlu.
## 2026.4.11-beta.1

View File

@@ -1,4 +1,5 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
@@ -12,7 +13,6 @@ import {
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
} from "./run.overflow-compaction.harness.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
@@ -35,15 +35,17 @@ describe("runEmbeddedPiAgent Codex server_error fallback handoff", () => {
mockedFormatAssistantErrorText.mockReturnValue(
"LLM error server_error: An error occurred while processing your request.",
);
const currentAttemptAssistant = makeAssistantMessageFixture({
stopReason: "error",
errorMessage: rawCodexError,
provider: "openai-codex",
model: "gpt-5.4",
});
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: {
stopReason: "error",
errorMessage: rawCodexError,
provider: "openai-codex",
model: "gpt-5.4",
} as EmbeddedRunAttemptResult["lastAssistant"],
lastAssistant: currentAttemptAssistant,
currentAttemptAssistant,
}),
);

View File

@@ -0,0 +1,174 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js";
import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js";
import { makeAttemptResult } from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
MockedFailoverError,
mockedFormatAssistantErrorText,
mockedGlobalHookRunner,
mockedIsFailoverAssistantError,
mockedIsRateLimitAssistantError,
mockedRunEmbeddedAttempt,
overflowBaseRunParams,
resetRunOverflowCompactionHarnessMocks,
} from "./run.overflow-compaction.harness.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
// Narrowing guard: accepts any mock-call argument and confirms it looks like an
// assistant message (has provider/model/errorMessage keys) before the tests
// read those fields. Purely structural — it does not validate the values.
function isCurrentAttemptAssistant(
value: unknown,
): value is NonNullable<EmbeddedRunAttemptResult["currentAttemptAssistant"]> {
if (typeof value !== "object" || value === null) {
return false;
}
// All three keys must be present for the tests' property reads to be safe.
const requiredKeys = ["provider", "model", "errorMessage"] as const;
return requiredKeys.every((key) => key in value);
}
// Regression tests for #62907: fallback classification must inspect the
// assistant message from the CURRENT attempt, not a stale one left in
// session history by an earlier provider.
describe("runEmbeddedPiAgent cross-provider fallback error handling", () => {
beforeAll(async () => {
// Harness loads run.js with its provider/attempt dependencies mocked.
({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness());
});
beforeEach(() => {
resetRunOverflowCompactionHarnessMocks();
// No hooks: keep the run path free of hook side effects for these tests.
mockedGlobalHookRunner.hasHooks.mockImplementation(() => false);
});
it("uses the current attempt assistant for fallback errors instead of stale session history", async () => {
// Classifiers only report failover/rate-limit for the deepseek assistant,
// so a hit proves the current-attempt message (not the stale openai-codex
// one) was passed in.
mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => {
const assistant = args[0];
return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
});
mockedIsRateLimitAssistantError.mockImplementation((...args: unknown[]) => {
const assistant = args[0];
return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
});
// Capture which assistant the formatter received for the final assertion.
let lastFormattedAssistant: unknown;
mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => {
lastFormattedAssistant = args[0];
if (!isCurrentAttemptAssistant(lastFormattedAssistant)) {
return String(lastFormattedAssistant);
}
return `${lastFormattedAssistant.provider}/${lastFormattedAssistant.model}: ${lastFormattedAssistant.errorMessage}`;
});
// Session history ends with a stale openai-codex error, while the current
// attempt actually failed on deepseek — the leak scenario from #62907.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: makeAssistantMessageFixture({
stopReason: "error",
errorMessage: "You have hit your ChatGPT usage limit (plus plan).",
provider: "openai-codex",
model: "gpt-5.4",
content: [],
}),
currentAttemptAssistant: makeAssistantMessageFixture({
stopReason: "error",
errorMessage: "429 deepseek rate limit",
provider: "deepseek",
model: "deepseek-chat",
content: [],
}),
}),
);
const promise = runEmbeddedPiAgent({
...overflowBaseRunParams,
runId: "run-cross-provider-fallback-error-context",
config: makeModelFallbackCfg({
agents: {
defaults: {
model: {
primary: "openai-codex/gpt-5.4",
fallbacks: ["deepseek/deepseek-chat", "google/gemini-2.5-flash"],
},
},
},
}),
});
// The surfaced failover error must carry the deepseek attempt's context.
await expect(promise).rejects.toBeInstanceOf(MockedFailoverError);
await expect(promise).rejects.toThrow("deepseek/deepseek-chat: 429 deepseek rate limit");
expect(mockedIsRateLimitAssistantError).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepseek",
model: "deepseek-chat",
errorMessage: "429 deepseek rate limit",
}),
);
expect(lastFormattedAssistant).toEqual(
expect.objectContaining({
provider: "deepseek",
model: "deepseek-chat",
errorMessage: "429 deepseek rate limit",
}),
);
});
it("falls back to the session assistant when compaction removes the current attempt slice", async () => {
mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => {
const assistant = args[0];
return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
});
mockedIsRateLimitAssistantError.mockImplementation((...args: unknown[]) => {
const assistant = args[0];
return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek";
});
let lastFormattedAssistant: unknown;
mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => {
lastFormattedAssistant = args[0];
if (!isCurrentAttemptAssistant(lastFormattedAssistant)) {
return String(lastFormattedAssistant);
}
return `${lastFormattedAssistant.provider}/${lastFormattedAssistant.model}: ${lastFormattedAssistant.errorMessage}`;
});
// currentAttemptAssistant is undefined here (e.g. compaction dropped the
// attempt slice), so the runner must fall back to the session-history
// assistant — which in this fixture is the genuine deepseek failure.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(
makeAttemptResult({
assistantTexts: [],
lastAssistant: makeAssistantMessageFixture({
stopReason: "error",
errorMessage: "429 deepseek rate limit",
provider: "deepseek",
model: "deepseek-chat",
content: [],
}),
currentAttemptAssistant: undefined,
}),
);
const promise = runEmbeddedPiAgent({
...overflowBaseRunParams,
runId: "run-compaction-fallback-error-context",
config: makeModelFallbackCfg({
agents: {
defaults: {
model: {
primary: "openai-codex/gpt-5.4",
fallbacks: ["deepseek/deepseek-chat", "google/gemini-2.5-flash"],
},
},
},
}),
});
await expect(promise).rejects.toBeInstanceOf(MockedFailoverError);
await expect(promise).rejects.toThrow("deepseek/deepseek-chat: 429 deepseek rate limit");
expect(mockedIsRateLimitAssistantError).toHaveBeenCalledWith(
expect.objectContaining({
provider: "deepseek",
model: "deepseek-chat",
errorMessage: "429 deepseek rate limit",
}),
);
expect(lastFormattedAssistant).toEqual(
expect.objectContaining({
provider: "deepseek",
model: "deepseek-chat",
errorMessage: "429 deepseek rate limit",
}),
);
});
});

View File

@@ -726,7 +726,8 @@ export async function runEmbeddedPiAgent(
idleTimedOut,
timedOutDuringCompaction,
sessionIdUsed,
lastAssistant,
lastAssistant: sessionLastAssistant,
currentAttemptAssistant,
} = attempt;
bootstrapPromptWarningSignaturesSeen =
attempt.bootstrapPromptWarningSignaturesSeen ??
@@ -738,7 +739,7 @@ export async function runEmbeddedPiAgent(
]),
)
: bootstrapPromptWarningSignaturesSeen);
const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
const lastAssistantUsage = normalizeUsage(sessionLastAssistant?.usage as UsageLike);
const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
// Keep prompt size from the latest model call so session totalTokens
@@ -748,7 +749,6 @@ export async function runEmbeddedPiAgent(
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
autoCompactionCount += attemptCompactionCount;
const activeErrorContext = resolveActiveErrorContext({
lastAssistant,
provider,
model: modelId,
});
@@ -765,8 +765,8 @@ export async function runEmbeddedPiAgent(
accumulatedReplayState,
attempt.replayMetadata,
);
const formattedAssistantErrorText = lastAssistant
? formatAssistantErrorText(lastAssistant, {
const formattedAssistantErrorText = sessionLastAssistant
? formatAssistantErrorText(sessionLastAssistant, {
cfg: params.config,
sessionKey: resolvedSessionKey ?? params.sessionId,
provider: activeErrorContext.provider,
@@ -774,8 +774,8 @@ export async function runEmbeddedPiAgent(
})
: undefined;
const assistantErrorText =
lastAssistant?.stopReason === "error"
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
sessionLastAssistant?.stopReason === "error"
? sessionLastAssistant.errorMessage?.trim() || formattedAssistantErrorText
: undefined;
const canRestartForLiveSwitch =
!attempt.didSendViaMessagingTool &&
@@ -1140,7 +1140,7 @@ export async function runEmbeddedPiAgent(
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
@@ -1194,7 +1194,7 @@ export async function runEmbeddedPiAgent(
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
@@ -1232,7 +1232,7 @@ export async function runEmbeddedPiAgent(
model: model.id,
usageAccumulator,
lastRunPromptUsage,
lastAssistant,
lastAssistant: sessionLastAssistant,
lastTurnTotal,
}),
systemPromptReport: attempt.systemPromptReport,
@@ -1340,8 +1340,9 @@ export async function runEmbeddedPiAgent(
throw promptError;
}
const assistantForFailover = currentAttemptAssistant ?? sessionLastAssistant;
const fallbackThinking = pickFallbackThinkingLevel({
message: lastAssistant?.errorMessage,
message: assistantForFailover?.errorMessage,
attempted: attemptedThinking,
});
if (fallbackThinking && !aborted) {
@@ -1352,26 +1353,28 @@ export async function runEmbeddedPiAgent(
continue;
}
const authFailure = isAuthAssistantError(lastAssistant);
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
const billingFailure = isBillingAssistantError(lastAssistant);
const failoverFailure = isFailoverAssistantError(lastAssistant);
const authFailure = isAuthAssistantError(assistantForFailover);
const rateLimitFailure = isRateLimitAssistantError(assistantForFailover);
const billingFailure = isBillingAssistantError(assistantForFailover);
const failoverFailure = isFailoverAssistantError(assistantForFailover);
const assistantFailoverReason = classifyFailoverReason(
lastAssistant?.errorMessage ?? "",
assistantForFailover?.errorMessage ?? "",
{
provider: lastAssistant?.provider,
provider: assistantForFailover?.provider,
},
);
const assistantProfileFailureReason =
resolveAuthProfileFailureReason(assistantFailoverReason);
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
const imageDimensionError = parseImageDimensionError(
assistantForFailover?.errorMessage ?? "",
);
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
const failedAssistantProfileId = lastProfileId;
const logAssistantFailoverDecision = createFailoverDecisionLogger({
stage: "assistant",
runId: params.runId,
rawError: lastAssistant?.errorMessage?.trim(),
rawError: assistantForFailover?.errorMessage?.trim(),
failoverReason: assistantFailoverReason,
profileFailureReason: assistantProfileFailureReason,
provider: activeErrorContext.provider,
@@ -1385,7 +1388,7 @@ export async function runEmbeddedPiAgent(
if (
authFailure &&
(await maybeRefreshRuntimeAuthForAuthError(
lastAssistant?.errorMessage ?? "",
assistantForFailover?.errorMessage ?? "",
runtimeAuthRetry,
))
) {
@@ -1442,7 +1445,7 @@ export async function runEmbeddedPiAgent(
modelId,
provider,
activeErrorContext,
lastAssistant,
lastAssistant: assistantForFailover,
config: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
authFailure,
@@ -1473,20 +1476,20 @@ export async function runEmbeddedPiAgent(
}
const usageMeta = buildUsageAgentMetaFields({
usageAccumulator,
lastAssistantUsage: lastAssistant?.usage as UsageLike | undefined,
lastAssistantUsage: sessionLastAssistant?.usage as UsageLike | undefined,
lastRunPromptUsage,
lastTurnTotal,
});
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
provider: sessionLastAssistant?.provider ?? provider,
model: sessionLastAssistant?.model ?? model.id,
usage: usageMeta.usage,
lastCallUsage: usageMeta.lastCallUsage,
promptTokens: usageMeta.promptTokens,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};
const finalAssistantVisibleText = resolveFinalAssistantVisibleText(lastAssistant);
const finalAssistantVisibleText = resolveFinalAssistantVisibleText(sessionLastAssistant);
const payloads = buildEmbeddedRunPayloads({
assistantTexts: attempt.assistantTexts,
@@ -1737,7 +1740,7 @@ export async function runEmbeddedPiAgent(
? "tool_calls"
: attempt.yieldDetected
? "end_turn"
: (lastAssistant?.stopReason as string | undefined),
: (sessionLastAssistant?.stopReason as string | undefined),
pendingToolCalls: attempt.clientToolCall
? [
{

View File

@@ -1,4 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai";
import type { MemoryCitationsMode } from "../../../config/types.memory.js";
import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js";
import type { NormalizedUsage } from "../../usage.js";
@@ -95,11 +96,11 @@ export function buildContextEnginePromptCacheInfo(params: {
export function findCurrentAttemptAssistantMessage(params: {
messagesSnapshot: AgentMessage[];
prePromptMessageCount: number;
}): AgentMessage | undefined {
}): AssistantMessage | undefined {
return params.messagesSnapshot
.slice(Math.max(0, params.prePromptMessageCount))
.toReversed()
.find((message) => message.role === "assistant");
.find((message): message is AssistantMessage => message.role === "assistant");
}
export async function runAttemptContextEngineBootstrap(params: {

View File

@@ -112,7 +112,7 @@ import { buildSystemPromptReport } from "../../system-prompt-report.js";
import { resolveAgentTimeoutMs } from "../../timeout.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
import { resolveTranscriptPolicy } from "../../transcript-policy.js";
import { normalizeUsage, type NormalizedUsage, type UsageLike } from "../../usage.js";
import { normalizeUsage, type NormalizedUsage } from "../../usage.js";
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
import { isRunnerAbortError } from "../abort.js";
import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "../cache-ttl.js";
@@ -1512,6 +1512,7 @@ export async function runEmbeddedAttempt(
abort: abortRun,
};
let lastAssistant: AgentMessage | undefined;
let currentAttemptAssistant: EmbeddedRunAttemptResult["currentAttemptAssistant"];
let attemptUsage: NormalizedUsage | undefined;
let cacheBreak: ReturnType<typeof completePromptCacheObservation> = null;
let promptCache: EmbeddedRunAttemptResult["promptCache"];
@@ -2075,7 +2076,7 @@ export async function runEmbeddedAttempt(
.slice()
.toReversed()
.find((m) => m.role === "assistant");
const currentAttemptAssistant = findCurrentAttemptAssistantMessage({
currentAttemptAssistant = findCurrentAttemptAssistantMessage({
messagesSnapshot,
prePromptMessageCount,
});
@@ -2087,9 +2088,7 @@ export async function runEmbeddedAttempt(
usage: attemptUsage,
})
: null;
const lastCallUsage = normalizeUsage(
(currentAttemptAssistant as { usage?: UsageLike } | undefined)?.usage,
);
const lastCallUsage = normalizeUsage(currentAttemptAssistant?.usage);
const promptCacheObservation =
cacheObservabilityEnabled &&
(cacheBreak || promptCacheChangesForTurn || typeof attemptUsage?.cacheRead === "number")
@@ -2353,6 +2352,7 @@ export async function runEmbeddedAttempt(
assistantTexts,
toolMetas: toolMetasNormalized,
lastAssistant,
currentAttemptAssistant,
lastToolError: getLastToolError?.(),
didSendViaMessagingTool: didSendViaMessagingTool(),
messagingToolSentTexts: getMessagingToolSentTexts(),

View File

@@ -0,0 +1,12 @@
import { describe, expect, it } from "vitest";
import { resolveActiveErrorContext } from "./helpers.js";
// After #62907, resolveActiveErrorContext no longer consults a stale
// lastAssistant: it simply echoes the attempt's own provider/model pair.
describe("resolveActiveErrorContext", () => {
it("returns the current provider/model", () => {
const params = { provider: "deepseek", model: "deepseek-chat" };
const context = resolveActiveErrorContext(params);
expect(context).toEqual({ provider: "deepseek", model: "deepseek-chat" });
});
});

View File

@@ -77,14 +77,13 @@ export function resolveMaxRunRetryIterations(profileCandidateCount: number): num
return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled));
}
export function resolveActiveErrorContext(params: {
lastAssistant: { provider?: string; model?: string } | undefined;
export function resolveActiveErrorContext(params: { provider: string; model: string }): {
provider: string;
model: string;
}): { provider: string; model: string } {
} {
return {
provider: params.lastAssistant?.provider ?? params.provider,
model: params.lastAssistant?.model ?? params.model,
provider: params.provider,
model: params.model,
};
}

View File

@@ -75,6 +75,7 @@ export type EmbeddedRunAttemptResult = {
assistantTexts: string[];
toolMetas: Array<{ toolName: string; meta?: string }>;
lastAssistant: AssistantMessage | undefined;
currentAttemptAssistant?: AssistantMessage | undefined;
lastToolError?: ToolErrorSummary;
didSendViaMessagingTool: boolean;
didSendDeterministicApprovalPrompt?: boolean;