From acd3697162dc9ea0c6ec06b0f7d49dac4a96b97b Mon Sep 17 00:00:00 2001 From: stain lu <109842185+stainlu@users.noreply.github.com> Date: Sun, 12 Apr 2026 02:15:26 +0800 Subject: [PATCH] fix(agents): prevent cross-provider error context leak in fallback chain (#62907) Merged via squash. Prepared head SHA: 06a3a82816b4e6167a8b2821ba055a3717692bc6 Co-authored-by: stainlu <109842185+stainlu@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf --- CHANGELOG.md | 1 + .../run.codex-server-error-fallback.test.ts | 16 +- ...ss-provider-fallback-error-context.test.ts | 174 ++++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 55 +++--- .../run/attempt.context-engine-helpers.ts | 5 +- src/agents/pi-embedded-runner/run/attempt.ts | 10 +- .../run/helpers.resolve-error-context.test.ts | 12 ++ src/agents/pi-embedded-runner/run/helpers.ts | 9 +- src/agents/pi-embedded-runner/run/types.ts | 1 + 9 files changed, 238 insertions(+), 45 deletions(-) create mode 100644 src/agents/pi-embedded-runner/run.cross-provider-fallback-error-context.test.ts create mode 100644 src/agents/pi-embedded-runner/run/helpers.resolve-error-context.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index deaa3c78234..c33cbdef79d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai - Auto-reply/WhatsApp: preserve inbound image attachment notes after media understanding so image edits keep the real saved media path instead of hallucinating a missing local path. (#64918) Thanks @ngutman. - Telegram/sessions: keep topic-scoped session initialization on the canonical topic transcript path when inbound turns omit `MessageThreadId`, so one topic session no longer alternates between bare and topic-qualified transcript files. (#64869) thanks @jalehman. +- Agents/failover: scope assistant-side fallback classification and surfaced provider errors to the current attempt instead of stale session history, so cross-provider fallback runs stop inheriting the previous provider's failure. (#62907) Thanks @stainlu. ## 2026.4.11-beta.1 diff --git a/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts b/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts index 7c4e0d58670..da87b30bb24 100644 --- a/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts +++ b/src/agents/pi-embedded-runner/run.codex-server-error-fallback.test.ts @@ -1,4 +1,5 @@ import { beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js"; import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; import { @@ -12,7 +13,6 @@ import { overflowBaseRunParams, resetRunOverflowCompactionHarnessMocks, } from "./run.overflow-compaction.harness.js"; -import type { EmbeddedRunAttemptResult } from "./run/types.js"; let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; @@ -35,15 +35,17 @@ describe("runEmbeddedPiAgent Codex server_error fallback handoff", () => { mockedFormatAssistantErrorText.mockReturnValue( "LLM error server_error: An error occurred while processing your request.", ); + const currentAttemptAssistant = makeAssistantMessageFixture({ + stopReason: "error", + errorMessage: rawCodexError, + provider: "openai-codex", + model: "gpt-5.4", + }); mockedRunEmbeddedAttempt.mockResolvedValueOnce( makeAttemptResult({ assistantTexts: [], - lastAssistant: { - stopReason: "error", - errorMessage: rawCodexError, - provider: "openai-codex", - model: "gpt-5.4", - } as EmbeddedRunAttemptResult["lastAssistant"], + lastAssistant: currentAttemptAssistant, + currentAttemptAssistant, }), ); diff --git a/src/agents/pi-embedded-runner/run.cross-provider-fallback-error-context.test.ts b/src/agents/pi-embedded-runner/run.cross-provider-fallback-error-context.test.ts new file mode 100644 index 00000000000..edb605b6bda --- /dev/null +++ b/src/agents/pi-embedded-runner/run.cross-provider-fallback-error-context.test.ts @@ -0,0 +1,174 @@ +import { beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { makeAssistantMessageFixture } from "../test-helpers/assistant-message-fixtures.js"; +import { makeModelFallbackCfg } from "../test-helpers/model-fallback-config-fixture.js"; +import { makeAttemptResult } from "./run.overflow-compaction.fixture.js"; +import { + loadRunOverflowCompactionHarness, + MockedFailoverError, + mockedFormatAssistantErrorText, + mockedGlobalHookRunner, + mockedIsFailoverAssistantError, + mockedIsRateLimitAssistantError, + mockedRunEmbeddedAttempt, + overflowBaseRunParams, + resetRunOverflowCompactionHarnessMocks, +} from "./run.overflow-compaction.harness.js"; +import type { EmbeddedRunAttemptResult } from "./run/types.js"; + +let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent; + +function isCurrentAttemptAssistant( + value: unknown, +): value is NonNullable { + return ( + typeof value === "object" && + value !== null && + "provider" in value && + "model" in value && + "errorMessage" in value + ); +} +describe("runEmbeddedPiAgent cross-provider fallback error handling", () => { + beforeAll(async () => { + ({ runEmbeddedPiAgent } = await loadRunOverflowCompactionHarness()); + }); + + beforeEach(() => { + resetRunOverflowCompactionHarnessMocks(); + mockedGlobalHookRunner.hasHooks.mockImplementation(() => false); + }); + + it("uses the current attempt assistant for fallback errors instead of stale session history", async () => { + mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => { + const assistant = args[0]; + return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek"; + }); + mockedIsRateLimitAssistantError.mockImplementation((...args: unknown[]) => { + const assistant = args[0]; + return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek"; + }); + let lastFormattedAssistant: unknown; + mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => { + lastFormattedAssistant = args[0]; + if (!isCurrentAttemptAssistant(lastFormattedAssistant)) { + return String(lastFormattedAssistant); + } + return `${lastFormattedAssistant.provider}/${lastFormattedAssistant.model}: ${lastFormattedAssistant.errorMessage}`; + }); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: makeAssistantMessageFixture({ + stopReason: "error", + errorMessage: "You have hit your ChatGPT usage limit (plus plan).", + provider: "openai-codex", + model: "gpt-5.4", + content: [], + }), + currentAttemptAssistant: makeAssistantMessageFixture({ + stopReason: "error", + errorMessage: "429 deepseek rate limit", + provider: "deepseek", + model: "deepseek-chat", + content: [], + }), + }), + ); + + const promise = runEmbeddedPiAgent({ + ...overflowBaseRunParams, + runId: "run-cross-provider-fallback-error-context", + config: makeModelFallbackCfg({ + agents: { + defaults: { + model: { + primary: "openai-codex/gpt-5.4", + fallbacks: ["deepseek/deepseek-chat", "google/gemini-2.5-flash"], + }, + }, + }, + }), + }); + + await expect(promise).rejects.toBeInstanceOf(MockedFailoverError); + await expect(promise).rejects.toThrow("deepseek/deepseek-chat: 429 deepseek rate limit"); + expect(mockedIsRateLimitAssistantError).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "deepseek", + model: "deepseek-chat", + errorMessage: "429 deepseek rate limit", + }), + ); + expect(lastFormattedAssistant).toEqual( + expect.objectContaining({ + provider: "deepseek", + model: "deepseek-chat", + errorMessage: "429 deepseek rate limit", + }), + ); + }); + + it("falls back to the session assistant when compaction removes the current attempt slice", async () => { + mockedIsFailoverAssistantError.mockImplementation((...args: unknown[]) => { + const assistant = args[0]; + return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek"; + }); + mockedIsRateLimitAssistantError.mockImplementation((...args: unknown[]) => { + const assistant = args[0]; + return isCurrentAttemptAssistant(assistant) && assistant.provider === "deepseek"; + }); + let lastFormattedAssistant: unknown; + mockedFormatAssistantErrorText.mockImplementation((...args: unknown[]) => { + lastFormattedAssistant = args[0]; + if (!isCurrentAttemptAssistant(lastFormattedAssistant)) { + return String(lastFormattedAssistant); + } + return `${lastFormattedAssistant.provider}/${lastFormattedAssistant.model}: ${lastFormattedAssistant.errorMessage}`; + }); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: makeAssistantMessageFixture({ + stopReason: "error", + errorMessage: "429 deepseek rate limit", + provider: "deepseek", + model: "deepseek-chat", + content: [], + }), + currentAttemptAssistant: undefined, + }), + ); + + const promise = runEmbeddedPiAgent({ + ...overflowBaseRunParams, + runId: "run-compaction-fallback-error-context", + config: makeModelFallbackCfg({ + agents: { + defaults: { + model: { + primary: "openai-codex/gpt-5.4", + fallbacks: ["deepseek/deepseek-chat", "google/gemini-2.5-flash"], + }, + }, + }, + }), + }); + + await expect(promise).rejects.toBeInstanceOf(MockedFailoverError); + await expect(promise).rejects.toThrow("deepseek/deepseek-chat: 429 deepseek rate limit"); + expect(mockedIsRateLimitAssistantError).toHaveBeenCalledWith( + expect.objectContaining({ + provider: "deepseek", + model: "deepseek-chat", + errorMessage: "429 deepseek rate limit", + }), + ); + expect(lastFormattedAssistant).toEqual( + expect.objectContaining({ + provider: "deepseek", + model: "deepseek-chat", + errorMessage: "429 deepseek rate limit", + }), + ); + }); +}); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index c875ca77cb9..7e4eb3967ec 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -726,7 +726,8 @@ export async function runEmbeddedPiAgent( idleTimedOut, timedOutDuringCompaction, sessionIdUsed, - lastAssistant, + lastAssistant: sessionLastAssistant, + currentAttemptAssistant, } = attempt; bootstrapPromptWarningSignaturesSeen = attempt.bootstrapPromptWarningSignaturesSeen ?? @@ -738,7 +739,7 @@ export async function runEmbeddedPiAgent( ]), ) : bootstrapPromptWarningSignaturesSeen); - const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike); + const lastAssistantUsage = normalizeUsage(sessionLastAssistant?.usage as UsageLike); const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage; mergeUsageIntoAccumulator(usageAccumulator, attemptUsage); // Keep prompt size from the latest model call so session totalTokens @@ -748,7 +749,6 @@ export async function runEmbeddedPiAgent( const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0); autoCompactionCount += attemptCompactionCount; const activeErrorContext = resolveActiveErrorContext({ - lastAssistant, provider, model: modelId, }); @@ -765,8 +765,8 @@ export async function runEmbeddedPiAgent( accumulatedReplayState, attempt.replayMetadata, ); - const formattedAssistantErrorText = lastAssistant - ? formatAssistantErrorText(lastAssistant, { + const formattedAssistantErrorText = sessionLastAssistant + ? formatAssistantErrorText(sessionLastAssistant, { cfg: params.config, sessionKey: resolvedSessionKey ?? params.sessionId, provider: activeErrorContext.provider, @@ -774,8 +774,8 @@ export async function runEmbeddedPiAgent( }) : undefined; const assistantErrorText = - lastAssistant?.stopReason === "error" - ? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText + sessionLastAssistant?.stopReason === "error" + ? sessionLastAssistant.errorMessage?.trim() || formattedAssistantErrorText : undefined; const canRestartForLiveSwitch = !attempt.didSendViaMessagingTool && @@ -1140,7 +1140,7 @@ export async function runEmbeddedPiAgent( model: model.id, usageAccumulator, lastRunPromptUsage, - lastAssistant, + lastAssistant: sessionLastAssistant, lastTurnTotal, }), systemPromptReport: attempt.systemPromptReport, @@ -1194,7 +1194,7 @@ export async function runEmbeddedPiAgent( model: model.id, usageAccumulator, lastRunPromptUsage, - lastAssistant, + lastAssistant: sessionLastAssistant, lastTurnTotal, }), systemPromptReport: attempt.systemPromptReport, @@ -1232,7 +1232,7 @@ export async function runEmbeddedPiAgent( model: model.id, usageAccumulator, lastRunPromptUsage, - lastAssistant, + lastAssistant: sessionLastAssistant, lastTurnTotal, }), systemPromptReport: attempt.systemPromptReport, @@ -1340,8 +1340,9 @@ export async function runEmbeddedPiAgent( throw promptError; } + const assistantForFailover = currentAttemptAssistant ?? sessionLastAssistant; const fallbackThinking = pickFallbackThinkingLevel({ - message: lastAssistant?.errorMessage, + message: assistantForFailover?.errorMessage, attempted: attemptedThinking, }); if (fallbackThinking && !aborted) { @@ -1352,26 +1353,28 @@ export async function runEmbeddedPiAgent( continue; } - const authFailure = isAuthAssistantError(lastAssistant); - const rateLimitFailure = isRateLimitAssistantError(lastAssistant); - const billingFailure = isBillingAssistantError(lastAssistant); - const failoverFailure = isFailoverAssistantError(lastAssistant); + const authFailure = isAuthAssistantError(assistantForFailover); + const rateLimitFailure = isRateLimitAssistantError(assistantForFailover); + const billingFailure = isBillingAssistantError(assistantForFailover); + const failoverFailure = isFailoverAssistantError(assistantForFailover); const assistantFailoverReason = classifyFailoverReason( - lastAssistant?.errorMessage ?? "", + assistantForFailover?.errorMessage ?? "", { - provider: lastAssistant?.provider, + provider: assistantForFailover?.provider, }, ); const assistantProfileFailureReason = resolveAuthProfileFailureReason(assistantFailoverReason); const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError; - const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? ""); + const imageDimensionError = parseImageDimensionError( + assistantForFailover?.errorMessage ?? "", + ); // Capture the failing profile before auth-profile rotation mutates `lastProfileId`. const failedAssistantProfileId = lastProfileId; const logAssistantFailoverDecision = createFailoverDecisionLogger({ stage: "assistant", runId: params.runId, - rawError: lastAssistant?.errorMessage?.trim(), + rawError: assistantForFailover?.errorMessage?.trim(), failoverReason: assistantFailoverReason, profileFailureReason: assistantProfileFailureReason, provider: activeErrorContext.provider, @@ -1385,7 +1388,7 @@ export async function runEmbeddedPiAgent( if ( authFailure && (await maybeRefreshRuntimeAuthForAuthError( - lastAssistant?.errorMessage ?? "", + assistantForFailover?.errorMessage ?? "", runtimeAuthRetry, )) ) { @@ -1442,7 +1445,7 @@ export async function runEmbeddedPiAgent( modelId, provider, activeErrorContext, - lastAssistant, + lastAssistant: assistantForFailover, config: params.config, sessionKey: params.sessionKey ?? params.sessionId, authFailure, @@ -1473,20 +1476,20 @@ export async function runEmbeddedPiAgent( } const usageMeta = buildUsageAgentMetaFields({ usageAccumulator, - lastAssistantUsage: lastAssistant?.usage as UsageLike | undefined, + lastAssistantUsage: sessionLastAssistant?.usage as UsageLike | undefined, lastRunPromptUsage, lastTurnTotal, }); const agentMeta: EmbeddedPiAgentMeta = { sessionId: sessionIdUsed, - provider: lastAssistant?.provider ?? provider, - model: lastAssistant?.model ?? model.id, + provider: sessionLastAssistant?.provider ?? provider, + model: sessionLastAssistant?.model ?? model.id, usage: usageMeta.usage, lastCallUsage: usageMeta.lastCallUsage, promptTokens: usageMeta.promptTokens, compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined, }; - const finalAssistantVisibleText = resolveFinalAssistantVisibleText(lastAssistant); + const finalAssistantVisibleText = resolveFinalAssistantVisibleText(sessionLastAssistant); const payloads = buildEmbeddedRunPayloads({ assistantTexts: attempt.assistantTexts, @@ -1737,7 +1740,7 @@ export async function runEmbeddedPiAgent( ? "tool_calls" : attempt.yieldDetected ? "end_turn" - : (lastAssistant?.stopReason as string | undefined), + : (sessionLastAssistant?.stopReason as string | undefined), pendingToolCalls: attempt.clientToolCall ? [ { diff --git a/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts b/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts index e0e2ca048dc..94c363a5866 100644 --- a/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts +++ b/src/agents/pi-embedded-runner/run/attempt.context-engine-helpers.ts @@ -1,4 +1,5 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { AssistantMessage } from "@mariozechner/pi-ai"; import type { MemoryCitationsMode } from "../../../config/types.memory.js"; import type { ContextEngine, ContextEngineRuntimeContext } from "../../../context-engine/types.js"; import type { NormalizedUsage } from "../../usage.js"; @@ -95,11 +96,11 @@ export function buildContextEnginePromptCacheInfo(params: { export function findCurrentAttemptAssistantMessage(params: { messagesSnapshot: AgentMessage[]; prePromptMessageCount: number; -}): AgentMessage | undefined { +}): AssistantMessage | undefined { return params.messagesSnapshot .slice(Math.max(0, params.prePromptMessageCount)) .toReversed() - .find((message) => message.role === "assistant"); + .find((message): message is AssistantMessage => message.role === "assistant"); } export async function runAttemptContextEngineBootstrap(params: { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index e777157eb02..032595ba5de 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -112,7 +112,7 @@ import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveAgentTimeoutMs } from "../../timeout.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js"; import { resolveTranscriptPolicy } from "../../transcript-policy.js"; -import { normalizeUsage, type NormalizedUsage, type UsageLike } from "../../usage.js"; +import { normalizeUsage, type NormalizedUsage } from "../../usage.js"; import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js"; import { isRunnerAbortError } from "../abort.js"; import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "../cache-ttl.js"; @@ -1512,6 +1512,7 @@ export async function runEmbeddedAttempt( abort: abortRun, }; let lastAssistant: AgentMessage | undefined; + let currentAttemptAssistant: EmbeddedRunAttemptResult["currentAttemptAssistant"]; let attemptUsage: NormalizedUsage | undefined; let cacheBreak: ReturnType = null; let promptCache: EmbeddedRunAttemptResult["promptCache"]; @@ -2075,7 +2076,7 @@ export async function runEmbeddedAttempt( .slice() .toReversed() .find((m) => m.role === "assistant"); - const currentAttemptAssistant = findCurrentAttemptAssistantMessage({ + currentAttemptAssistant = findCurrentAttemptAssistantMessage({ messagesSnapshot, prePromptMessageCount, }); @@ -2087,9 +2088,7 @@ export async function runEmbeddedAttempt( usage: attemptUsage, }) : null; - const lastCallUsage = normalizeUsage( - (currentAttemptAssistant as { usage?: UsageLike } | undefined)?.usage, - ); + const lastCallUsage = normalizeUsage(currentAttemptAssistant?.usage); const promptCacheObservation = cacheObservabilityEnabled && (cacheBreak || promptCacheChangesForTurn || typeof attemptUsage?.cacheRead === "number") @@ -2353,6 +2352,7 @@ export async function runEmbeddedAttempt( assistantTexts, toolMetas: toolMetasNormalized, lastAssistant, + currentAttemptAssistant, lastToolError: getLastToolError?.(), didSendViaMessagingTool: didSendViaMessagingTool(), messagingToolSentTexts: getMessagingToolSentTexts(), diff --git a/src/agents/pi-embedded-runner/run/helpers.resolve-error-context.test.ts b/src/agents/pi-embedded-runner/run/helpers.resolve-error-context.test.ts new file mode 100644 index 00000000000..6e0e6bae31d --- /dev/null +++ b/src/agents/pi-embedded-runner/run/helpers.resolve-error-context.test.ts @@ -0,0 +1,12 @@ +import { describe, expect, it } from "vitest"; +import { resolveActiveErrorContext } from "./helpers.js"; + +describe("resolveActiveErrorContext", () => { + it("returns the current provider/model", () => { + const result = resolveActiveErrorContext({ + provider: "deepseek", + model: "deepseek-chat", + }); + expect(result).toEqual({ provider: "deepseek", model: "deepseek-chat" }); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/helpers.ts b/src/agents/pi-embedded-runner/run/helpers.ts index c55fcdc04e2..7704340b87c 100644 --- a/src/agents/pi-embedded-runner/run/helpers.ts +++ b/src/agents/pi-embedded-runner/run/helpers.ts @@ -77,14 +77,13 @@ export function resolveMaxRunRetryIterations(profileCandidateCount: number): num return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled)); } -export function resolveActiveErrorContext(params: { - lastAssistant: { provider?: string; model?: string } | undefined; +export function resolveActiveErrorContext(params: { provider: string; model: string }): { provider: string; model: string; -}): { provider: string; model: string } { +} { return { - provider: params.lastAssistant?.provider ?? params.provider, - model: params.lastAssistant?.model ?? params.model, + provider: params.provider, + model: params.model, }; } diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index 52e3e276bfb..274af5c6255 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -75,6 +75,7 @@ export type EmbeddedRunAttemptResult = { assistantTexts: string[]; toolMetas: Array<{ toolName: string; meta?: string }>; lastAssistant: AssistantMessage | undefined; + currentAttemptAssistant?: AssistantMessage | undefined; lastToolError?: ToolErrorSummary; didSendViaMessagingTool: boolean; didSendDeterministicApprovalPrompt?: boolean;