diff --git a/extensions/codex/src/app-server/event-projector.ts b/extensions/codex/src/app-server/event-projector.ts index 6e4f9b6264f..bfe1477fa07 100644 --- a/extensions/codex/src/app-server/event-projector.ts +++ b/extensions/codex/src/app-server/event-projector.ts @@ -220,6 +220,7 @@ export class CodexAppServerEventProjector { timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError, promptErrorSource: promptError ? this.promptErrorSource || "prompt" : null, sessionIdUsed: this.params.sessionId, diff --git a/src/agents/harness/selection.test.ts b/src/agents/harness/selection.test.ts index 487d290ac80..7e151da17e5 100644 --- a/src/agents/harness/selection.test.ts +++ b/src/agents/harness/selection.test.ts @@ -68,6 +68,7 @@ function createAttemptResult(sessionIdUsed: string): EmbeddedRunAttemptResult { timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError: null, promptErrorSource: null, sessionIdUsed, diff --git a/src/agents/harness/v2.test.ts b/src/agents/harness/v2.test.ts index b83283a5dba..d5086a017a1 100644 --- a/src/agents/harness/v2.test.ts +++ b/src/agents/harness/v2.test.ts @@ -47,6 +47,7 @@ function createAttemptResult(): EmbeddedRunAttemptResult { timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError: null, promptErrorSource: null, sessionIdUsed: "session-1", diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 7cd811b0ff6..749454ebc59 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -168,6 +168,7 @@ const makeAttempt = (overrides: Partial): EmbeddedRunA timedOut: false, 
idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError: null, promptErrorSource: null, sessionIdUsed: "session:test", diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts index 3d4c04b2494..29bc3a19050 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts @@ -45,6 +45,7 @@ export function makeAttemptResult( timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError: null, promptErrorSource: null, sessionIdUsed: "test-session", diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index cffa69daebc..c96a7622c3d 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1133,6 +1133,7 @@ export async function runEmbeddedPiAgent( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, sessionIdUsed, sessionFileUsed, lastAssistant: sessionLastAssistant, @@ -1932,6 +1933,7 @@ export async function runEmbeddedPiAgent( failoverReason: assistantFailoverReason, timedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, profileRotated: false, }); const assistantFailoverOutcome = await handleAssistantFailover({ @@ -1944,6 +1946,7 @@ export async function runEmbeddedPiAgent( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, allowSameModelIdleTimeoutRetry: timedOut && idleTimedOut && diff --git a/src/agents/pi-embedded-runner/run/assistant-failover.test.ts b/src/agents/pi-embedded-runner/run/assistant-failover.test.ts index 474683f97b9..db7c575ec13 100644 --- a/src/agents/pi-embedded-runner/run/assistant-failover.test.ts +++ b/src/agents/pi-embedded-runner/run/assistant-failover.test.ts @@ -19,6 +19,7 @@ function makeParams(overrides: Partial = {}): 
Params { timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, allowSameModelIdleTimeoutRetry: false, assistantProfileFailureReason: null, lastProfileId: undefined, diff --git a/src/agents/pi-embedded-runner/run/assistant-failover.ts b/src/agents/pi-embedded-runner/run/assistant-failover.ts index 84d3a2c2b1b..be37dde285b 100644 --- a/src/agents/pi-embedded-runner/run/assistant-failover.ts +++ b/src/agents/pi-embedded-runner/run/assistant-failover.ts @@ -42,6 +42,7 @@ export async function handleAssistantFailover(params: { timedOut: boolean; idleTimedOut: boolean; timedOutDuringCompaction: boolean; + timedOutDuringToolExecution: boolean; allowSameModelIdleTimeoutRetry: boolean; assistantProfileFailureReason: AuthProfileFailureReason | null; lastProfileId?: string; @@ -177,6 +178,7 @@ export async function handleAssistantFailover(params: { failoverReason: params.failoverReason, timedOut: params.timedOut, timedOutDuringCompaction: params.timedOutDuringCompaction, + timedOutDuringToolExecution: params.timedOutDuringToolExecution, profileRotated: true, }); } diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index f116dffa970..f5d88246fd4 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -101,6 +101,7 @@ import { resolveBootstrapPromptTruncationWarningMode, resolveBootstrapTotalMaxChars, } from "../../pi-embedded-helpers.js"; +import { countActiveToolExecutions } from "../../pi-embedded-subscribe.handlers.tools.js"; import { subscribeEmbeddedPiSession } from "../../pi-embedded-subscribe.js"; import { createPreparedEmbeddedPiSettingsManager } from "../../pi-project-settings.js"; import { @@ -782,6 +783,7 @@ export async function runEmbeddedAttempt( let timedOut = false; let idleTimedOut = false; let timedOutDuringCompaction = false; + let timedOutDuringToolExecution = false; let promptError: unknown = 
null; let emitDiagnosticRunCompleted: | ((outcome: "completed" | "aborted" | "error", err?: unknown) => void) @@ -2250,6 +2252,14 @@ export async function runEmbeddedAttempt( aborted = true; if (isTimeout) { timedOut = true; + // Distinguish run-timer fires that occur while tool execution is in + // flight (LLM already responded; primary model is not at fault) from + // LLM-phase timeouts. Mirrors the `timedOutDuringCompaction` precedent + // (#46889) so the failover policy can skip pointless model fallback. + // Closes #52147. + if (!timedOutDuringCompaction && countActiveToolExecutions(params.runId) > 0) { + timedOutDuringToolExecution = true; + } } if (isTimeout) { runAbortController.abort(reason ?? makeTimeoutAbortReason()); @@ -3456,6 +3466,7 @@ export async function runEmbeddedAttempt( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, promptError: promptError ? formatErrorMessage(promptError) : undefined, promptErrorSource, usage: attemptUsage, @@ -3474,6 +3485,7 @@ export async function runEmbeddedAttempt( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, promptError: promptError ? formatErrorMessage(promptError) : undefined, promptErrorSource, usage: attemptUsage, @@ -3498,6 +3510,7 @@ export async function runEmbeddedAttempt( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, promptError: promptError ? formatErrorMessage(promptError) : undefined, }); trajectoryEndRecorded = true; @@ -3511,6 +3524,7 @@ export async function runEmbeddedAttempt( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, promptError, promptErrorSource, preflightRecovery, @@ -3555,6 +3569,7 @@ export async function runEmbeddedAttempt( timedOut, idleTimedOut, timedOutDuringCompaction, + timedOutDuringToolExecution, promptError: promptError ? 
formatErrorMessage(promptError) : undefined, }); } diff --git a/src/agents/pi-embedded-runner/run/failover-policy.test.ts b/src/agents/pi-embedded-runner/run/failover-policy.test.ts index c4081c5789a..bfa3c0a04c8 100644 --- a/src/agents/pi-embedded-runner/run/failover-policy.test.ts +++ b/src/agents/pi-embedded-runner/run/failover-policy.test.ts @@ -72,6 +72,7 @@ describe("resolveRunFailoverDecision", () => { failoverReason: "rate_limit", timedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, profileRotated: false, }), ).toEqual({ @@ -91,6 +92,7 @@ describe("resolveRunFailoverDecision", () => { failoverReason: "rate_limit", timedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, profileRotated: true, }), ).toEqual({ @@ -110,6 +112,7 @@ describe("resolveRunFailoverDecision", () => { failoverReason: null, timedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, profileRotated: false, }), ).toEqual({ @@ -134,6 +137,64 @@ describe("resolveRunFailoverDecision", () => { }); }); + it("does not rotate or fallback assistant timeouts that fired during tool execution (#52147)", () => { + expect( + resolveRunFailoverDecision({ + stage: "assistant", + aborted: true, + externalAbort: false, + fallbackConfigured: true, + failoverFailure: false, + failoverReason: null, + timedOut: true, + timedOutDuringCompaction: false, + timedOutDuringToolExecution: true, + profileRotated: false, + }), + ).toEqual({ + action: "continue_normal", + }); + }); + + it("does not fallback assistant tool-execution timeouts even after profile rotation exhausted (#52147)", () => { + expect( + resolveRunFailoverDecision({ + stage: "assistant", + aborted: true, + externalAbort: false, + fallbackConfigured: true, + failoverFailure: false, + failoverReason: null, + timedOut: true, + timedOutDuringCompaction: false, + timedOutDuringToolExecution: true, + profileRotated: true, + }), + ).toEqual({ + action: 
"continue_normal", + }); + }); + + it("still rotates assistant timeouts that fired during LLM phase (no active tool execution)", () => { + expect( + resolveRunFailoverDecision({ + stage: "assistant", + aborted: true, + externalAbort: false, + fallbackConfigured: true, + failoverFailure: false, + failoverReason: null, + timedOut: true, + timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, + profileRotated: false, + }), + ).toEqual({ + action: "rotate_profile", + reason: null, + }); + }); + it("does not rotate or fallback assistant timeouts after an external abort", () => { expect( resolveRunFailoverDecision({ @@ -145,6 +206,7 @@ describe("resolveRunFailoverDecision", () => { failoverReason: null, timedOut: true, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, profileRotated: false, }), ).toEqual({ diff --git a/src/agents/pi-embedded-runner/run/failover-policy.ts b/src/agents/pi-embedded-runner/run/failover-policy.ts index 841b392c01e..10c026d417c 100644 --- a/src/agents/pi-embedded-runner/run/failover-policy.ts +++ b/src/agents/pi-embedded-runner/run/failover-policy.ts @@ -56,6 +56,7 @@ type AssistantDecisionParams = { failoverReason: FailoverReason | null; timedOut: boolean; timedOutDuringCompaction: boolean; + timedOutDuringToolExecution: boolean; profileRotated: boolean; }; @@ -81,7 +82,7 @@ function shouldRotatePrompt(params: PromptDecisionParams): boolean { function shouldRotateAssistant(params: AssistantDecisionParams): boolean { return ( (!params.aborted && (params.failoverFailure || params.failoverReason !== null)) || - (params.timedOut && !params.timedOutDuringCompaction) + (params.timedOut && !params.timedOutDuringCompaction && !params.timedOutDuringToolExecution) ); } diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index c69dd499ebd..81c850eecdf 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -58,6 
+58,12 @@ export type EmbeddedRunAttemptResult = { idleTimedOut: boolean; /** True if the timeout occurred while compaction was in progress or pending. */ timedOutDuringCompaction: boolean; + /** + * True if the run-level timer fired while at least one tool execution was + * still in flight. The LLM had already responded; the timeout is unrelated + * to the primary model and must not trigger model fallback. Closes #52147. + */ + timedOutDuringToolExecution: boolean; promptError: unknown; /** * Identifies which phase produced the promptError. diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts index 5673e2f952d..f4d1707165a 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts @@ -92,6 +92,26 @@ function buildToolStartKey(runId: string, toolCallId: string): string { return `${runId}:${toolCallId}`; } +/** + * Count tool executions currently in flight for a given run. + * + * Scans the `toolStartData` map (handleToolExecutionStart inserts on tool start, + * handleToolExecutionEnd deletes on completion) and returns how many keys begin + * with `${runId}:`; 0 when none match. The embedded run timer uses this to flag + * timeouts that fired mid-tool, so the timeout alone does not trigger failover rotation. + * NOTE(review): keys of a runId that extends this one past a `:` would also match — confirm runIds cannot collide. 
+ */ +export function countActiveToolExecutions(runId: string): number { + const prefix = `${runId}:`; + let count = 0; + for (const key of toolStartData.keys()) { + if (key.startsWith(prefix)) { + count += 1; + } + } + return count; +} + function isCronAddAction(args: unknown): boolean { if (!args || typeof args !== "object") { return false; diff --git a/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts b/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts index 185c6af2a24..8c6de735916 100644 --- a/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts +++ b/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts @@ -109,6 +109,7 @@ export function makeEmbeddedRunnerAttempt( timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, promptError: null, promptErrorSource: null, sessionIdUsed: "session:test", diff --git a/src/trajectory/export.ts b/src/trajectory/export.ts index 8f3b9d7a17d..441c6a53e33 100644 --- a/src/trajectory/export.ts +++ b/src/trajectory/export.ts @@ -745,6 +745,8 @@ function buildArtifactsCapture(params: { idleTimedOut: runtimeArtifacts?.idleTimedOut ?? runtimeEnd?.idleTimedOut, timedOutDuringCompaction: runtimeArtifacts?.timedOutDuringCompaction ?? runtimeEnd?.timedOutDuringCompaction, + timedOutDuringToolExecution: + runtimeArtifacts?.timedOutDuringToolExecution ?? runtimeEnd?.timedOutDuringToolExecution, promptError: runtimeArtifacts?.promptError ?? runtimeEnd?.promptError ?? runtimeCompletion?.promptError, promptErrorSource: runtimeArtifacts?.promptErrorSource ?? 
runtimeCompletion?.promptErrorSource, diff --git a/src/trajectory/metadata.test.ts b/src/trajectory/metadata.test.ts index ec56b2fc4a8..ef3aa3cde45 100644 --- a/src/trajectory/metadata.test.ts +++ b/src/trajectory/metadata.test.ts @@ -185,6 +185,7 @@ describe("trajectory metadata", () => { timedOut: false, idleTimedOut: false, timedOutDuringCompaction: false, + timedOutDuringToolExecution: false, compactionCount: 1, assistantTexts: ["done"], finalPromptText: "run tests", diff --git a/src/trajectory/metadata.ts b/src/trajectory/metadata.ts index f3bbcf22a79..755763ff3f0 100644 --- a/src/trajectory/metadata.ts +++ b/src/trajectory/metadata.ts @@ -46,6 +46,7 @@ type BuildTrajectoryArtifactsParams = { timedOut: boolean; idleTimedOut: boolean; timedOutDuringCompaction: boolean; + timedOutDuringToolExecution: boolean; promptError?: string; promptErrorSource?: string | null; usage?: unknown; @@ -303,6 +304,7 @@ export function buildTrajectoryArtifacts( timedOut: params.timedOut, idleTimedOut: params.idleTimedOut, timedOutDuringCompaction: params.timedOutDuringCompaction, + timedOutDuringToolExecution: params.timedOutDuringToolExecution, promptError: params.promptError, promptErrorSource: params.promptErrorSource, usage: params.usage,