fix(agents): classify tool-execution timeouts

Detect run-level timeouts that fire while a tool call is still active and keep them out of assistant model fallback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Simon
2026-05-02 19:20:07 +05:30
committed by Ayaan Zaidi
parent afbc395dda
commit 2605490dbd
17 changed files with 122 additions and 1 deletions

View File

@@ -68,6 +68,7 @@ function createAttemptResult(sessionIdUsed: string): EmbeddedRunAttemptResult {
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
promptError: null,
promptErrorSource: null,
sessionIdUsed,

View File

@@ -47,6 +47,7 @@ function createAttemptResult(): EmbeddedRunAttemptResult {
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
promptError: null,
promptErrorSource: null,
sessionIdUsed: "session-1",

View File

@@ -168,6 +168,7 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
promptError: null,
promptErrorSource: null,
sessionIdUsed: "session:test",

View File

@@ -45,6 +45,7 @@ export function makeAttemptResult(
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
promptError: null,
promptErrorSource: null,
sessionIdUsed: "test-session",

View File

@@ -1133,6 +1133,7 @@ export async function runEmbeddedPiAgent(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
sessionIdUsed,
sessionFileUsed,
lastAssistant: sessionLastAssistant,
@@ -1932,6 +1933,7 @@ export async function runEmbeddedPiAgent(
failoverReason: assistantFailoverReason,
timedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
profileRotated: false,
});
const assistantFailoverOutcome = await handleAssistantFailover({
@@ -1944,6 +1946,7 @@ export async function runEmbeddedPiAgent(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
allowSameModelIdleTimeoutRetry:
timedOut &&
idleTimedOut &&

View File

@@ -19,6 +19,7 @@ function makeParams(overrides: Partial<Params> = {}): Params {
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
allowSameModelIdleTimeoutRetry: false,
assistantProfileFailureReason: null,
lastProfileId: undefined,

View File

@@ -42,6 +42,7 @@ export async function handleAssistantFailover(params: {
timedOut: boolean;
idleTimedOut: boolean;
timedOutDuringCompaction: boolean;
timedOutDuringToolExecution: boolean;
allowSameModelIdleTimeoutRetry: boolean;
assistantProfileFailureReason: AuthProfileFailureReason | null;
lastProfileId?: string;
@@ -177,6 +178,7 @@ export async function handleAssistantFailover(params: {
failoverReason: params.failoverReason,
timedOut: params.timedOut,
timedOutDuringCompaction: params.timedOutDuringCompaction,
timedOutDuringToolExecution: params.timedOutDuringToolExecution,
profileRotated: true,
});
}

View File

@@ -101,6 +101,7 @@ import {
resolveBootstrapPromptTruncationWarningMode,
resolveBootstrapTotalMaxChars,
} from "../../pi-embedded-helpers.js";
import { countActiveToolExecutions } from "../../pi-embedded-subscribe.handlers.tools.js";
import { subscribeEmbeddedPiSession } from "../../pi-embedded-subscribe.js";
import { createPreparedEmbeddedPiSettingsManager } from "../../pi-project-settings.js";
import {
@@ -782,6 +783,7 @@ export async function runEmbeddedAttempt(
let timedOut = false;
let idleTimedOut = false;
let timedOutDuringCompaction = false;
let timedOutDuringToolExecution = false;
let promptError: unknown = null;
let emitDiagnosticRunCompleted:
| ((outcome: "completed" | "aborted" | "error", err?: unknown) => void)
@@ -2250,6 +2252,14 @@ export async function runEmbeddedAttempt(
aborted = true;
if (isTimeout) {
timedOut = true;
// Distinguish run-timer fires that occur while tool execution is in
// flight (LLM already responded; primary model is not at fault) from
// LLM-phase timeouts. Mirrors the `timedOutDuringCompaction` precedent
// (#46889) so the failover policy can skip pointless model fallback.
// Closes #52147.
if (!timedOutDuringCompaction && countActiveToolExecutions(params.runId) > 0) {
timedOutDuringToolExecution = true;
}
}
if (isTimeout) {
runAbortController.abort(reason ?? makeTimeoutAbortReason());
@@ -3456,6 +3466,7 @@ export async function runEmbeddedAttempt(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
promptError: promptError ? formatErrorMessage(promptError) : undefined,
promptErrorSource,
usage: attemptUsage,
@@ -3474,6 +3485,7 @@ export async function runEmbeddedAttempt(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
promptError: promptError ? formatErrorMessage(promptError) : undefined,
promptErrorSource,
usage: attemptUsage,
@@ -3498,6 +3510,7 @@ export async function runEmbeddedAttempt(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
promptError: promptError ? formatErrorMessage(promptError) : undefined,
});
trajectoryEndRecorded = true;
@@ -3511,6 +3524,7 @@ export async function runEmbeddedAttempt(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
promptError,
promptErrorSource,
preflightRecovery,
@@ -3555,6 +3569,7 @@ export async function runEmbeddedAttempt(
timedOut,
idleTimedOut,
timedOutDuringCompaction,
timedOutDuringToolExecution,
promptError: promptError ? formatErrorMessage(promptError) : undefined,
});
}

View File

@@ -72,6 +72,7 @@ describe("resolveRunFailoverDecision", () => {
failoverReason: "rate_limit",
timedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
profileRotated: false,
}),
).toEqual({
@@ -91,6 +92,7 @@ describe("resolveRunFailoverDecision", () => {
failoverReason: "rate_limit",
timedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
profileRotated: true,
}),
).toEqual({
@@ -110,6 +112,7 @@ describe("resolveRunFailoverDecision", () => {
failoverReason: null,
timedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
profileRotated: false,
}),
).toEqual({
@@ -134,6 +137,64 @@ describe("resolveRunFailoverDecision", () => {
});
});
// Assistant-stage timeout flagged as having fired during tool execution, with
// no profile rotation yet: the decision must be the plain "continue_normal"
// action even though a fallback is configured.
it("does not rotate or fallback assistant timeouts that fired during tool execution (#52147)", () => {
  const decision = resolveRunFailoverDecision({
    stage: "assistant",
    aborted: true,
    externalAbort: false,
    fallbackConfigured: true,
    failoverFailure: false,
    failoverReason: null,
    timedOut: true,
    timedOutDuringCompaction: false,
    timedOutDuringToolExecution: true,
    profileRotated: false,
  });

  expect(decision).toEqual({ action: "continue_normal" });
});
// Same tool-execution timeout scenario, but with `profileRotated: true`: the
// decision must still be "continue_normal" rather than a model fallback.
it("does not fallback assistant tool-execution timeouts even after profile rotation exhausted (#52147)", () => {
  const decision = resolveRunFailoverDecision({
    stage: "assistant",
    aborted: true,
    externalAbort: false,
    fallbackConfigured: true,
    failoverFailure: false,
    failoverReason: null,
    timedOut: true,
    timedOutDuringCompaction: false,
    timedOutDuringToolExecution: true,
    profileRotated: true,
  });

  expect(decision).toEqual({ action: "continue_normal" });
});
// Control case: a timeout with neither the compaction flag nor the
// tool-execution flag set is still expected to request a profile rotation
// with a null reason.
it("still rotates assistant timeouts that fired during LLM phase (no active tool execution)", () => {
  const decision = resolveRunFailoverDecision({
    stage: "assistant",
    aborted: true,
    externalAbort: false,
    fallbackConfigured: true,
    failoverFailure: false,
    failoverReason: null,
    timedOut: true,
    timedOutDuringCompaction: false,
    timedOutDuringToolExecution: false,
    profileRotated: false,
  });

  expect(decision).toEqual({
    action: "rotate_profile",
    reason: null,
  });
});
it("does not rotate or fallback assistant timeouts after an external abort", () => {
expect(
resolveRunFailoverDecision({
@@ -145,6 +206,7 @@ describe("resolveRunFailoverDecision", () => {
failoverReason: null,
timedOut: true,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
profileRotated: false,
}),
).toEqual({

View File

@@ -56,6 +56,7 @@ type AssistantDecisionParams = {
failoverReason: FailoverReason | null;
timedOut: boolean;
timedOutDuringCompaction: boolean;
timedOutDuringToolExecution: boolean;
profileRotated: boolean;
};
@@ -81,7 +82,7 @@ function shouldRotatePrompt(params: PromptDecisionParams): boolean {
/**
 * Reports whether an assistant-stage outcome qualifies for rotation.
 *
 * Returns true when either:
 * - the run was not aborted and a failover signal is present
 *   (`failoverFailure` is set or `failoverReason` is non-null), or
 * - the run timed out and the timeout is not attributed to compaction or to
 *   an in-flight tool execution.
 *
 * NOTE(review): this hunk is shown with diff markers stripped. The first
 * `params.timedOut` line below appears to be the removed pre-change condition
 * and the second its replacement; only one of them exists in the real file.
 */
function shouldRotateAssistant(params: AssistantDecisionParams): boolean {
return (
(!params.aborted && (params.failoverFailure || params.failoverReason !== null)) ||
(params.timedOut && !params.timedOutDuringCompaction)
(params.timedOut && !params.timedOutDuringCompaction && !params.timedOutDuringToolExecution)
);
}

View File

@@ -58,6 +58,12 @@ export type EmbeddedRunAttemptResult = {
idleTimedOut: boolean;
/** True if the timeout occurred while compaction was in progress or pending. */
timedOutDuringCompaction: boolean;
/**
* True if the run-level timer fired while at least one tool execution was
* still in flight. The LLM had already responded; the timeout is unrelated
* to the primary model and must not trigger model fallback. Closes #52147.
*/
timedOutDuringToolExecution: boolean;
promptError: unknown;
/**
* Identifies which phase produced the promptError.

View File

@@ -92,6 +92,26 @@ function buildToolStartKey(runId: string, toolCallId: string): string {
return `${runId}:${toolCallId}`;
}
/**
 * Returns how many entries in `toolStartData` belong to the given run.
 *
 * An entry belongs to the run when its key begins with `${runId}:`, the
 * prefix produced by `buildToolStartKey`. Entries are inserted by
 * handleToolExecutionStart and removed by handleToolExecutionEnd, so the
 * result is the number of tool calls that have started but not yet finished.
 * The embedded run timer uses it to tell whether a run-level timeout fired
 * while a tool was still executing, in which case the failover policy should
 * not rotate to a fallback model.
 *
 * NOTE(review): matching is by string prefix, so a run id that is itself a
 * `:`-terminated prefix of another run id would also be counted — confirm
 * run ids cannot contain `:`.
 *
 * @param runId - Identifier of the run whose in-flight tool calls are counted.
 * @returns The number of matching entries; 0 when none are in flight.
 */
export function countActiveToolExecutions(runId: string): number {
  const runKeyPrefix = `${runId}:`;
  const keysForRun = Array.from(toolStartData.keys()).filter((startKey) =>
    startKey.startsWith(runKeyPrefix),
  );
  return keysForRun.length;
}
function isCronAddAction(args: unknown): boolean {
if (!args || typeof args !== "object") {
return false;

View File

@@ -109,6 +109,7 @@ export function makeEmbeddedRunnerAttempt(
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
promptError: null,
promptErrorSource: null,
sessionIdUsed: "session:test",

View File

@@ -745,6 +745,8 @@ function buildArtifactsCapture(params: {
idleTimedOut: runtimeArtifacts?.idleTimedOut ?? runtimeEnd?.idleTimedOut,
timedOutDuringCompaction:
runtimeArtifacts?.timedOutDuringCompaction ?? runtimeEnd?.timedOutDuringCompaction,
timedOutDuringToolExecution:
runtimeArtifacts?.timedOutDuringToolExecution ?? runtimeEnd?.timedOutDuringToolExecution,
promptError:
runtimeArtifacts?.promptError ?? runtimeEnd?.promptError ?? runtimeCompletion?.promptError,
promptErrorSource: runtimeArtifacts?.promptErrorSource ?? runtimeCompletion?.promptErrorSource,

View File

@@ -185,6 +185,7 @@ describe("trajectory metadata", () => {
timedOut: false,
idleTimedOut: false,
timedOutDuringCompaction: false,
timedOutDuringToolExecution: false,
compactionCount: 1,
assistantTexts: ["done"],
finalPromptText: "run tests",

View File

@@ -46,6 +46,7 @@ type BuildTrajectoryArtifactsParams = {
timedOut: boolean;
idleTimedOut: boolean;
timedOutDuringCompaction: boolean;
timedOutDuringToolExecution: boolean;
promptError?: string;
promptErrorSource?: string | null;
usage?: unknown;
@@ -303,6 +304,7 @@ export function buildTrajectoryArtifacts(
timedOut: params.timedOut,
idleTimedOut: params.idleTimedOut,
timedOutDuringCompaction: params.timedOutDuringCompaction,
timedOutDuringToolExecution: params.timedOutDuringToolExecution,
promptError: params.promptError,
promptErrorSource: params.promptErrorSource,
usage: params.usage,