From 8996161e992a87a1f01e69070a048a12feec1493 Mon Sep 17 00:00:00 2001 From: Simon <178938782+simonusa@users.noreply.github.com> Date: Sat, 2 May 2026 19:20:08 +0530 Subject: [PATCH] fix(agents): skip retry paths for tool timeouts Thread tool-timeout state through timeout-triggered compaction and generic timeout payload synthesis, and add a changelog entry. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + src/agents/pi-embedded-runner/run.ts | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3079be3fbd8..7dd987a5c49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai ### Fixes - CLI/update: treat inherited Gateway service markers as origin hints and only block package replacement when the managed Gateway is still live, so self-updates can stop the service and continue safely. (#75729) Thanks @hxy91819. +- Agents/failover: exempt run-level timeouts that fire during tool execution from model fallback, timeout-triggered compaction, and generic timeout payload synthesis. Long `process(poll)`, browser, or `exec` tool calls that exceed `agents.defaults.timeoutSeconds` previously rotated auth profiles, switched to a fallback model, and surfaced a misleading "LLM request timed out" error even though the primary model had already responded. Mirrors the existing `timedOutDuringCompaction` precedent (#46889). Fixes #52147. (#75873) Thanks @simonusa. ## 2026.5.2 diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index c96a7622c3d..208c76fee48 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1244,7 +1244,10 @@ export async function runEmbeddedPiAgent( // ── Timeout-triggered compaction ────────────────────────────────── // When the LLM times out with high context usage, compact before // retrying to break the death spiral of repeated timeouts. 
- if (timedOut && !timedOutDuringCompaction) { + // Skip when the timeout fired during tool execution: the LLM had + // already responded, the prompt wasn't the problem, and compacting + // would lose the in-flight tool context. See #52147. + if (timedOut && !timedOutDuringCompaction && !timedOutDuringToolExecution) { // Only consider prompt-side tokens here. API totals include output // tokens, which can make a long generation look like high context // pressure even when the prompt itself was small. @@ -2078,7 +2081,17 @@ export async function runEmbeddedPiAgent( // Timeout aborts can leave the run without any assistant payloads. // Emit an explicit timeout error instead of silently completing, so // callers do not lose the turn as an orphaned user message. - if (timedOut && !timedOutDuringCompaction && !payloadsWithToolMedia?.length) { + // Skip when the timeout fired during tool execution: the assistant + // did produce a response (a tool call) that ran long; the generic + // "no response from model" payload would mislead the caller. The + // partial tool output already in the session is the correct artifact + // to surface. See #52147. + if ( + timedOut && + !timedOutDuringCompaction && + !timedOutDuringToolExecution && + !payloadsWithToolMedia?.length + ) { const timeoutText = idleTimedOut ? "The model did not produce a response before the model idle timeout. " + "Please try again, or increase `models.providers..timeoutSeconds` for slow local or self-hosted providers."