From d9dc75774bcb8a58087f50033b5e885431576246 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 8 Apr 2026 14:30:27 +0100 Subject: [PATCH] fix: align LLM idle timeout policy --- CHANGELOG.md | 1 + docs/concepts/agent-loop.md | 1 + ...pi-agent.auth-profile-rotation.e2e.test.ts | 1 + .../run.overflow-compaction.fixture.ts | 1 + .../run.timeout-triggered-compaction.test.ts | 19 +++++++ src/agents/pi-embedded-runner/run.ts | 10 ++-- src/agents/pi-embedded-runner/run/attempt.ts | 8 ++- .../run/llm-idle-timeout.test.ts | 52 ++++++++++++++++--- .../run/llm-idle-timeout.ts | 25 +++++++-- src/agents/pi-embedded-runner/run/types.ts | 2 + .../usage-reporting.test.ts | 1 + .../pi-embedded-runner-e2e-fixtures.ts | 1 + 12 files changed, 106 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20f27449eca..07a3ce401ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai - Control UI: guard stale session-history reloads during fast session switches so the selected session and rendered transcript stay in sync. (#62975) Thanks @scoootscooob. - Agents/failover: classify Z.ai vendor code `1311` as billing and `1113` as auth, including long wrapped `1311` payloads, so these errors stop falling through to generic failover handling. (#49552) Thanks @1bcMax. - npm packaging: mirror bundled Slack, Telegram, Discord, and Feishu channel runtime deps at the root and harden published-install verification so fresh installs fail fast on manifest drift instead of missing-module crashes. (#63065) Thanks @scoootscooob. +- Agents/timeouts: make the LLM idle timeout inherit `agents.defaults.timeoutSeconds` when configured, disable the unconfigured idle watchdog for cron runs, and point idle-timeout errors at `agents.defaults.llm.idleTimeoutSeconds`. Thanks @drvoss. ## 2026.4.8 diff --git a/docs/concepts/agent-loop.md b/docs/concepts/agent-loop.md index dd6e45c8792..e1aa0cacc51 100644 --- a/docs/concepts/agent-loop.md +++ b/docs/concepts/agent-loop.md @@ -151,6 +151,7 @@ See [Plugin hooks](/plugins/architecture#provider-runtime-hooks) for the hook AP - `agent.wait` default: 30s (just the wait). `timeoutMs` param overrides. - Agent runtime: `agents.defaults.timeoutSeconds` default 172800s (48 hours); enforced in `runEmbeddedPiAgent` abort timer. +- LLM idle timeout: `agents.defaults.llm.idleTimeoutSeconds` aborts a model request when no response chunks arrive before the idle window. Set it explicitly for slow local models or reasoning/tool-call providers; set it to 0 to disable. If it is not set, OpenClaw uses `agents.defaults.timeoutSeconds` when configured, otherwise 60s. Cron-triggered runs with no explicit LLM or agent timeout disable the idle watchdog and rely on the cron outer timeout. ## Where things can end early diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index 2250bb9cfd9..9e5e14acb98 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -184,6 +184,7 @@ const makeAttempt = (overrides: Partial): EmbeddedRunA return { aborted: false, timedOut: false, + idleTimedOut: false, timedOutDuringCompaction: false, promptError: null, promptErrorSource: null, diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts index 39122cd446e..8eabce1d1a0 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts @@ -35,6 +35,7 @@ export function makeAttemptResult( return { aborted: false, timedOut: false, + idleTimedOut: false, timedOutDuringCompaction: false, promptError: null, promptErrorSource: null, diff --git a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts index 64c1d425ba2..a9a9302b609 100644 --- a/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.timeout-triggered-compaction.test.ts @@ -221,6 +221,25 @@ describe("timeout-triggered compaction", () => { expect(result.payloads?.[0]?.text).toContain("timed out"); }); + it("points idle-timeout errors at the LLM idle timeout config key", async () => { + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + timedOut: true, + idleTimedOut: true, + lastAssistant: { + usage: { input: 20000 }, + } as never, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.payloads?.[0]?.isError).toBe(true); + expect(result.payloads?.[0]?.text).toContain("agents.defaults.llm.idleTimeoutSeconds"); + expect(result.payloads?.[0]?.text).not.toContain("agents.defaults.timeoutSeconds"); + }); + it("does not attempt compaction for low-context timeouts on later retries", async () => { mockedPickFallbackThinkingLevel.mockReturnValueOnce("low"); mockedRunEmbeddedAttempt diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index a80764ec118..888ad1df21d 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -688,6 +688,7 @@ export async function runEmbeddedPiAgent( promptErrorSource, preflightRecovery, timedOut, + idleTimedOut, timedOutDuringCompaction, sessionIdUsed, lastAssistant, @@ -1433,12 +1434,15 @@ export async function runEmbeddedPiAgent( // Emit an explicit timeout error instead of silently completing, so // callers do not lose the turn as an orphaned user message. if (timedOut && !timedOutDuringCompaction && payloads.length === 0) { + const timeoutText = idleTimedOut + ? "The model did not produce a response before the LLM idle timeout. " + + "Please try again, or increase `agents.defaults.llm.idleTimeoutSeconds` in your config (set to 0 to disable)." + : "Request timed out before a response was generated. " + + "Please try again, or increase `agents.defaults.timeoutSeconds` in your config."; return { payloads: [ { - text: - "Request timed out before a response was generated. " + - "Please try again, or increase `agents.defaults.timeoutSeconds` in your config.", + text: timeoutText, isError: true, }, ], diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 5476a0a3344..ece10891c6a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1266,7 +1266,10 @@ export async function runEmbeddedAttempt( let idleTimeoutTrigger: ((error: Error) => void) | undefined; // Wrap stream with idle timeout detection - const idleTimeoutMs = resolveLlmIdleTimeoutMs(params.config); + const idleTimeoutMs = resolveLlmIdleTimeoutMs({ + cfg: params.config, + trigger: params.trigger, + }); if (idleTimeoutMs > 0) { activeSession.agent.streamFn = streamWithIdleTimeout( activeSession.agent.streamFn, @@ -1377,6 +1380,7 @@ export async function runEmbeddedAttempt( let aborted = Boolean(params.abortSignal?.aborted); let yieldAborted = false; let timedOut = false; + let idleTimedOut = false; let timedOutDuringCompaction = false; const getAbortReason = (signal: AbortSignal): unknown => "reason" in signal ? (signal as { reason?: unknown }).reason : undefined; @@ -1426,6 +1430,7 @@ export async function runEmbeddedAttempt( void activeSession.abort(); }; idleTimeoutTrigger = (error) => { + idleTimedOut = true; abortRun(true, error); }; const abortable = (promise: Promise): Promise => { @@ -2327,6 +2332,7 @@ export async function runEmbeddedAttempt( itemLifecycle: getItemLifecycle(), aborted, timedOut, + idleTimedOut, timedOutDuringCompaction, promptError, promptErrorSource, diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts index 1892c20a280..e5c0ab38b50 100644 --- a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts +++ b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts @@ -8,46 +8,82 @@ import { describe("resolveLlmIdleTimeoutMs", () => { it("returns default when config is undefined", () => { - expect(resolveLlmIdleTimeoutMs(undefined)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + expect(resolveLlmIdleTimeoutMs()).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); }); it("returns default when llm config is missing", () => { const cfg = { agents: {} } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); }); it("returns default when idleTimeoutSeconds is not set", () => { const cfg = { agents: { defaults: { llm: {} } } } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); }); it("returns 0 when idleTimeoutSeconds is 0 (disabled)", () => { const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 0 } } } } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(0); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(0); }); it("returns configured value in milliseconds", () => { const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 30 } } } } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(30_000); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(30_000); }); it("caps at max safe timeout", () => { const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 10_000_000 } } }, } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(2_147_000_000); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(2_147_000_000); }); it("ignores negative values", () => { const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: -10 } } } } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); }); it("ignores non-finite values", () => { const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: Infinity } } }, } as OpenClawConfig; - expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + }); + + it("falls back to agents.defaults.timeoutSeconds when llm.idleTimeoutSeconds is not set", () => { + const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(300_000); + }); + + it("prefers llm.idleTimeoutSeconds over agents.defaults.timeoutSeconds", () => { + const cfg = { + agents: { defaults: { timeoutSeconds: 300, llm: { idleTimeoutSeconds: 120 } } }, + } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(120_000); + }); + + it("keeps idleTimeoutSeconds=0 disabled even when timeoutSeconds is set", () => { + const cfg = { + agents: { defaults: { timeoutSeconds: 300, llm: { idleTimeoutSeconds: 0 } } }, + } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg })).toBe(0); + }); + + it("disables the default idle timeout for cron when no timeout is configured", () => { + expect(resolveLlmIdleTimeoutMs({ trigger: "cron" })).toBe(0); + + const cfg = { agents: { defaults: { llm: {} } } } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(0); + }); + + it("uses agents.defaults.timeoutSeconds for cron before disabling the default idle timeout", () => { + const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(300_000); + }); + + it("keeps an explicit cron idle timeout when configured", () => { + const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 45 } } } } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(45_000); }); }); diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts index 6846c2bf477..20bba85b3e4 100644 --- a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts +++ b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts @@ -1,6 +1,7 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import { streamSimple } from "@mariozechner/pi-ai"; import type { OpenClawConfig } from "../../../config/config.js"; +import type { EmbeddedRunTrigger } from "./params.js"; /** * Default idle timeout for LLM streaming responses in milliseconds. @@ -17,18 +18,34 @@ const MAX_SAFE_TIMEOUT_MS = 2_147_000_000; /** * Resolves the LLM idle timeout from configuration. - * @param cfg - OpenClaw configuration * @returns Idle timeout in milliseconds, or 0 to disable */ -export function resolveLlmIdleTimeoutMs(cfg?: OpenClawConfig): number { - const raw = cfg?.agents?.defaults?.llm?.idleTimeoutSeconds; - // 0 means disabled (no timeout) +export function resolveLlmIdleTimeoutMs(params?: { + cfg?: OpenClawConfig; + trigger?: EmbeddedRunTrigger; +}): number { + const raw = params?.cfg?.agents?.defaults?.llm?.idleTimeoutSeconds; + // 0 means explicitly disabled (no timeout). if (raw === 0) { return 0; } if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) { return Math.min(Math.floor(raw) * 1000, MAX_SAFE_TIMEOUT_MS); } + + const agentTimeoutSeconds = params?.cfg?.agents?.defaults?.timeoutSeconds; + if ( + typeof agentTimeoutSeconds === "number" && + Number.isFinite(agentTimeoutSeconds) && + agentTimeoutSeconds > 0 + ) { + return Math.min(Math.floor(agentTimeoutSeconds) * 1000, MAX_SAFE_TIMEOUT_MS); + } + + if (params?.trigger === "cron") { + return 0; + } + return DEFAULT_LLM_IDLE_TIMEOUT_MS; } diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index 5ffe16b2810..87e498938e3 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -39,6 +39,8 @@ export type EmbeddedRunAttemptParams = EmbeddedRunAttemptBase & { export type EmbeddedRunAttemptResult = { aborted: boolean; timedOut: boolean; + /** True when the no-response LLM idle watchdog caused the timeout. */ + idleTimedOut: boolean; /** True if the timeout occurred while compaction was in progress or pending. */ timedOutDuringCompaction: boolean; promptError: unknown; diff --git a/src/agents/pi-embedded-runner/usage-reporting.test.ts b/src/agents/pi-embedded-runner/usage-reporting.test.ts index 00cd0c0e645..e8914d0f34c 100644 --- a/src/agents/pi-embedded-runner/usage-reporting.test.ts +++ b/src/agents/pi-embedded-runner/usage-reporting.test.ts @@ -19,6 +19,7 @@ function makeAttemptResult( return { aborted: false, timedOut: false, + idleTimedOut: false, timedOutDuringCompaction: false, promptError: null, promptErrorSource: null, diff --git a/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts b/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts index 7b4dd7057fc..47ec0f0ed2e 100644 --- a/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts +++ b/src/agents/test-helpers/pi-embedded-runner-e2e-fixtures.ts @@ -103,6 +103,7 @@ export function makeEmbeddedRunnerAttempt( return { aborted: false, timedOut: false, + idleTimedOut: false, timedOutDuringCompaction: false, promptError: null, promptErrorSource: null,