From 54e6e3d7daf5d0d857edf756b35628a29d11c7f5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 30 Apr 2026 19:22:30 +0100 Subject: [PATCH] fix(codex): time out silent app-server turns --- docs/concepts/agent-loop.md | 1 + docs/concepts/queue.md | 1 + .../codex/src/app-server/run-attempt.test.ts | 27 +++++++ .../codex/src/app-server/run-attempt.ts | 81 +++++++++++++++++++ 4 files changed, 110 insertions(+) diff --git a/docs/concepts/agent-loop.md b/docs/concepts/agent-loop.md index 464ea294656..79a7bfb5f53 100644 --- a/docs/concepts/agent-loop.md +++ b/docs/concepts/agent-loop.md @@ -32,6 +32,7 @@ wired end-to-end. - resolves model + auth profile and builds the pi session - subscribes to pi events and streams assistant/tool deltas - enforces timeout -> aborts run if exceeded + - for Codex app-server turns, aborts an accepted turn that stops producing app-server progress before a terminal event - returns payloads + usage metadata 4. `subscribeEmbeddedPiSession` bridges pi-agent-core events to OpenClaw `agent` stream: - tool events => `stream: "tool"` diff --git a/docs/concepts/queue.md b/docs/concepts/queue.md index 50e7d7ae542..41e501e82b2 100644 --- a/docs/concepts/queue.md +++ b/docs/concepts/queue.md @@ -114,6 +114,7 @@ keys. - If commands seem stuck, enable verbose logs and look for “queued for …ms” lines to confirm the queue is draining. - If you need queue depth, enable verbose logs and watch for queue timing lines. +- Codex app-server runs that accept a turn and then stop emitting progress are interrupted by the Codex adapter so the active session lane can release instead of waiting for the outer run timeout. - When diagnostics are enabled, sessions that remain in `processing` past `diagnostics.stuckSessionWarnMs` log a stuck-session warning. Active embedded runs, active reply operations, and active lane tasks remain warning-only by default; stale startup bookkeeping with no active session work can release the affected session lane so queued work drains. ## Related diff --git a/extensions/codex/src/app-server/run-attempt.test.ts b/extensions/codex/src/app-server/run-attempt.test.ts index 8d81370b30e..dc6ceccdf3f 100644 --- a/extensions/codex/src/app-server/run-attempt.test.ts +++ b/extensions/codex/src/app-server/run-attempt.test.ts @@ -443,6 +443,33 @@ describe("runCodexAppServerAttempt", () => { expect(queueAgentHarnessMessage("session-1", "after timeout")).toBe(false); }); + it("releases the session when Codex accepts a turn but never sends progress", async () => { + const harness = createStartedThreadHarness(); + const params = createParams( + path.join(tempDir, "session.jsonl"), + path.join(tempDir, "workspace"), + ); + params.timeoutMs = 60_000; + + const run = runCodexAppServerAttempt(params, { turnTerminalIdleTimeoutMs: 5 }); + await harness.waitForMethod("turn/start"); + + await expect(run).resolves.toMatchObject({ + aborted: true, + timedOut: true, + promptError: "codex app-server turn idle timed out waiting for turn/completed", + }); + await vi.waitFor( + () => + expect(harness.request).toHaveBeenCalledWith("turn/interrupt", { + threadId: "thread-1", + turnId: "turn-1", + }), + { interval: 1 }, + ); + expect(queueAgentHarnessMessage("session-1", "after silent turn")).toBe(false); + }); + it("applies before_prompt_build to Codex developer instructions and turn input", async () => { const beforePromptBuild = vi.fn(async () => ({ systemPrompt: "custom codex system", diff --git a/extensions/codex/src/app-server/run-attempt.ts b/extensions/codex/src/app-server/run-attempt.ts index 34ae4f0a2e4..fff8a52703b 100644 --- a/extensions/codex/src/app-server/run-attempt.ts +++ b/extensions/codex/src/app-server/run-attempt.ts @@ -87,6 +87,7 @@ import { filterToolsForVisionInputs } from "./vision-tools.js"; const CODEX_DYNAMIC_TOOL_TIMEOUT_MS = 30_000; const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000; +const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000; const CODEX_STEER_ALL_DEBOUNCE_MS = 500; type OpenClawCodingToolsOptions = NonNullable< @@ -226,6 +227,7 @@ export async function runCodexAppServerAttempt( hookTimeoutSec?: number; }; turnCompletionIdleTimeoutMs?: number; + turnTerminalIdleTimeoutMs?: number; } = {}, ): Promise { const attemptStartedAt = Date.now(); @@ -471,8 +473,13 @@ export async function runCodexAppServerAttempt( const turnCompletionIdleTimeoutMs = resolveCodexTurnCompletionIdleTimeoutMs( options.turnCompletionIdleTimeoutMs, ); + const turnTerminalIdleTimeoutMs = resolveCodexTurnTerminalIdleTimeoutMs( + options.turnTerminalIdleTimeoutMs, + ); let turnCompletionIdleTimer: ReturnType | undefined; let turnCompletionIdleWatchArmed = false; + let turnTerminalIdleTimer: ReturnType | undefined; + let turnTerminalIdleWatchArmed = false; let turnCompletionLastActivityAt = Date.now(); let turnCompletionLastActivityReason = "startup"; let activeAppServerTurnRequests = 0; @@ -484,6 +491,13 @@ export async function runCodexAppServerAttempt( } }; + const clearTurnTerminalIdleTimer = () => { + if (turnTerminalIdleTimer) { + clearTimeout(turnTerminalIdleTimer); + turnTerminalIdleTimer = undefined; + } + }; + const fireTurnCompletionIdleTimeout = () => { if ( completed || @@ -520,6 +534,42 @@ export async function runCodexAppServerAttempt( runAbortController.abort("turn_completion_idle_timeout"); }; + const fireTurnTerminalIdleTimeout = () => { + if ( + completed || + runAbortController.signal.aborted || + !turnTerminalIdleWatchArmed || + activeAppServerTurnRequests > 0 + ) { + return; + } + const idleMs = Math.max(0, Date.now() - turnCompletionLastActivityAt); + if (idleMs < turnTerminalIdleTimeoutMs) { + scheduleTurnTerminalIdleWatch(); + return; + } + timedOut = true; + turnCompletionIdleTimedOut = true; + turnCompletionIdleTimeoutMessage = + "codex app-server turn idle timed out waiting for turn/completed"; + projector?.markTimedOut(); + trajectoryRecorder?.recordEvent("turn.terminal_idle_timeout", { + threadId: thread.threadId, + turnId, + idleMs, + timeoutMs: turnTerminalIdleTimeoutMs, + lastActivityReason: turnCompletionLastActivityReason, + }); + embeddedAgentLog.warn("codex app-server turn idle timed out waiting for terminal event", { + threadId: thread.threadId, + turnId, + idleMs, + timeoutMs: turnTerminalIdleTimeoutMs, + lastActivityReason: turnCompletionLastActivityReason, + }); + runAbortController.abort("turn_terminal_idle_timeout"); + }; + function scheduleTurnCompletionIdleWatch() { clearTurnCompletionIdleTimer(); if ( @@ -536,6 +586,22 @@ export async function runCodexAppServerAttempt( turnCompletionIdleTimer.unref?.(); } + function scheduleTurnTerminalIdleWatch() { + clearTurnTerminalIdleTimer(); + if ( + completed || + runAbortController.signal.aborted || + !turnTerminalIdleWatchArmed || + activeAppServerTurnRequests > 0 + ) { + return; + } + const elapsedMs = Math.max(0, Date.now() - turnCompletionLastActivityAt); + const delayMs = Math.max(1, turnTerminalIdleTimeoutMs - elapsedMs); + turnTerminalIdleTimer = setTimeout(fireTurnTerminalIdleTimeout, delayMs); + turnTerminalIdleTimer.unref?.(); + } + const touchTurnCompletionActivity = (reason: string, options?: { arm?: boolean }) => { turnCompletionLastActivityAt = Date.now(); turnCompletionLastActivityReason = reason; @@ -543,6 +609,7 @@ export async function runCodexAppServerAttempt( turnCompletionIdleWatchArmed = true; } scheduleTurnCompletionIdleWatch(); + scheduleTurnTerminalIdleWatch(); }; const emitLifecycleStart = () => { @@ -595,6 +662,7 @@ export async function runCodexAppServerAttempt( } completed = true; clearTurnCompletionIdleTimer(); + clearTurnTerminalIdleTimer(); resolveCompletion?.(); } } @@ -839,6 +907,7 @@ export async function runCodexAppServerAttempt( abort: () => runAbortController.abort("aborted"), }; setActiveEmbeddedRun(params.sessionId, handle, params.sessionKey); + turnTerminalIdleWatchArmed = true; touchTurnCompletionActivity("turn:start"); const timeout = setTimeout( @@ -1005,6 +1074,7 @@ export async function runCodexAppServerAttempt( userInputBridge?.cancelPending(); clearTimeout(timeout); clearTurnCompletionIdleTimer(); + clearTurnTerminalIdleTimer(); notificationCleanup(); requestCleanup(); nativeHookRelay?.unregister(); @@ -1305,6 +1375,16 @@ function resolveCodexTurnCompletionIdleTimeoutMs(value: number | undefined): num return Math.max(1, Math.floor(value)); } +function resolveCodexTurnTerminalIdleTimeoutMs(value: number | undefined): number { + if (value === undefined) { + return CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS; + } + if (!Number.isFinite(value)) { + return CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS; + } + return Math.max(1, Math.floor(value)); +} + function readDynamicToolCallParams( value: JsonValue | undefined, ): CodexDynamicToolCallParams | undefined { @@ -1417,6 +1497,7 @@ function handleApprovalRequest(params: { export const __testing = { CODEX_DYNAMIC_TOOL_TIMEOUT_MS, CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS, + CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS, buildCodexNativeHookRelayId, filterToolsForVisionInputs, handleDynamicToolCallWithTimeout,