fix(codex): time out silent app-server turns

This commit is contained in:
Peter Steinberger
2026-04-30 19:22:30 +01:00
parent 126dcb0d9e
commit 54e6e3d7da
4 changed files with 110 additions and 0 deletions

View File

@@ -32,6 +32,7 @@ wired end-to-end.
- resolves model + auth profile and builds the pi session
- subscribes to pi events and streams assistant/tool deltas
- enforces timeout -> aborts run if exceeded
- for Codex app-server turns, aborts an accepted turn that stops producing app-server progress before a terminal event
- returns payloads + usage metadata
4. `subscribeEmbeddedPiSession` bridges pi-agent-core events to OpenClaw `agent` stream:
- tool events => `stream: "tool"`

View File

@@ -114,6 +114,7 @@ keys.
- If commands seem stuck, enable verbose logs and look for “queued for …ms” lines to confirm the queue is draining.
- If you need queue depth, enable verbose logs and watch for queue timing lines.
- Codex app-server runs that accept a turn and then stop emitting progress (by default, for 30 minutes) are interrupted by the Codex adapter, so the active session lane is released instead of waiting for the outer run timeout.
- When diagnostics are enabled, sessions that remain in `processing` past `diagnostics.stuckSessionWarnMs` log a stuck-session warning. Active embedded runs, active reply operations, and active lane tasks remain warning-only by default; stale startup bookkeeping with no active session work can release the affected session lane so queued work drains.
## Related

View File

@@ -443,6 +443,33 @@ describe("runCodexAppServerAttempt", () => {
expect(queueAgentHarnessMessage("session-1", "after timeout")).toBe(false);
});
it("releases the session when Codex accepts a turn but never sends progress", async () => {
  const harness = createStartedThreadHarness();
  const sessionFile = path.join(tempDir, "session.jsonl");
  const workspaceDir = path.join(tempDir, "workspace");
  const params = createParams(sessionFile, workspaceDir);
  // Keep the outer run timeout (60s) far above the 5ms terminal-idle window so
  // that only the idle watchdog can end this run.
  params.timeoutMs = 60_000;

  const attempt = runCodexAppServerAttempt(params, { turnTerminalIdleTimeoutMs: 5 });
  await harness.waitForMethod("turn/start");

  // The attempt must settle on its own, flagged as an aborted, timed-out run.
  const expectedOutcome = {
    aborted: true,
    timedOut: true,
    promptError: "codex app-server turn idle timed out waiting for turn/completed",
  };
  await expect(attempt).resolves.toMatchObject(expectedOutcome);

  // Poll until the interrupt request for the stalled turn has been observed.
  const interruptPayload = {
    threadId: "thread-1",
    turnId: "turn-1",
  };
  await vi.waitFor(
    () => {
      expect(harness.request).toHaveBeenCalledWith("turn/interrupt", interruptPayload);
    },
    { interval: 1 },
  );

  // Queueing onto the session is expected to be refused once the run is over.
  const queuedAfterSilentTurn = queueAgentHarnessMessage("session-1", "after silent turn");
  expect(queuedAfterSilentTurn).toBe(false);
});
it("applies before_prompt_build to Codex developer instructions and turn input", async () => {
const beforePromptBuild = vi.fn(async () => ({
systemPrompt: "custom codex system",

View File

@@ -87,6 +87,7 @@ import { filterToolsForVisionInputs } from "./vision-tools.js";
const CODEX_DYNAMIC_TOOL_TIMEOUT_MS = 30_000;
const CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS = 60_000;
// Default terminal-idle window (30 minutes): how long an accepted turn may go
// without recorded activity before the run is aborted with the reason
// "turn_terminal_idle_timeout". Callers can override it through
// `options.turnTerminalIdleTimeoutMs`.
const CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS = 30 * 60_000;
const CODEX_STEER_ALL_DEBOUNCE_MS = 500;
type OpenClawCodingToolsOptions = NonNullable<
@@ -226,6 +227,7 @@ export async function runCodexAppServerAttempt(
hookTimeoutSec?: number;
};
turnCompletionIdleTimeoutMs?: number;
turnTerminalIdleTimeoutMs?: number;
} = {},
): Promise<EmbeddedRunAttemptResult> {
const attemptStartedAt = Date.now();
@@ -471,8 +473,13 @@ export async function runCodexAppServerAttempt(
const turnCompletionIdleTimeoutMs = resolveCodexTurnCompletionIdleTimeoutMs(
options.turnCompletionIdleTimeoutMs,
);
const turnTerminalIdleTimeoutMs = resolveCodexTurnTerminalIdleTimeoutMs(
options.turnTerminalIdleTimeoutMs,
);
let turnCompletionIdleTimer: ReturnType<typeof setTimeout> | undefined;
let turnCompletionIdleWatchArmed = false;
let turnTerminalIdleTimer: ReturnType<typeof setTimeout> | undefined;
let turnTerminalIdleWatchArmed = false;
let turnCompletionLastActivityAt = Date.now();
let turnCompletionLastActivityReason = "startup";
let activeAppServerTurnRequests = 0;
@@ -484,6 +491,13 @@ export async function runCodexAppServerAttempt(
}
};
/** Cancels the pending terminal-idle timer, if one is armed, and forgets its handle. */
const clearTurnTerminalIdleTimer = () => {
  if (!turnTerminalIdleTimer) {
    // Nothing scheduled; leave the handle untouched.
    return;
  }
  clearTimeout(turnTerminalIdleTimer);
  turnTerminalIdleTimer = undefined;
};
const fireTurnCompletionIdleTimeout = () => {
if (
completed ||
@@ -520,6 +534,42 @@ export async function runCodexAppServerAttempt(
runAbortController.abort("turn_completion_idle_timeout");
};
/**
 * Timer callback for the terminal-idle watch.
 *
 * Does nothing when the run is already completed or aborted, when the watch is
 * not armed, or while app-server turn requests are in flight. If the idle
 * window has not fully elapsed yet, it re-arms the watch. Otherwise it marks
 * the run as timed out, records and logs the timeout, and aborts the run with
 * the reason "turn_terminal_idle_timeout".
 */
const fireTurnTerminalIdleTimeout = () => {
  const watchIsLive =
    !completed &&
    !runAbortController.signal.aborted &&
    turnTerminalIdleWatchArmed &&
    !(activeAppServerTurnRequests > 0);
  if (!watchIsLive) {
    return;
  }

  const silentForMs = Math.max(0, Date.now() - turnCompletionLastActivityAt);
  if (silentForMs < turnTerminalIdleTimeoutMs) {
    // Activity was recorded after this timer was armed; wait out the remainder.
    scheduleTurnTerminalIdleWatch();
    return;
  }

  // This path reuses the completion-idle flag and message variables.
  timedOut = true;
  turnCompletionIdleTimedOut = true;
  turnCompletionIdleTimeoutMessage =
    "codex app-server turn idle timed out waiting for turn/completed";
  projector?.markTimedOut();

  // Build a fresh payload per consumer so neither can observe the other's object.
  const describeTimeout = () => ({
    threadId: thread.threadId,
    turnId,
    idleMs: silentForMs,
    timeoutMs: turnTerminalIdleTimeoutMs,
    lastActivityReason: turnCompletionLastActivityReason,
  });
  trajectoryRecorder?.recordEvent("turn.terminal_idle_timeout", describeTimeout());
  embeddedAgentLog.warn(
    "codex app-server turn idle timed out waiting for terminal event",
    describeTimeout(),
  );

  runAbortController.abort("turn_terminal_idle_timeout");
};
function scheduleTurnCompletionIdleWatch() {
clearTurnCompletionIdleTimer();
if (
@@ -536,6 +586,22 @@ export async function runCodexAppServerAttempt(
turnCompletionIdleTimer.unref?.();
}
/**
 * (Re)arms the terminal-idle timer relative to the last recorded turn activity.
 *
 * Any previously armed timer is cleared first. No timer is scheduled when the
 * run is completed or aborted, when the watch has not been armed, or while
 * app-server turn requests are in flight.
 *
 * The delay is the remaining part of the idle window (at least 1ms), clamped
 * to the largest delay the timer API honours. `fireTurnTerminalIdleTimeout`
 * re-checks the idle time and re-arms when the window has not elapsed, so a
 * clamped timer only results in an extra wake-up, never an early abort.
 */
function scheduleTurnTerminalIdleWatch() {
  // setTimeout stores the delay as a signed 32-bit integer; Node coerces larger
  // values to 1ms, which would make this watch fire and re-arm every millisecond.
  const maxTimerDelayMs = 2_147_483_647;

  clearTurnTerminalIdleTimer();
  if (
    completed ||
    runAbortController.signal.aborted ||
    !turnTerminalIdleWatchArmed ||
    activeAppServerTurnRequests > 0
  ) {
    return;
  }
  const elapsedMs = Math.max(0, Date.now() - turnCompletionLastActivityAt);
  const remainingMs = Math.max(1, turnTerminalIdleTimeoutMs - elapsedMs);
  const delayMs = Math.min(remainingMs, maxTimerDelayMs);
  turnTerminalIdleTimer = setTimeout(fireTurnTerminalIdleTimeout, delayMs);
  // Do not let this watchdog alone keep the process alive.
  turnTerminalIdleTimer.unref?.();
}
const touchTurnCompletionActivity = (reason: string, options?: { arm?: boolean }) => {
turnCompletionLastActivityAt = Date.now();
turnCompletionLastActivityReason = reason;
@@ -543,6 +609,7 @@ export async function runCodexAppServerAttempt(
turnCompletionIdleWatchArmed = true;
}
scheduleTurnCompletionIdleWatch();
scheduleTurnTerminalIdleWatch();
};
const emitLifecycleStart = () => {
@@ -595,6 +662,7 @@ export async function runCodexAppServerAttempt(
}
completed = true;
clearTurnCompletionIdleTimer();
clearTurnTerminalIdleTimer();
resolveCompletion?.();
}
}
@@ -839,6 +907,7 @@ export async function runCodexAppServerAttempt(
abort: () => runAbortController.abort("aborted"),
};
setActiveEmbeddedRun(params.sessionId, handle, params.sessionKey);
turnTerminalIdleWatchArmed = true;
touchTurnCompletionActivity("turn:start");
const timeout = setTimeout(
@@ -1005,6 +1074,7 @@ export async function runCodexAppServerAttempt(
userInputBridge?.cancelPending();
clearTimeout(timeout);
clearTurnCompletionIdleTimer();
clearTurnTerminalIdleTimer();
notificationCleanup();
requestCleanup();
nativeHookRelay?.unregister();
@@ -1305,6 +1375,16 @@ function resolveCodexTurnCompletionIdleTimeoutMs(value: number | undefined): num
return Math.max(1, Math.floor(value));
}
/**
 * Normalizes an optional terminal-idle timeout override.
 *
 * @param value - Requested timeout in milliseconds, if any.
 * @returns `CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS` when `value` is missing or not
 *   a finite number; otherwise `value` rounded down to a whole number of
 *   milliseconds, with a minimum of 1.
 */
function resolveCodexTurnTerminalIdleTimeoutMs(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS;
  }
  const wholeMs = Math.floor(value);
  return wholeMs < 1 ? 1 : wholeMs;
}
function readDynamicToolCallParams(
value: JsonValue | undefined,
): CodexDynamicToolCallParams | undefined {
@@ -1417,6 +1497,7 @@ function handleApprovalRequest(params: {
export const __testing = {
CODEX_DYNAMIC_TOOL_TIMEOUT_MS,
CODEX_TURN_COMPLETION_IDLE_TIMEOUT_MS,
CODEX_TURN_TERMINAL_IDLE_TIMEOUT_MS,
buildCodexNativeHookRelayId,
filterToolsForVisionInputs,
handleDynamicToolCallWithTimeout,