From 98f6ec50aadbc90e4aafa3bedfe1f432b6a4cea6 Mon Sep 17 00:00:00 2001 From: bot_apk Date: Mon, 16 Mar 2026 03:36:56 +0000 Subject: [PATCH] fix: address 6 review comments on PR #47719 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. [P1] Treat remap failures as resume failures — if replaceSubagentRunAfterSteer returns false, do NOT clear abortedLastRun, increment failed count. 2. [P2] Count scan-level exceptions as retryable failures — set result.failed > 0 in the outer catch block so scheduleOrphanRecovery retry logic triggers. 3. [P2] Persist resumed-session dedupe across recovery retries — accept resumedSessionKeys as a parameter; scheduleOrphanRecovery lifts the Set to its own scope and passes it through retries. 4. [Greptile] Use typed config accessors instead of raw structural cast for TLS check in lifecycle.ts. 5. [Greptile] Forward gateway.reload.deferralTimeoutMs to deferGatewayRestartUntilIdle in scheduleGatewaySigusr1Restart so user-configured value is not silently ignored. 6. [Greptile] Same as #4 — already addressed by the typed config fix. Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- src/agents/subagent-orphan-recovery.ts | 20 +++++++++++++++++--- src/cli/daemon-cli/lifecycle.ts | 3 +-- src/infra/restart.ts | 7 ++++++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/agents/subagent-orphan-recovery.ts b/src/agents/subagent-orphan-recovery.ts index 320d96f3727..ed2eac6d8f3 100644 --- a/src/agents/subagent-orphan-recovery.ts +++ b/src/agents/subagent-orphan-recovery.ts @@ -100,10 +100,16 @@ async function resumeOrphanedSession(params: { }, timeoutMs: 10_000, }); - replaceSubagentRunAfterSteer({ + const remapped = replaceSubagentRunAfterSteer({ previousRunId: params.originalRunId, nextRunId: result.runId, }); + if (!remapped) { + log.warn( + `resumed orphaned session ${params.sessionKey} but remap failed (old run already removed); treating as failure`, + ); + return false; + } log.info(`resumed orphaned session: ${params.sessionKey}`); return true; } catch (err) { @@ -125,9 +131,11 @@ async function resumeOrphanedSession(params: { */ export async function recoverOrphanedSubagentSessions(params: { getActiveRuns: () => Map; + /** Persisted across retries so already-resumed sessions are not resumed again. */ + resumedSessionKeys?: Set; }): Promise<{ recovered: number; failed: number; skipped: number }> { const result = { recovered: 0, failed: 0, skipped: 0 }; - const resumedSessionKeys = new Set(); + const resumedSessionKeys = params.resumedSessionKeys ?? new Set(); const configChangePattern = /openclaw\.json|openclaw gateway restart|config\.patch/i; try { @@ -236,6 +244,10 @@ export async function recoverOrphanedSubagentSessions(params: { } } catch (err) { log.warn(`orphan recovery scan failed: ${String(err)}`); + // Ensure retry logic fires for scan-level exceptions. + if (result.failed === 0) { + result.failed = 1; + } } if (result.recovered > 0 || result.failed > 0) { @@ -265,9 +277,11 @@ export function scheduleOrphanRecovery(params: { const initialDelay = params.delayMs ?? DEFAULT_RECOVERY_DELAY_MS; const maxRetries = params.maxRetries ?? MAX_RECOVERY_RETRIES; + const resumedSessionKeys = new Set(); + const attemptRecovery = (attempt: number, delay: number) => { setTimeout(() => { - void recoverOrphanedSubagentSessions(params) + void recoverOrphanedSubagentSessions({ ...params, resumedSessionKeys }) .then((result) => { if (result.failed > 0 && attempt < maxRetries) { const nextDelay = delay * RETRY_BACKOFF_MULTIPLIER; diff --git a/src/cli/daemon-cli/lifecycle.ts b/src/cli/daemon-cli/lifecycle.ts index 76099fe956c..d3e01f66412 100644 --- a/src/cli/daemon-cli/lifecycle.ts +++ b/src/cli/daemon-cli/lifecycle.ts @@ -51,8 +51,7 @@ function resolveGatewayPortFallback(): Promise { async function assertUnmanagedGatewayRestartEnabled(port: number): Promise { const cfg = await readBestEffortConfig().catch(() => undefined); - const tlsEnabled = !!(cfg as { gateway?: { tls?: { enabled?: unknown } } } | undefined)?.gateway - ?.tls?.enabled; + const tlsEnabled = !!cfg?.gateway?.tls?.enabled; const scheme = tlsEnabled ? "wss" : "ws"; const probe = await probeGateway({ url: `${scheme}://127.0.0.1:${port}`, diff --git a/src/infra/restart.ts b/src/infra/restart.ts index 1238b4954d0..f671df382e2 100644 --- a/src/infra/restart.ts +++ b/src/infra/restart.ts @@ -1,6 +1,7 @@ import { spawnSync } from "node:child_process"; import os from "node:os"; import path from "node:path"; +import { loadConfig } from "../config/config.js"; import { resolveGatewayLaunchAgentLabel, resolveGatewaySystemdServiceName, @@ -476,7 +477,11 @@ export function scheduleGatewaySigusr1Restart(opts?: { emitGatewayRestart(); return; } - deferGatewayRestartUntilIdle({ getPendingCount: pendingCheck }); + const cfg = loadConfig(); + deferGatewayRestartUntilIdle({ + getPendingCount: pendingCheck, + maxWaitMs: cfg.gateway?.reload?.deferralTimeoutMs, + }); }, Math.max(0, requestedDueAt - nowMs), );