mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-18 04:31:10 +00:00
fix(gateway): address review feedback on memory leak fix
1. clearAgentRunContext now also deletes seqByRun (Greptile P2)
2. TTL constants moved to module scope (Greptile P2)
3. Session-mode TTL uses cleanupCompletedAt instead of endedAt to avoid interrupting deferred cleanup flows (Codex P1)
4. Added lastActiveAt to AgentRunContext, refreshed on every emitAgentEvent — long-running active agents are not swept (Codex P1)
5. resetAgentRunContextForTest also clears seqByRun (P2 drive-by)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,6 +108,10 @@ const SUBAGENT_ANNOUNCE_TIMEOUT_MS = 120_000;
|
||||
* subsequent lifecycle `start` / `end` can cancel premature failure announces.
|
||||
*/
|
||||
const LIFECYCLE_ERROR_RETRY_GRACE_MS = 15_000;
|
||||
/** Absolute TTL for session-mode runs after cleanup completes (no archiveAtMs). */
|
||||
const SESSION_RUN_TTL_MS = 5 * 60_000; // 5 minutes
|
||||
/** Absolute TTL for orphaned pendingLifecycleError entries. */
|
||||
const PENDING_ERROR_TTL_MS = 5 * 60_000; // 5 minutes
|
||||
|
||||
function loadSubagentRegistryRuntime() {
|
||||
subagentRegistryRuntimePromise ??= import("./subagent-registry.runtime.js");
|
||||
@@ -478,11 +482,11 @@ function stopSweeper() {
|
||||
async function sweepSubagentRuns() {
|
||||
const now = Date.now();
|
||||
let mutated = false;
|
||||
const SESSION_RUN_TTL_MS = 5 * 60 * 1000; // 5 min absolute TTL for session-mode runs
|
||||
for (const [runId, entry] of subagentRuns.entries()) {
|
||||
// Session-mode runs have no archiveAtMs — apply absolute TTL after completion.
|
||||
// Session-mode runs have no archiveAtMs — apply absolute TTL after cleanup completes.
|
||||
// Use cleanupCompletedAt (not endedAt) to avoid interrupting deferred cleanup flows.
|
||||
if (!entry.archiveAtMs) {
|
||||
if (typeof entry.endedAt === "number" && now - entry.endedAt > SESSION_RUN_TTL_MS) {
|
||||
if (typeof entry.cleanupCompletedAt === "number" && now - entry.cleanupCompletedAt > SESSION_RUN_TTL_MS) {
|
||||
clearPendingLifecycleError(runId);
|
||||
void notifyContextEngineSubagentEnded({
|
||||
childSessionKey: entry.childSessionKey,
|
||||
@@ -523,7 +527,6 @@ async function sweepSubagentRuns() {
|
||||
}
|
||||
}
|
||||
// Sweep orphaned pendingLifecycleError entries (absolute TTL).
|
||||
const PENDING_ERROR_TTL_MS = 5 * 60 * 1000;
|
||||
for (const [runId, pending] of pendingLifecycleErrorByRunId.entries()) {
|
||||
if (now - pending.endedAt > PENDING_ERROR_TTL_MS) {
|
||||
clearPendingLifecycleError(runId);
|
||||
|
||||
@@ -113,6 +113,8 @@ export type AgentRunContext = {
|
||||
isControlUiVisible?: boolean;
|
||||
/** Timestamp when this context was first registered (for TTL-based cleanup). */
|
||||
registeredAt?: number;
|
||||
/** Timestamp of last activity (updated on every emitAgentEvent). */
|
||||
lastActiveAt?: number;
|
||||
};
|
||||
|
||||
type AgentEventState = {
|
||||
@@ -161,6 +163,7 @@ export function getAgentRunContext(runId: string) {
|
||||
|
||||
export function clearAgentRunContext(runId: string) {
|
||||
getAgentEventState().runContextById.delete(runId);
|
||||
getAgentEventState().seqByRun.delete(runId);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -171,8 +174,10 @@ export function sweepStaleRunContexts(maxAgeMs = 30 * 60 * 1000): number {
|
||||
const now = Date.now();
|
||||
let swept = 0;
|
||||
for (const [runId, ctx] of state.runContextById.entries()) {
|
||||
// Treat missing registeredAt (pre-deploy entries) as infinitely old.
|
||||
const age = ctx.registeredAt ? now - ctx.registeredAt : Infinity;
|
||||
// Use lastActiveAt (refreshed on every event) to avoid sweeping active runs.
|
||||
// Fall back to registeredAt, then treat missing timestamps as infinitely old.
|
||||
const lastSeen = ctx.lastActiveAt ?? ctx.registeredAt;
|
||||
const age = lastSeen ? now - lastSeen : Infinity;
|
||||
if (age > maxAgeMs) {
|
||||
state.runContextById.delete(runId);
|
||||
state.seqByRun.delete(runId);
|
||||
@@ -184,6 +189,7 @@ export function sweepStaleRunContexts(maxAgeMs = 30 * 60 * 1000): number {
|
||||
|
||||
export function resetAgentRunContextForTest() {
|
||||
getAgentEventState().runContextById.clear();
|
||||
getAgentEventState().seqByRun.clear();
|
||||
}
|
||||
|
||||
export function emitAgentEvent(event: Omit<AgentEventPayload, "seq" | "ts">) {
|
||||
@@ -191,6 +197,9 @@ export function emitAgentEvent(event: Omit<AgentEventPayload, "seq" | "ts">) {
|
||||
const nextSeq = (state.seqByRun.get(event.runId) ?? 0) + 1;
|
||||
state.seqByRun.set(event.runId, nextSeq);
|
||||
const context = state.runContextById.get(event.runId);
|
||||
if (context) {
|
||||
context.lastActiveAt = Date.now();
|
||||
}
|
||||
const isControlUiVisible = context?.isControlUiVisible ?? true;
|
||||
const eventSessionKey =
|
||||
typeof event.sessionKey === "string" && event.sessionKey.trim() ? event.sessionKey : undefined;
|
||||
|
||||
Reference in New Issue
Block a user