fix(gateway): address review feedback on memory leak fix

1. clearAgentRunContext now also deletes seqByRun (Greptile P2)
2. TTL constants moved to module scope (Greptile P2)
3. Session-mode TTL uses cleanupCompletedAt instead of endedAt to
   avoid interrupting deferred cleanup flows (Codex P1)
4. Added lastActiveAt to AgentRunContext, refreshed on every
   emitAgentEvent, so long-running active agents are no longer swept (Codex P1)
5. resetAgentRunContextForTest also clears seqByRun (P2 drive-by)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
XING
2026-03-23 16:46:58 +08:00
committed by Josh Lehman
parent f1faf0c970
commit 840ae56cef
2 changed files with 18 additions and 6 deletions

View File

@@ -108,6 +108,10 @@ const SUBAGENT_ANNOUNCE_TIMEOUT_MS = 120_000;
* subsequent lifecycle `start` / `end` can cancel premature failure announces.
*/
const LIFECYCLE_ERROR_RETRY_GRACE_MS = 15_000;
/**
 * How long (in ms) a session-mode run — one without `archiveAtMs` — is kept
 * once its cleanup has completed; the sweeper compares this against
 * `cleanupCompletedAt`.
 */
const SESSION_RUN_TTL_MS = 5 * 60 * 1_000; // 5 minutes
/**
 * How long (in ms) an orphaned `pendingLifecycleError` entry may linger,
 * measured from its `endedAt`, before the sweeper clears it.
 */
const PENDING_ERROR_TTL_MS = 5 * 60 * 1_000; // 5 minutes
function loadSubagentRegistryRuntime() {
subagentRegistryRuntimePromise ??= import("./subagent-registry.runtime.js");
@@ -478,11 +482,11 @@ function stopSweeper() {
async function sweepSubagentRuns() {
const now = Date.now();
let mutated = false;
const SESSION_RUN_TTL_MS = 5 * 60 * 1000; // 5 min absolute TTL for session-mode runs
for (const [runId, entry] of subagentRuns.entries()) {
// Session-mode runs have no archiveAtMs — apply absolute TTL after completion.
// Session-mode runs have no archiveAtMs — apply absolute TTL after cleanup completes.
// Use cleanupCompletedAt (not endedAt) to avoid interrupting deferred cleanup flows.
if (!entry.archiveAtMs) {
if (typeof entry.endedAt === "number" && now - entry.endedAt > SESSION_RUN_TTL_MS) {
if (typeof entry.cleanupCompletedAt === "number" && now - entry.cleanupCompletedAt > SESSION_RUN_TTL_MS) {
clearPendingLifecycleError(runId);
void notifyContextEngineSubagentEnded({
childSessionKey: entry.childSessionKey,
@@ -523,7 +527,6 @@ async function sweepSubagentRuns() {
}
}
// Sweep orphaned pendingLifecycleError entries (absolute TTL).
const PENDING_ERROR_TTL_MS = 5 * 60 * 1000;
for (const [runId, pending] of pendingLifecycleErrorByRunId.entries()) {
if (now - pending.endedAt > PENDING_ERROR_TTL_MS) {
clearPendingLifecycleError(runId);

View File

@@ -113,6 +113,8 @@ export type AgentRunContext = {
isControlUiVisible?: boolean;
/** Timestamp when this context was first registered (for TTL-based cleanup). */
registeredAt?: number;
/** Timestamp of last activity (updated on every emitAgentEvent). */
lastActiveAt?: number;
};
type AgentEventState = {
@@ -161,6 +163,7 @@ export function getAgentRunContext(runId: string) {
export function clearAgentRunContext(runId: string) {
  // Drop both per-run records together so the sequence counter does not
  // outlive the run context it belongs to.
  const { runContextById, seqByRun } = getAgentEventState();
  runContextById.delete(runId);
  seqByRun.delete(runId);
}
/**
@@ -171,8 +174,10 @@ export function sweepStaleRunContexts(maxAgeMs = 30 * 60 * 1000): number {
const now = Date.now();
let swept = 0;
for (const [runId, ctx] of state.runContextById.entries()) {
// Treat missing registeredAt (pre-deploy entries) as infinitely old.
const age = ctx.registeredAt ? now - ctx.registeredAt : Infinity;
// Use lastActiveAt (refreshed on every event) to avoid sweeping active runs.
// Fall back to registeredAt, then treat missing timestamps as infinitely old.
const lastSeen = ctx.lastActiveAt ?? ctx.registeredAt;
const age = lastSeen ? now - lastSeen : Infinity;
if (age > maxAgeMs) {
state.runContextById.delete(runId);
state.seqByRun.delete(runId);
@@ -184,6 +189,7 @@ export function sweepStaleRunContexts(maxAgeMs = 30 * 60 * 1000): number {
export function resetAgentRunContextForTest() {
  // Test-only reset: empty the run-context map and the per-run sequence
  // counters so state from one test cannot leak into the next.
  const { runContextById, seqByRun } = getAgentEventState();
  runContextById.clear();
  seqByRun.clear();
}
export function emitAgentEvent(event: Omit<AgentEventPayload, "seq" | "ts">) {
@@ -191,6 +197,9 @@ export function emitAgentEvent(event: Omit<AgentEventPayload, "seq" | "ts">) {
const nextSeq = (state.seqByRun.get(event.runId) ?? 0) + 1;
state.seqByRun.set(event.runId, nextSeq);
const context = state.runContextById.get(event.runId);
if (context) {
context.lastActiveAt = Date.now();
}
const isControlUiVisible = context?.isControlUiVisible ?? true;
const eventSessionKey =
typeof event.sessionKey === "string" && event.sessionKey.trim() ? event.sessionKey : undefined;