diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index 65308a335f8..24dd86f0e5b 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -124,11 +124,7 @@ export type DiagnosticSessionStateEvent = DiagnosticBaseEvent & { queueDepth?: number; }; -export type DiagnosticSessionActiveWorkKind = - | "embedded_run" - | "model_call" - | "tool_call" - | "queued_work"; +export type DiagnosticSessionActiveWorkKind = "embedded_run" | "model_call" | "tool_call"; export type DiagnosticSessionAttentionClassification = | "long_running" diff --git a/src/logging/diagnostic.test.ts b/src/logging/diagnostic.test.ts index 20e116e40b1..355aa964ba4 100644 --- a/src/logging/diagnostic.test.ts +++ b/src/logging/diagnostic.test.ts @@ -24,6 +24,7 @@ import { } from "./diagnostic-stability.js"; import { logSessionStateChange, + logMessageQueued, resetDiagnosticStateForTest, resolveStuckSessionWarnMs, startDiagnosticHeartbeat, @@ -162,6 +163,45 @@ describe("stuck session diagnostics threshold", () => { }); }); + it("keeps queued stale sessions eligible for lane recovery", () => { + const events: DiagnosticEventPayload[] = []; + const recoverStuckSession = vi.fn(); + const unsubscribe = onDiagnosticEvent((event) => { + events.push(event); + }); + try { + startDiagnosticHeartbeat( + { + diagnostics: { + enabled: true, + stuckSessionWarnMs: 30_000, + }, + }, + { recoverStuckSession }, + ); + logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "test" }); + logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" }); + vi.advanceTimersByTime(61_000); + } finally { + unsubscribe(); + } + + expect(events.filter((event) => event.type === "session.long_running")).toHaveLength(0); + const stuckEvents = events.filter((event) => event.type === "session.stuck"); + expect(stuckEvents).toHaveLength(1); + expect(stuckEvents[0]).toMatchObject({ + classification: "stale_session_state", + reason: "queued_work_without_active_run", + queueDepth: 1, + }); + expect(recoverStuckSession).toHaveBeenCalledWith({ + sessionId: "s1", + sessionKey: "main", + ageMs: expect.any(Number), + queueDepth: 1, + }); + }); + it("reports active sessions as stalled instead of stuck when active work stops progressing", () => { const events: DiagnosticEventPayload[] = []; const recoverStuckSession = vi.fn(); @@ -232,6 +272,44 @@ describe("stuck session diagnostics threshold", () => { expect(recoverStuckSession).not.toHaveBeenCalled(); }); + it("keeps queued sessions non-recoverable while active work is making progress", () => { + const events: DiagnosticEventPayload[] = []; + const recoverStuckSession = vi.fn(); + const unsubscribe = onDiagnosticEvent((event) => { + events.push(event); + }); + try { + startDiagnosticHeartbeat( + { + diagnostics: { + enabled: true, + stuckSessionWarnMs: 30_000, + }, + }, + { recoverStuckSession }, + ); + logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "test" }); + logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" }); + vi.advanceTimersByTime(45_000); + markDiagnosticEmbeddedRunStarted({ sessionId: "s1", sessionKey: "main" }); + vi.advanceTimersByTime(16_000); + } finally { + unsubscribe(); + } + + expect(events.filter((event) => event.type === "session.stuck")).toHaveLength(0); + expect(events.filter((event) => event.type === "session.stalled")).toHaveLength(0); + const longRunningEvents = events.filter((event) => event.type === "session.long_running"); + expect(longRunningEvents).toHaveLength(1); + expect(longRunningEvents[0]).toMatchObject({ + classification: "long_running", + reason: "queued_behind_active_work", + activeWorkKind: "embedded_run", + queueDepth: 1, + }); + expect(recoverStuckSession).not.toHaveBeenCalled(); + }); + it("starts and stops the stability recorder with the heartbeat lifecycle", () => { startDiagnosticHeartbeat({ diagnostics: { diff --git a/src/logging/diagnostic.ts b/src/logging/diagnostic.ts index 3ba77124a8d..df563810223 100644 --- a/src/logging/diagnostic.ts +++ b/src/logging/diagnostic.ts @@ -216,19 +216,9 @@ function classifySessionAttention(params: { }; } - if (params.queueDepth > 0) { - return { - eventType: "session.long_running", - reason: "queued_work_without_active_run", - classification: "long_running", - activeWorkKind: "queued_work", - recoveryEligible: false, - }; - } - return { eventType: "session.stuck", - reason: "stale_session_state", + reason: params.queueDepth > 0 ? "queued_work_without_active_run" : "stale_session_state", classification: "stale_session_state", recoveryEligible: true, };