mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-26 16:41:49 +00:00
feat(diagnostics): add configurable stuck-session warning threshold
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import fs from "node:fs";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { onDiagnosticEvent, resetDiagnosticEventsForTest } from "../infra/diagnostic-events.js";
|
||||
import {
|
||||
diagnosticSessionStates,
|
||||
getDiagnosticSessionStateCountForTest,
|
||||
@@ -7,6 +8,12 @@ import {
|
||||
pruneDiagnosticSessionStates,
|
||||
resetDiagnosticSessionStateForTest,
|
||||
} from "./diagnostic-session-state.js";
|
||||
import {
|
||||
logSessionStateChange,
|
||||
resetDiagnosticStateForTest,
|
||||
resolveStuckSessionWarnMs,
|
||||
startDiagnosticHeartbeat,
|
||||
} from "./diagnostic.js";
|
||||
|
||||
describe("diagnostic session state pruning", () => {
|
||||
beforeEach(() => {
|
||||
@@ -74,3 +81,60 @@ describe("logger import side effects", () => {
|
||||
expect(mkdirSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("stuck session diagnostics threshold", () => {
  /**
   * Runs `scenario` while subscribed to diagnostic events and returns the
   * type of every event emitted during the run. The subscription is always
   * released, even if the scenario throws.
   */
  const recordEventTypes = (scenario: () => void): string[] => {
    const seenTypes: string[] = [];
    const stopListening = onDiagnosticEvent((event) => {
      seenTypes.push(event.type);
    });
    try {
      scenario();
    } finally {
      stopListening();
    }
    return seenTypes;
  };

  /** Keeps only the stuck-session events from a list of recorded event types. */
  const onlyStuck = (types: string[]): string[] =>
    types.filter((type) => type === "session.stuck");

  beforeEach(() => {
    // Fake timers let the tests drive the heartbeat interval deterministically.
    vi.useFakeTimers();
    resetDiagnosticStateForTest();
    resetDiagnosticEventsForTest();
  });

  afterEach(() => {
    resetDiagnosticEventsForTest();
    resetDiagnosticStateForTest();
    vi.useRealTimers();
  });

  it("uses the configured diagnostics.stuckSessionWarnMs threshold", () => {
    const seenTypes = recordEventTypes(() => {
      startDiagnosticHeartbeat({
        diagnostics: {
          enabled: true,
          stuckSessionWarnMs: 30_000,
        },
      });
      logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" });
      // Well past the configured 30s threshold, but below the 120s default.
      vi.advanceTimersByTime(61_000);
    });

    expect(onlyStuck(seenTypes)).toHaveLength(1);
  });

  it("falls back to default threshold when config is absent", () => {
    const seenTypes = recordEventTypes(() => {
      startDiagnosticHeartbeat();
      logSessionStateChange({ sessionId: "s2", sessionKey: "main", state: "processing" });
      // 31s is far below the 120s default, so no warning is expected.
      vi.advanceTimersByTime(31_000);
    });

    expect(onlyStuck(seenTypes)).toHaveLength(0);
  });

  it("uses default threshold for invalid values", () => {
    const negative = resolveStuckSessionWarnMs({ diagnostics: { stuckSessionWarnMs: -1 } });
    const zero = resolveStuckSessionWarnMs({ diagnostics: { stuckSessionWarnMs: 0 } });
    const missing = resolveStuckSessionWarnMs();

    expect(negative).toBe(120_000);
    expect(zero).toBe(120_000);
    expect(missing).toBe(120_000);
  });
});
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { emitDiagnosticEvent } from "../infra/diagnostic-events.js";
|
||||
import {
|
||||
diagnosticSessionStates,
|
||||
@@ -20,11 +21,26 @@ const webhookStats = {
|
||||
};
|
||||
|
||||
// Timestamp (from Date.now()) recorded by markActivity(); 0 until the first call.
let lastActivityAt = 0;
|
||||
// Threshold returned by resolveStuckSessionWarnMs() when the configured value
// is missing, non-finite, or outside the allowed range (120 seconds).
const DEFAULT_STUCK_SESSION_WARN_MS = 120_000;
|
||||
// Smallest configured threshold accepted by resolveStuckSessionWarnMs() (1 second).
const MIN_STUCK_SESSION_WARN_MS = 1_000;
|
||||
// Largest configured threshold accepted by resolveStuckSessionWarnMs() (24 hours).
const MAX_STUCK_SESSION_WARN_MS = 24 * 60 * 60 * 1000;
|
||||
|
||||
/** Records the current wall-clock time as the most recent diagnostic activity. */
function markActivity() {
  const nowMs = Date.now();
  lastActivityAt = nowMs;
}
|
||||
|
||||
/**
 * Resolves the age (in milliseconds) after which a "processing" session is
 * reported as stuck.
 *
 * The value is read from `config.diagnostics.stuckSessionWarnMs`, rounded down
 * to a whole number of milliseconds. The default threshold is returned instead
 * when the setting is absent, not a finite number, or (after rounding) falls
 * outside the inclusive range
 * [MIN_STUCK_SESSION_WARN_MS, MAX_STUCK_SESSION_WARN_MS].
 *
 * @param config - Optional application config; may be omitted entirely.
 * @returns The validated threshold, or DEFAULT_STUCK_SESSION_WARN_MS.
 */
export function resolveStuckSessionWarnMs(config?: OpenClawConfig): number {
  const configured = config?.diagnostics?.stuckSessionWarnMs;

  // Anything that is not a finite number (undefined, NaN, Infinity, strings
  // from loosely-typed config, ...) is treated as "not configured".
  const wholeMs =
    typeof configured === "number" && Number.isFinite(configured)
      ? Math.floor(configured)
      : undefined;
  if (wholeMs === undefined) {
    return DEFAULT_STUCK_SESSION_WARN_MS;
  }

  // Out-of-range values are rejected outright rather than clamped.
  const withinBounds =
    wholeMs >= MIN_STUCK_SESSION_WARN_MS && wholeMs <= MAX_STUCK_SESSION_WARN_MS;
  return withinBounds ? wholeMs : DEFAULT_STUCK_SESSION_WARN_MS;
}
|
||||
|
||||
export function logWebhookReceived(params: {
|
||||
channel: string;
|
||||
updateType?: string;
|
||||
@@ -305,10 +321,11 @@ export function logActiveRuns() {
|
||||
|
||||
let heartbeatInterval: NodeJS.Timeout | null = null;
|
||||
|
||||
export function startDiagnosticHeartbeat() {
|
||||
export function startDiagnosticHeartbeat(config?: OpenClawConfig) {
|
||||
if (heartbeatInterval) {
|
||||
return;
|
||||
}
|
||||
const stuckSessionWarnMs = resolveStuckSessionWarnMs(config);
|
||||
heartbeatInterval = setInterval(() => {
|
||||
const now = Date.now();
|
||||
pruneDiagnosticSessionStates(now, true);
|
||||
@@ -362,7 +379,7 @@ export function startDiagnosticHeartbeat() {
|
||||
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
const ageMs = now - state.lastActivity;
|
||||
if (state.state === "processing" && ageMs > 120_000) {
|
||||
if (state.state === "processing" && ageMs > stuckSessionWarnMs) {
|
||||
logSessionStuck({
|
||||
sessionId: state.sessionId,
|
||||
sessionKey: state.sessionKey,
|
||||
|
||||
Reference in New Issue
Block a user