mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 21:04:45 +00:00
fix(logging): avoid false liveness backlog warnings
This commit is contained in:
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
|
||||
- WhatsApp: honor forced document delivery for outbound image, GIF, and video media so `forceDocument`/`asDocument` sends preserve original media bytes instead of using compressed media payloads. (#79272) Thanks @itsuzef.
|
||||
- WhatsApp: name outbound document attachments from their MIME type when no filename is provided, so PDF and CSV sends arrive as `file.pdf` and `file.csv` instead of an extensionless `file`. Thanks @mcaxtr.
|
||||
- Process/diagnostics: report active lane blockers in lane wait warnings so `queueAhead=0` no longer hides commands waiting behind active work. Fixes #82791. (#82792) Thanks @galiniliev.
|
||||
- Process/diagnostics: stop counting the active processing turn as queued backlog in liveness warnings so transient max-only event-loop spikes do not surface as gateway warnings.
|
||||
|
||||
## 2026.5.17
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ export type SessionState = {
|
||||
lastLongRunningWarnAgeMs?: number;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
activeQueuedTurn?: boolean;
|
||||
toolCallHistory?: ToolCallRecord[];
|
||||
toolLoopWarningBuckets?: Map<string, number>;
|
||||
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
|
||||
@@ -104,6 +105,7 @@ function mergeSessionState(target: SessionState, source: SessionState): void {
|
||||
target.generation = Math.max(target.generation ?? 0, source.generation ?? 0);
|
||||
target.lastActivity = Math.max(target.lastActivity, source.lastActivity);
|
||||
target.queueDepth += source.queueDepth;
|
||||
target.activeQueuedTurn ||= source.activeQueuedTurn;
|
||||
target.lastStuckWarnAgeMs =
|
||||
target.lastStuckWarnAgeMs === undefined || source.lastStuckWarnAgeMs === undefined
|
||||
? undefined
|
||||
|
||||
@@ -1280,6 +1280,86 @@ describe("stuck session diagnostics threshold", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("does not count the active processing message as queued liveness backlog", () => {
|
||||
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
|
||||
|
||||
startDiagnosticHeartbeat(
|
||||
{
|
||||
diagnostics: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
emitMemorySample: createEmitMemorySampleMock(),
|
||||
sampleLiveness: () => ({
|
||||
reasons: ["event_loop_delay"],
|
||||
intervalMs: 30_000,
|
||||
eventLoopDelayP99Ms: 53.6,
|
||||
eventLoopDelayMaxMs: 2_761.9,
|
||||
eventLoopUtilization: 0.785,
|
||||
cpuCoreRatio: 0.378,
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "discord" });
|
||||
logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" });
|
||||
vi.advanceTimersByTime(30_000);
|
||||
|
||||
expectNoLoggerMessageContaining(warnSpy, "liveness warning:");
|
||||
requireMatchingRecord(
|
||||
getDiagnosticStabilitySnapshot({ limit: 10 }).events,
|
||||
{
|
||||
type: "diagnostic.liveness.warning",
|
||||
level: "info",
|
||||
active: 1,
|
||||
waiting: 0,
|
||||
queued: 0,
|
||||
},
|
||||
"active processing liveness stability event",
|
||||
);
|
||||
});
|
||||
|
||||
it("counts messages queued behind already active work as liveness backlog", () => {
|
||||
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
|
||||
|
||||
startDiagnosticHeartbeat(
|
||||
{
|
||||
diagnostics: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
emitMemorySample: createEmitMemorySampleMock(),
|
||||
sampleLiveness: () => ({
|
||||
reasons: ["event_loop_delay"],
|
||||
intervalMs: 30_000,
|
||||
eventLoopDelayP99Ms: 53.6,
|
||||
eventLoopDelayMaxMs: 2_761.9,
|
||||
eventLoopUtilization: 0.785,
|
||||
cpuCoreRatio: 0.378,
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" });
|
||||
logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "discord" });
|
||||
vi.advanceTimersByTime(30_000);
|
||||
|
||||
expectLoggerMessageContaining(warnSpy, "liveness warning:");
|
||||
requireMatchingRecord(
|
||||
getDiagnosticStabilitySnapshot({ limit: 10 }).events,
|
||||
{
|
||||
type: "diagnostic.liveness.warning",
|
||||
level: "warning",
|
||||
active: 1,
|
||||
waiting: 0,
|
||||
queued: 1,
|
||||
},
|
||||
"queued backlog liveness stability event",
|
||||
);
|
||||
});
|
||||
|
||||
it("does not let idle liveness samples suppress later active-work warnings", () => {
|
||||
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
|
||||
|
||||
|
||||
@@ -180,6 +180,7 @@ function formatDiagnosticWorkLabel(
|
||||
sessionKey?: string;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
activeQueuedTurn?: boolean;
|
||||
lastActivity: number;
|
||||
},
|
||||
now: number,
|
||||
@@ -217,10 +218,14 @@ function getDiagnosticWorkSnapshot(now = Date.now()): DiagnosticWorkSnapshot {
|
||||
waitingCount += 1;
|
||||
pushLimitedDiagnosticLabel(waitingLabels, formatDiagnosticWorkLabel(state, now));
|
||||
}
|
||||
if (state.queueDepth > 0) {
|
||||
const queuedBacklog = Math.max(
|
||||
0,
|
||||
state.queueDepth - (state.state === "processing" && state.activeQueuedTurn ? 1 : 0),
|
||||
);
|
||||
if (queuedBacklog > 0) {
|
||||
pushLimitedDiagnosticLabel(queuedLabels, formatDiagnosticWorkLabel(state, now));
|
||||
}
|
||||
queuedCount += state.queueDepth;
|
||||
queuedCount += queuedBacklog;
|
||||
}
|
||||
|
||||
return { activeCount, waitingCount, queuedCount, activeLabels, waitingLabels, queuedLabels };
|
||||
@@ -686,8 +691,12 @@ export function logSessionStateChange(
|
||||
state.generation = (state.generation ?? 0) + 1;
|
||||
state.lastStuckWarnAgeMs = undefined;
|
||||
state.lastLongRunningWarnAgeMs = undefined;
|
||||
if (params.state === "processing" && prevState !== "processing") {
|
||||
state.activeQueuedTurn = state.queueDepth > 0;
|
||||
}
|
||||
if (params.state === "idle") {
|
||||
state.queueDepth = Math.max(0, state.queueDepth - 1);
|
||||
state.activeQueuedTurn = false;
|
||||
}
|
||||
if (!isProbeSession && diag.isEnabled("debug")) {
|
||||
diag.debug(
|
||||
|
||||
Reference in New Issue
Block a user