mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:50:42 +00:00
fix: throttle long-running diagnostic warnings
This commit is contained in:
@@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
|
||||
- CLI/doctor: trust a ready gateway memory probe when CLI-side active memory backend resolution is unavailable, preventing false "No active memory plugin is registered" warnings for healthy runtime setups. Fixes #76792. Thanks @som-686.
|
||||
- Memory/status: keep plain `openclaw memory status` and `openclaw memory status --json` on the cheap read-only path by reserving vector and embedding provider probes for `--deep` or `--index`. Fixes #76769. Thanks @daruire.
|
||||
- Telegram: suppress stale same-session replies when a newer accepted message arrives before an older in-flight Telegram dispatch finalizes. Fixes #76642. Thanks @chinar-amrutkar.
|
||||
- Gateway/diagnostics: throttle repeated long-running active-work session warnings so healthy cron or subagent runs no longer print the same `recovery=none` line every heartbeat.
|
||||
- Slack: collapse routine Socket Mode pong-timeout reconnects into one OpenClaw reconnect line and suppress the duplicate Slack SDK pong warning.
|
||||
- Gateway/diagnostics: abort-drain embedded runs after an extended no-progress stall so a single dead session no longer leaves queued Discord/channel turns blocked behind repeated `recovery=none` liveness warnings.
|
||||
- Plugins/ClawHub: accept the live artifact resolver `kind`/`sha256` field names alongside the typed `artifactKind`/`artifactSha256` form so `clawhub:` installs of npm-pack and legacy ZIP packages no longer miss downloadable artifacts. Thanks @romneyda.
|
||||
|
||||
@@ -5,6 +5,7 @@ export type SessionState = {
|
||||
sessionKey?: string;
|
||||
lastActivity: number;
|
||||
lastStuckWarnAgeMs?: number;
|
||||
lastLongRunningWarnAgeMs?: number;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
toolCallHistory?: ToolCallRecord[];
|
||||
@@ -105,6 +106,10 @@ function mergeSessionState(target: SessionState, source: SessionState): void {
|
||||
target.lastStuckWarnAgeMs === undefined || source.lastStuckWarnAgeMs === undefined
|
||||
? undefined
|
||||
: Math.max(target.lastStuckWarnAgeMs, source.lastStuckWarnAgeMs);
|
||||
target.lastLongRunningWarnAgeMs =
|
||||
target.lastLongRunningWarnAgeMs === undefined || source.lastLongRunningWarnAgeMs === undefined
|
||||
? undefined
|
||||
: Math.max(target.lastLongRunningWarnAgeMs, source.lastLongRunningWarnAgeMs);
|
||||
if (source.toolCallHistory?.length) {
|
||||
target.toolCallHistory = [...(target.toolCallHistory ?? []), ...source.toolCallHistory];
|
||||
}
|
||||
|
||||
@@ -320,7 +320,7 @@ describe("stuck session diagnostics threshold", () => {
|
||||
expect(events).toHaveLength(1);
|
||||
expect(recoverStuckSession).toHaveBeenCalledTimes(1);
|
||||
|
||||
- vi.advanceTimersByTime(30_000);
|
||||
+ vi.advanceTimersByTime(31_000);
|
||||
} finally {
|
||||
unsubscribe();
|
||||
}
|
||||
@@ -442,6 +442,48 @@ describe("stuck session diagnostics threshold", () => {
|
||||
expect(recoverStuckSession).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Regression test for long-running warning throttling: across the whole
// simulated timeline only one `session.long_running` event may be emitted,
// and the recovery hook must never be invoked.
it("throttles repeated long-running active-work warnings", () => {
|
||||
const events: DiagnosticEventPayload[] = [];
|
||||
const recoverStuckSession = vi.fn();
|
||||
// Collect every diagnostic event so the assertions can filter by type.
const unsubscribe = onDiagnosticEvent((event) => {
|
||||
events.push(event);
|
||||
});
|
||||
try {
|
||||
// Start the heartbeat with a 30s stuck-session threshold and the mock as
// the recovery hook.
startDiagnosticHeartbeat(
|
||||
{
|
||||
diagnostics: {
|
||||
enabled: true,
|
||||
stuckSessionWarnMs: 30_000,
|
||||
},
|
||||
},
|
||||
{ recoverStuckSession },
|
||||
);
|
||||
// Put session s1 into "processing", let 45s pass, then mark an embedded
// run as started and let another 16s pass.
logSessionStateChange({ sessionId: "s1", sessionKey: "main", state: "processing" });
|
||||
vi.advanceTimersByTime(45_000);
|
||||
markDiagnosticEmbeddedRunStarted({ sessionId: "s1", sessionKey: "main" });
|
||||
vi.advanceTimersByTime(16_000);
|
||||

|
||||
// Exactly one long-running warning has been emitted so far.
expect(events.filter((event) => event.type === "session.long_running")).toHaveLength(1);
|
||||

|
||||
// Another 28s pass, a run.progress event is emitted for the same session,
// then 2s more pass.
vi.advanceTimersByTime(28_000);
|
||||
emitDiagnosticEvent({
|
||||
type: "run.progress",
|
||||
sessionId: "s1",
|
||||
sessionKey: "main",
|
||||
reason: "stream",
|
||||
});
|
||||
vi.advanceTimersByTime(2_000);
|
||||

|
||||
// Still a single warning: no repeat was emitted during the additional time.
expect(events.filter((event) => event.type === "session.long_running")).toHaveLength(1);
|
||||
} finally {
|
||||
unsubscribe();
|
||||
}
|
||||

|
||||
// Final check after unsubscribing: one warning in total, recovery never called.
const longRunningEvents = events.filter((event) => event.type === "session.long_running");
|
||||
expect(longRunningEvents).toHaveLength(1);
|
||||
expect(recoverStuckSession).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps queued sessions non-recoverable while active work is making progress", () => {
|
||||
const events: DiagnosticEventPayload[] = [];
|
||||
const recoverStuckSession = vi.fn();
|
||||
|
||||
@@ -461,6 +461,7 @@ export function logMessageQueued(params: {
|
||||
state.queueDepth += 1;
|
||||
state.lastActivity = Date.now();
|
||||
state.lastStuckWarnAgeMs = undefined;
|
||||
state.lastLongRunningWarnAgeMs = undefined;
|
||||
if (diag.isEnabled("debug")) {
|
||||
diag.debug(
|
||||
`message queued: sessionId=${state.sessionId ?? "unknown"} sessionKey=${
|
||||
@@ -540,6 +541,7 @@ export function logSessionStateChange(
|
||||
state.state = params.state;
|
||||
state.lastActivity = Date.now();
|
||||
state.lastStuckWarnAgeMs = undefined;
|
||||
state.lastLongRunningWarnAgeMs = undefined;
|
||||
if (params.state === "idle") {
|
||||
state.queueDepth = Math.max(0, state.queueDepth - 1);
|
||||
}
|
||||
@@ -571,6 +573,7 @@ export function markDiagnosticSessionProgress(params: SessionRef) {
|
||||
const state = getDiagnosticSessionState(params);
|
||||
state.lastActivity = Date.now();
|
||||
state.lastStuckWarnAgeMs = undefined;
|
||||
state.lastLongRunningWarnAgeMs = undefined;
|
||||
markActivity();
|
||||
}
|
||||
|
||||
@@ -635,6 +638,19 @@ export function logSessionAttention(
|
||||
}
|
||||
state.lastStuckWarnAgeMs = params.ageMs;
|
||||
}
|
||||
if (classification.eventType === "session.long_running") {
|
||||
const nextWarnAgeMs =
|
||||
state.lastLongRunningWarnAgeMs === undefined
|
||||
? params.thresholdMs
|
||||
: Math.max(
|
||||
state.lastLongRunningWarnAgeMs + params.thresholdMs,
|
||||
state.lastLongRunningWarnAgeMs * 2,
|
||||
);
|
||||
if (params.ageMs < nextWarnAgeMs) {
|
||||
return undefined;
|
||||
}
|
||||
state.lastLongRunningWarnAgeMs = params.ageMs;
|
||||
}
|
||||
const label =
|
||||
classification.eventType === "session.stuck"
|
||||
? "stuck session"
|
||||
|
||||
Reference in New Issue
Block a user