mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 22:54:46 +00:00
fix(gateway): suppress startup liveness warnings (#81699)
* fix(gateway): suppress startup liveness warnings
* docs(changelog): note diagnostic startup grace fix
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Hooks: load workspace-relative legacy hook modules from dot-dot-prefixed directories without treating the filename prefix as parent traversal.
|
||||
- Plugins: preserve installed package metadata and persisted registry freshness checks for plugin package paths under dot-dot-prefixed directories.
|
||||
- Agents: allow dot-dot-prefixed filenames such as `..note.txt` through sandbox FS bridge, remote sandbox reads, and apply_patch summaries without mistaking the name for parent traversal.
|
||||
- Gateway/diagnostics: suppress cold-start liveness warnings during the startup grace window while still sampling liveness metrics. Fixes #79915. (#81699) Thanks @joshavant.
|
||||
- CLI/migrate: hide per-item source/plugin hints on non-conflicting Codex skill and plugin selection prompts, keeping the hint text reserved for rows that actually need attention. Thanks @sjf.
|
||||
- Codex harness: treat high-confidence app-server OAuth refresh invalidation as a terminal auth-profile failure, stopping repeated raw token-refresh errors without turning entitlement or usage-limit payloads into re-auth prompts.
|
||||
- CLI/migrate: humanize Codex conflict-status messaging across the migrate UI so selection prompts and plan/result rows say "Codex skill already installed in workspace" instead of surfacing internal `MIGRATION_REASON_*` codes. Thanks @sjf.
|
||||
|
||||
@@ -593,7 +593,10 @@ export async function startGatewayServer(
|
||||
const diagnosticsEnabled = isDiagnosticsEnabled(cfgAtStart);
|
||||
setDiagnosticsEnabledForProcess(diagnosticsEnabled);
|
||||
if (diagnosticsEnabled) {
|
||||
startDiagnosticHeartbeat(undefined, { getConfig: getRuntimeConfig });
|
||||
startDiagnosticHeartbeat(undefined, {
|
||||
getConfig: getRuntimeConfig,
|
||||
startupGraceMs: 60_000,
|
||||
});
|
||||
}
|
||||
setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) });
|
||||
let getActiveTaskCount = () => 0;
|
||||
|
||||
@@ -1055,6 +1055,48 @@ describe("stuck session diagnostics threshold", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Verifies the startupGraceMs option: liveness sampling runs on every heartbeat tick,
// but the warning log line and the "diagnostic.liveness.warning" event are withheld
// until the grace window has elapsed.
it("suppresses liveness warnings during startupGraceMs while still sampling", () => {
|
||||
// Silence and capture logger warnings so the test can assert on their text.
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
|
||||
// Collects the type of every diagnostic event emitted while the test runs.
const events: string[] = [];
|
||||
// Stub sampler that always returns the same event-loop-delay sample, so the only
// variable between the two ticks below is whether the grace window is still open.
const sampleLiveness = vi.fn(() => ({
|
||||
reasons: ["event_loop_delay" as const],
|
||||
intervalMs: 30_000,
|
||||
eventLoopDelayP99Ms: 1_500,
|
||||
eventLoopDelayMaxMs: 2_000,
|
||||
}));
|
||||
const unsubscribe = onDiagnosticEvent((event) => events.push(event.type));
|
||||
|
||||
try {
|
||||
// Start the heartbeat with diagnostics enabled and a 60s startup grace window.
startDiagnosticHeartbeat(
|
||||
{
|
||||
diagnostics: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
emitMemorySample: createEmitMemorySampleMock(),
|
||||
sampleLiveness,
|
||||
startupGraceMs: 60_000,
|
||||
},
|
||||
);
|
||||
|
||||
// NOTE(review): presumably queues a message so the heartbeat sees open diagnostic
// work and a warning would otherwise be eligible — confirm against the warning gate.
logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "test" });
|
||||
// First 30s: the assertion below shows exactly one sampler call, i.e. one heartbeat tick.
vi.advanceTimersByTime(30_000);
|
||||
|
||||
// Inside the grace window: the sampler ran, but nothing was reported.
expect(sampleLiveness).toHaveBeenCalledTimes(1);
|
||||
expectNoLoggerMessageContaining(warnSpy, "liveness warning:");
|
||||
expect(events).not.toContain("diagnostic.liveness.warning");
|
||||
|
||||
// Second 30s brings total elapsed time to the full 60s grace period.
// NOTE(review): assumes fake timers also advance Date.now() — confirm in the suite setup.
vi.advanceTimersByTime(30_000);
|
||||
|
||||
// Grace window over: the second sample is reported via both the log and the event.
expect(sampleLiveness).toHaveBeenCalledTimes(2);
|
||||
expectLoggerMessageContaining(warnSpy, "liveness warning:");
|
||||
expect(events).toContain("diagnostic.liveness.warning");
|
||||
} finally {
|
||||
// Always detach the event listener, even when an assertion above throws.
unsubscribe();
|
||||
}
|
||||
});
|
||||
|
||||
it("warns for liveness samples when diagnostic work is open", () => {
|
||||
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
|
||||
|
||||
|
||||
@@ -123,6 +123,7 @@ type StartDiagnosticHeartbeatOptions = {
|
||||
emitMemorySample?: EmitDiagnosticMemorySample;
|
||||
sampleLiveness?: SampleDiagnosticLiveness;
|
||||
recoverStuckSession?: RecoverStuckSession;
|
||||
startupGraceMs?: number;
|
||||
};
|
||||
|
||||
let diagnosticLivenessMonitor: EventLoopDelayMonitor | null = null;
|
||||
@@ -939,6 +940,8 @@ export function startDiagnosticHeartbeat(
|
||||
return;
|
||||
}
|
||||
startDiagnosticLivenessSampler();
|
||||
const livenessGraceUntil =
|
||||
opts?.startupGraceMs != null && opts.startupGraceMs > 0 ? Date.now() + opts.startupGraceMs : 0;
|
||||
heartbeatInterval = setInterval(() => {
|
||||
let heartbeatConfig = config;
|
||||
if (!heartbeatConfig) {
|
||||
@@ -953,7 +956,10 @@ export function startDiagnosticHeartbeat(
|
||||
const now = Date.now();
|
||||
pruneDiagnosticSessionStates(now, true);
|
||||
const work = getDiagnosticWorkSnapshot(now);
|
||||
const livenessSample = (opts?.sampleLiveness ?? sampleDiagnosticLiveness)(now, work);
|
||||
const inStartupGrace = livenessGraceUntil > 0 && now < livenessGraceUntil;
|
||||
const rawLivenessSample = (opts?.sampleLiveness ?? sampleDiagnosticLiveness)(now, work);
|
||||
// Keep sampling during grace so event-loop delay baselines reset, but suppress startup-only reports.
|
||||
const livenessSample = inStartupGrace ? null : rawLivenessSample;
|
||||
const shouldEmitLivenessEvent =
|
||||
livenessSample !== null && shouldEmitDiagnosticLivenessEvent(now);
|
||||
const shouldEmitLivenessWarning =
|
||||
|
||||
Reference in New Issue
Block a user