fix(gateway): suppress startup liveness warnings (#81699)

* fix(gateway): suppress startup liveness warnings

* docs(changelog): note diagnostic startup grace fix
This commit is contained in:
Josh Avant
2026-05-14 01:39:46 -05:00
committed by GitHub
parent 25eef1203a
commit 4e1f59010e
4 changed files with 54 additions and 2 deletions

View File

@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- Hooks: load workspace-relative legacy hook modules from dot-dot-prefixed directories without treating the filename prefix as parent traversal.
- Plugins: preserve installed package metadata and persisted registry freshness checks for plugin package paths under dot-dot-prefixed directories.
- Agents: allow dot-dot-prefixed filenames such as `..note.txt` through sandbox FS bridge, remote sandbox reads, and apply_patch summaries without mistaking the name for parent traversal.
- Gateway/diagnostics: suppress cold-start liveness warnings during the startup grace window while still sampling liveness metrics. Fixes #79915. (#81699) Thanks @joshavant.
- CLI/migrate: hide per-item source/plugin hints on non-conflicting Codex skill and plugin selection prompts, keeping the hint text reserved for rows that actually need attention. Thanks @sjf.
- Codex harness: treat high-confidence app-server OAuth refresh invalidation as a terminal auth-profile failure, stopping repeated raw token-refresh errors without turning entitlement or usage-limit payloads into re-auth prompts.
- CLI/migrate: humanize Codex conflict-status messaging across the migrate UI so selection prompts and plan/result rows say "Codex skill already installed in workspace" instead of surfacing internal `MIGRATION_REASON_*` codes. Thanks @sjf.

View File

@@ -593,7 +593,10 @@ export async function startGatewayServer(
const diagnosticsEnabled = isDiagnosticsEnabled(cfgAtStart);
setDiagnosticsEnabledForProcess(diagnosticsEnabled);
if (diagnosticsEnabled) {
startDiagnosticHeartbeat(undefined, { getConfig: getRuntimeConfig });
startDiagnosticHeartbeat(undefined, {
getConfig: getRuntimeConfig,
startupGraceMs: 60_000,
});
}
setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) });
let getActiveTaskCount = () => 0;

View File

@@ -1055,6 +1055,48 @@ describe("stuck session diagnostics threshold", () => {
);
});
// Checks the `startupGraceMs` heartbeat option: while the grace window is open the
// liveness sampler is still invoked, but no "liveness warning:" log line and no
// "diagnostic.liveness.warning" event may be produced; once the window has elapsed
// both are expected.
// NOTE(review): relies on vitest fake timers being installed by the surrounding suite
// (setup not visible in this hunk) — confirm.
it("suppresses liveness warnings during startupGraceMs while still sampling", () => {
// Replace diagnosticLogger.warn with a no-op spy so warning output can be asserted on.
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);
// Collects the `type` of every diagnostic event delivered to the subscription below.
const events: string[] = [];
// Stub sampler that always returns an event-loop-delay sample, so whether a warning
// appears depends only on the grace-window handling and not on real measurements.
const sampleLiveness = vi.fn(() => ({
reasons: ["event_loop_delay" as const],
intervalMs: 30_000,
eventLoopDelayP99Ms: 1_500,
eventLoopDelayMaxMs: 2_000,
}));
const unsubscribe = onDiagnosticEvent((event) => events.push(event.type));
try {
startDiagnosticHeartbeat(
{
diagnostics: {
enabled: true,
},
},
{
emitMemorySample: createEmitMemorySampleMock(),
sampleLiveness,
// Grace window under test: 60 s from the moment the heartbeat is started.
startupGraceMs: 60_000,
},
);
// Queue a message before the first tick.
// NOTE(review): presumably this creates open diagnostic work so a liveness sample is
// eligible to warn at all — confirm against logMessageQueued.
logMessageQueued({ sessionId: "s1", sessionKey: "main", source: "test" });
// After 30 s (inside the grace window) the test expects exactly one sampler call
// and no warning output of either kind.
vi.advanceTimersByTime(30_000);
expect(sampleLiveness).toHaveBeenCalledTimes(1);
expectNoLoggerMessageContaining(warnSpy, "liveness warning:");
expect(events).not.toContain("diagnostic.liveness.warning");
// After a further 30 s (60 s total, the full grace duration) the test expects a second
// sampler call, and this time both the log warning and the warning event.
vi.advanceTimersByTime(30_000);
expect(sampleLiveness).toHaveBeenCalledTimes(2);
expectLoggerMessageContaining(warnSpy, "liveness warning:");
expect(events).toContain("diagnostic.liveness.warning");
} finally {
// Always detach the event listener so it cannot leak into later tests.
unsubscribe();
}
});
it("warns for liveness samples when diagnostic work is open", () => {
const warnSpy = vi.spyOn(diagnosticLogger, "warn").mockImplementation(() => undefined);

View File

@@ -123,6 +123,7 @@ type StartDiagnosticHeartbeatOptions = {
emitMemorySample?: EmitDiagnosticMemorySample;
sampleLiveness?: SampleDiagnosticLiveness;
recoverStuckSession?: RecoverStuckSession;
startupGraceMs?: number;
};
let diagnosticLivenessMonitor: EventLoopDelayMonitor | null = null;
@@ -939,6 +940,8 @@ export function startDiagnosticHeartbeat(
return;
}
startDiagnosticLivenessSampler();
const livenessGraceUntil =
opts?.startupGraceMs != null && opts.startupGraceMs > 0 ? Date.now() + opts.startupGraceMs : 0;
heartbeatInterval = setInterval(() => {
let heartbeatConfig = config;
if (!heartbeatConfig) {
@@ -953,7 +956,10 @@ export function startDiagnosticHeartbeat(
const now = Date.now();
pruneDiagnosticSessionStates(now, true);
const work = getDiagnosticWorkSnapshot(now);
const livenessSample = (opts?.sampleLiveness ?? sampleDiagnosticLiveness)(now, work);
const inStartupGrace = livenessGraceUntil > 0 && now < livenessGraceUntil;
const rawLivenessSample = (opts?.sampleLiveness ?? sampleDiagnosticLiveness)(now, work);
// Keep sampling during grace so event-loop delay baselines reset, but suppress startup-only reports.
const livenessSample = inStartupGrace ? null : rawLivenessSample;
const shouldEmitLivenessEvent =
livenessSample !== null && shouldEmitDiagnosticLivenessEvent(now);
const shouldEmitLivenessWarning =