Tests: Add tooling / skill for detecting and fixing memory leaks in tests (#50654)

* Tests: add periodic heap snapshot tooling

* Skills: add test heap leak workflow

* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update scripts/test-parallel.mjs

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Harold Hunt
2026-03-19 17:59:13 -04:00
committed by GitHub
parent da8fb70525
commit bbd62469fa
5 changed files with 459 additions and 19 deletions

View File

@@ -4,7 +4,11 @@ import os from "node:os";
import path from "node:path";
import { channelTestPrefixes } from "../vitest.channel-paths.mjs";
import { isUnitConfigTestFile } from "../vitest.unit-paths.mjs";
import { parseCompletedTestFileLines, sampleProcessTreeRssKb } from "./test-parallel-memory.mjs";
import {
getProcessTreeRecords,
parseCompletedTestFileLines,
sampleProcessTreeRssKb,
} from "./test-parallel-memory.mjs";
import {
appendCapturedOutput,
hasFatalTestRunOutput,
@@ -725,6 +729,25 @@ const memoryTraceEnabled =
(rawMemoryTrace !== "0" && rawMemoryTrace !== "false" && isCI));
const memoryTracePollMs = Math.max(250, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_POLL_MS", 1000));
const memoryTraceTopCount = Math.max(1, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_TOP_COUNT", 6));
const heapSnapshotIntervalMs = Math.max(
0,
parseEnvNumber("OPENCLAW_TEST_HEAPSNAPSHOT_INTERVAL_MS", 0),
);
const heapSnapshotMinIntervalMs = 5000;
const heapSnapshotEnabled =
process.platform !== "win32" &&
heapSnapshotIntervalMs >= heapSnapshotMinIntervalMs;
const heapSnapshotEnabled = process.platform !== "win32" && heapSnapshotIntervalMs > 0;
const heapSnapshotSignal = process.env.OPENCLAW_TEST_HEAPSNAPSHOT_SIGNAL?.trim() || "SIGUSR2";
const heapSnapshotBaseDir = heapSnapshotEnabled
? path.resolve(
process.env.OPENCLAW_TEST_HEAPSNAPSHOT_DIR?.trim() ||
path.join(os.tmpdir(), `openclaw-heapsnapshots-${Date.now()}`),
)
: null;
const ensureNodeOptionFlag = (nodeOptions, flagPrefix, nextValue) =>
nodeOptions.includes(flagPrefix) ? nodeOptions : `${nodeOptions} ${nextValue}`.trim();
const isNodeLikeProcess = (command) => /(?:^|\/)node(?:$|\.exe$)/iu.test(command);
const runOnce = (entry, extraArgs = []) =>
new Promise((resolve) => {
@@ -757,23 +780,44 @@ const runOnce = (entry, extraArgs = []) =>
(acc, flag) => (acc.includes(flag) ? acc : `${acc} ${flag}`.trim()),
nodeOptions,
);
const heapFlag =
const heapSnapshotDir =
heapSnapshotBaseDir === null ? null : path.join(heapSnapshotBaseDir, entry.name);
let resolvedNodeOptions =
maxOldSpaceSizeMb && !nextNodeOptions.includes("--max-old-space-size=")
? `--max-old-space-size=${maxOldSpaceSizeMb}`
: null;
const resolvedNodeOptions = heapFlag
? `${nextNodeOptions} ${heapFlag}`.trim()
: nextNodeOptions;
? `${nextNodeOptions} --max-old-space-size=${maxOldSpaceSizeMb}`.trim()
: nextNodeOptions;
if (heapSnapshotEnabled && heapSnapshotDir) {
try {
fs.mkdirSync(heapSnapshotDir, { recursive: true });
} catch (err) {
console.error(`[test-parallel] failed to create heap snapshot dir ${heapSnapshotDir}: ${String(err)}`);
resolve(1);
return;
}
resolvedNodeOptions = ensureNodeOptionFlag(
resolvedNodeOptions,
"--diagnostic-dir=",
`--diagnostic-dir=${heapSnapshotDir}`,
);
resolvedNodeOptions = ensureNodeOptionFlag(
resolvedNodeOptions,
"--heapsnapshot-signal=",
`--heapsnapshot-signal=${heapSnapshotSignal}`,
);
}
}
let output = "";
let fatalSeen = false;
let childError = null;
let child;
let pendingLine = "";
let memoryPollTimer = null;
let heapSnapshotTimer = null;
const memoryFileRecords = [];
let initialTreeSample = null;
let latestTreeSample = null;
let peakTreeSample = null;
let heapSnapshotSequence = 0;
const updatePeakTreeSample = (sample, reason) => {
if (!sample) {
return;
@@ -782,6 +826,35 @@ const runOnce = (entry, extraArgs = []) =>
peakTreeSample = { ...sample, reason };
}
};
const triggerHeapSnapshot = (reason) => {
if (!heapSnapshotEnabled || !child?.pid || !heapSnapshotDir) {
return;
}
const records = getProcessTreeRecords(child.pid) ?? [];
const targetPids = records
.filter((record) => record.pid !== process.pid && isNodeLikeProcess(record.command))
.map((record) => record.pid);
if (targetPids.length === 0) {
return;
}
heapSnapshotSequence += 1;
let signaledCount = 0;
for (const pid of targetPids) {
try {
process.kill(pid, heapSnapshotSignal);
signaledCount += 1;
} catch {
// Process likely exited between ps sampling and signal delivery.
}
}
if (signaledCount > 0) {
console.log(
`[test-parallel][heap] ${entry.name} seq=${String(heapSnapshotSequence)} reason=${reason} signaled=${String(
signaledCount,
)}/${String(targetPids.length)} dir=${heapSnapshotDir}`,
);
}
};
const captureTreeSample = (reason) => {
if (!memoryTraceEnabled || !child?.pid) {
return null;
@@ -877,6 +950,11 @@ const runOnce = (entry, extraArgs = []) =>
captureTreeSample("poll");
}, memoryTracePollMs);
}
if (heapSnapshotEnabled) {
heapSnapshotTimer = setInterval(() => {
triggerHeapSnapshot("interval");
}, heapSnapshotIntervalMs);
}
} catch (err) {
console.error(`[test-parallel] spawn failed: ${String(err)}`);
resolve(1);
@@ -905,6 +983,9 @@ const runOnce = (entry, extraArgs = []) =>
if (memoryPollTimer) {
clearInterval(memoryPollTimer);
}
if (heapSnapshotTimer) {
clearInterval(heapSnapshotTimer);
}
children.delete(child);
const resolvedCode = resolveTestRunExitCode({ code, signal, output, fatalSeen, childError });
logMemoryTraceSummary();