Tests: Add tooling / skill for detecting and fixing memory leaks in tests (#50654)

* Tests: add periodic heap snapshot tooling

* Skills: add test heap leak workflow

* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update scripts/test-parallel.mjs

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Harold Hunt
2026-03-19 17:59:13 -04:00
committed by GitHub
parent da8fb70525
commit bbd62469fa
5 changed files with 459 additions and 19 deletions

View File

@@ -11,7 +11,7 @@ const ANSI_ESCAPE_PATTERN = new RegExp(
const COMPLETED_TEST_FILE_LINE_PATTERN =
/(?<file>(?:src|extensions|test|ui)\/\S+?\.(?:live\.test|e2e\.test|test)\.ts)\s+\(.*\)\s+(?<duration>\d+(?:\.\d+)?)(?<unit>ms|s)\s*$/;
const PS_COLUMNS = ["pid=", "ppid=", "rss="];
const PS_COLUMNS = ["pid=", "ppid=", "rss=", "comm="];
function parseDurationMs(rawValue, unit) {
const parsed = Number.parseFloat(rawValue);
@@ -41,7 +41,7 @@ export function parseCompletedTestFileLines(text) {
.filter((entry) => entry !== null);
}
export function sampleProcessTreeRssKb(rootPid) {
export function getProcessTreeRecords(rootPid) {
if (!Number.isInteger(rootPid) || rootPid <= 0 || process.platform === "win32") {
return null;
}
@@ -54,13 +54,13 @@ export function sampleProcessTreeRssKb(rootPid) {
}
const childPidsByParent = new Map();
const rssByPid = new Map();
const recordsByPid = new Map();
for (const line of result.stdout.split(/\r?\n/u)) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const [pidRaw, parentRaw, rssRaw] = trimmed.split(/\s+/u);
const [pidRaw, parentRaw, rssRaw, commandRaw] = trimmed.split(/\s+/u, 4);
const pid = Number.parseInt(pidRaw ?? "", 10);
const parentPid = Number.parseInt(parentRaw ?? "", 10);
const rssKb = Number.parseInt(rssRaw ?? "", 10);
@@ -70,27 +70,30 @@ export function sampleProcessTreeRssKb(rootPid) {
const siblings = childPidsByParent.get(parentPid) ?? [];
siblings.push(pid);
childPidsByParent.set(parentPid, siblings);
rssByPid.set(pid, rssKb);
recordsByPid.set(pid, {
pid,
parentPid,
rssKb,
command: commandRaw ?? "",
});
}
if (!rssByPid.has(rootPid)) {
if (!recordsByPid.has(rootPid)) {
return null;
}
let rssKb = 0;
let processCount = 0;
const queue = [rootPid];
const visited = new Set();
const records = [];
while (queue.length > 0) {
const pid = queue.shift();
if (pid === undefined || visited.has(pid)) {
continue;
}
visited.add(pid);
const currentRssKb = rssByPid.get(pid);
if (currentRssKb !== undefined) {
rssKb += currentRssKb;
processCount += 1;
const record = recordsByPid.get(pid);
if (record) {
records.push(record);
}
for (const childPid of childPidsByParent.get(pid) ?? []) {
if (!visited.has(childPid)) {
@@ -99,5 +102,21 @@ export function sampleProcessTreeRssKb(rootPid) {
}
}
return records;
}
export function sampleProcessTreeRssKb(rootPid) {
const records = getProcessTreeRecords(rootPid);
if (!records) {
return null;
}
let rssKb = 0;
let processCount = 0;
for (const record of records) {
rssKb += record.rssKb;
processCount += 1;
}
return { rssKb, processCount };
}

View File

@@ -4,7 +4,11 @@ import os from "node:os";
import path from "node:path";
import { channelTestPrefixes } from "../vitest.channel-paths.mjs";
import { isUnitConfigTestFile } from "../vitest.unit-paths.mjs";
import { parseCompletedTestFileLines, sampleProcessTreeRssKb } from "./test-parallel-memory.mjs";
import {
getProcessTreeRecords,
parseCompletedTestFileLines,
sampleProcessTreeRssKb,
} from "./test-parallel-memory.mjs";
import {
appendCapturedOutput,
hasFatalTestRunOutput,
@@ -725,6 +729,25 @@ const memoryTraceEnabled =
(rawMemoryTrace !== "0" && rawMemoryTrace !== "false" && isCI));
const memoryTracePollMs = Math.max(250, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_POLL_MS", 1000));
const memoryTraceTopCount = Math.max(1, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_TOP_COUNT", 6));
const heapSnapshotIntervalMs = Math.max(
0,
parseEnvNumber("OPENCLAW_TEST_HEAPSNAPSHOT_INTERVAL_MS", 0),
);
const heapSnapshotMinIntervalMs = 5000;
const heapSnapshotEnabled =
process.platform !== "win32" &&
heapSnapshotIntervalMs >= heapSnapshotMinIntervalMs;
const heapSnapshotEnabled = process.platform !== "win32" && heapSnapshotIntervalMs > 0;
const heapSnapshotSignal = process.env.OPENCLAW_TEST_HEAPSNAPSHOT_SIGNAL?.trim() || "SIGUSR2";
const heapSnapshotBaseDir = heapSnapshotEnabled
? path.resolve(
process.env.OPENCLAW_TEST_HEAPSNAPSHOT_DIR?.trim() ||
path.join(os.tmpdir(), `openclaw-heapsnapshots-${Date.now()}`),
)
: null;
const ensureNodeOptionFlag = (nodeOptions, flagPrefix, nextValue) =>
nodeOptions.includes(flagPrefix) ? nodeOptions : `${nodeOptions} ${nextValue}`.trim();
const isNodeLikeProcess = (command) => /(?:^|\/)node(?:$|\.exe$)/iu.test(command);
const runOnce = (entry, extraArgs = []) =>
new Promise((resolve) => {
@@ -757,23 +780,44 @@ const runOnce = (entry, extraArgs = []) =>
(acc, flag) => (acc.includes(flag) ? acc : `${acc} ${flag}`.trim()),
nodeOptions,
);
const heapFlag =
const heapSnapshotDir =
heapSnapshotBaseDir === null ? null : path.join(heapSnapshotBaseDir, entry.name);
let resolvedNodeOptions =
maxOldSpaceSizeMb && !nextNodeOptions.includes("--max-old-space-size=")
? `--max-old-space-size=${maxOldSpaceSizeMb}`
: null;
const resolvedNodeOptions = heapFlag
? `${nextNodeOptions} ${heapFlag}`.trim()
: nextNodeOptions;
? `${nextNodeOptions} --max-old-space-size=${maxOldSpaceSizeMb}`.trim()
: nextNodeOptions;
if (heapSnapshotEnabled && heapSnapshotDir) {
try {
fs.mkdirSync(heapSnapshotDir, { recursive: true });
} catch (err) {
console.error(`[test-parallel] failed to create heap snapshot dir ${heapSnapshotDir}: ${String(err)}`);
resolve(1);
return;
}
resolvedNodeOptions = ensureNodeOptionFlag(
resolvedNodeOptions,
"--diagnostic-dir=",
`--diagnostic-dir=${heapSnapshotDir}`,
);
resolvedNodeOptions = ensureNodeOptionFlag(
resolvedNodeOptions,
"--heapsnapshot-signal=",
`--heapsnapshot-signal=${heapSnapshotSignal}`,
);
}
}
let output = "";
let fatalSeen = false;
let childError = null;
let child;
let pendingLine = "";
let memoryPollTimer = null;
let heapSnapshotTimer = null;
const memoryFileRecords = [];
let initialTreeSample = null;
let latestTreeSample = null;
let peakTreeSample = null;
let heapSnapshotSequence = 0;
const updatePeakTreeSample = (sample, reason) => {
if (!sample) {
return;
@@ -782,6 +826,35 @@ const runOnce = (entry, extraArgs = []) =>
peakTreeSample = { ...sample, reason };
}
};
const triggerHeapSnapshot = (reason) => {
if (!heapSnapshotEnabled || !child?.pid || !heapSnapshotDir) {
return;
}
const records = getProcessTreeRecords(child.pid) ?? [];
const targetPids = records
.filter((record) => record.pid !== process.pid && isNodeLikeProcess(record.command))
.map((record) => record.pid);
if (targetPids.length === 0) {
return;
}
heapSnapshotSequence += 1;
let signaledCount = 0;
for (const pid of targetPids) {
try {
process.kill(pid, heapSnapshotSignal);
signaledCount += 1;
} catch {
// Process likely exited between ps sampling and signal delivery.
}
}
if (signaledCount > 0) {
console.log(
`[test-parallel][heap] ${entry.name} seq=${String(heapSnapshotSequence)} reason=${reason} signaled=${String(
signaledCount,
)}/${String(targetPids.length)} dir=${heapSnapshotDir}`,
);
}
};
const captureTreeSample = (reason) => {
if (!memoryTraceEnabled || !child?.pid) {
return null;
@@ -877,6 +950,11 @@ const runOnce = (entry, extraArgs = []) =>
captureTreeSample("poll");
}, memoryTracePollMs);
}
if (heapSnapshotEnabled) {
heapSnapshotTimer = setInterval(() => {
triggerHeapSnapshot("interval");
}, heapSnapshotIntervalMs);
}
} catch (err) {
console.error(`[test-parallel] spawn failed: ${String(err)}`);
resolve(1);
@@ -905,6 +983,9 @@ const runOnce = (entry, extraArgs = []) =>
if (memoryPollTimer) {
clearInterval(memoryPollTimer);
}
if (heapSnapshotTimer) {
clearInterval(heapSnapshotTimer);
}
children.delete(child);
const resolvedCode = resolveTestRunExitCode({ code, signal, output, fatalSeen, childError });
logMemoryTraceSummary();