mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-20 14:30:57 +00:00
Tests: Add tooling / skill for detecting and fixing memory leaks in tests (#50654)
* Tests: add periodic heap snapshot tooling
* Skills: add test heap leak workflow
* Apply suggestion from @greptile-apps[bot]
  Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
* Update scripts/test-parallel.mjs
  Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
---------
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
@@ -11,7 +11,7 @@ const ANSI_ESCAPE_PATTERN = new RegExp(
|
||||
const COMPLETED_TEST_FILE_LINE_PATTERN =
|
||||
/(?<file>(?:src|extensions|test|ui)\/\S+?\.(?:live\.test|e2e\.test|test)\.ts)\s+\(.*\)\s+(?<duration>\d+(?:\.\d+)?)(?<unit>ms|s)\s*$/;
|
||||
|
||||
const PS_COLUMNS = ["pid=", "ppid=", "rss="];
|
||||
const PS_COLUMNS = ["pid=", "ppid=", "rss=", "comm="];
|
||||
|
||||
function parseDurationMs(rawValue, unit) {
|
||||
const parsed = Number.parseFloat(rawValue);
|
||||
@@ -41,7 +41,7 @@ export function parseCompletedTestFileLines(text) {
|
||||
.filter((entry) => entry !== null);
|
||||
}
|
||||
|
||||
export function sampleProcessTreeRssKb(rootPid) {
|
||||
export function getProcessTreeRecords(rootPid) {
|
||||
if (!Number.isInteger(rootPid) || rootPid <= 0 || process.platform === "win32") {
|
||||
return null;
|
||||
}
|
||||
@@ -54,13 +54,13 @@ export function sampleProcessTreeRssKb(rootPid) {
|
||||
}
|
||||
|
||||
const childPidsByParent = new Map();
|
||||
const rssByPid = new Map();
|
||||
const recordsByPid = new Map();
|
||||
for (const line of result.stdout.split(/\r?\n/u)) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
const [pidRaw, parentRaw, rssRaw] = trimmed.split(/\s+/u);
|
||||
const [pidRaw, parentRaw, rssRaw, commandRaw] = trimmed.split(/\s+/u, 4);
|
||||
const pid = Number.parseInt(pidRaw ?? "", 10);
|
||||
const parentPid = Number.parseInt(parentRaw ?? "", 10);
|
||||
const rssKb = Number.parseInt(rssRaw ?? "", 10);
|
||||
@@ -70,27 +70,30 @@ export function sampleProcessTreeRssKb(rootPid) {
|
||||
const siblings = childPidsByParent.get(parentPid) ?? [];
|
||||
siblings.push(pid);
|
||||
childPidsByParent.set(parentPid, siblings);
|
||||
rssByPid.set(pid, rssKb);
|
||||
recordsByPid.set(pid, {
|
||||
pid,
|
||||
parentPid,
|
||||
rssKb,
|
||||
command: commandRaw ?? "",
|
||||
});
|
||||
}
|
||||
|
||||
if (!rssByPid.has(rootPid)) {
|
||||
if (!recordsByPid.has(rootPid)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let rssKb = 0;
|
||||
let processCount = 0;
|
||||
const queue = [rootPid];
|
||||
const visited = new Set();
|
||||
const records = [];
|
||||
while (queue.length > 0) {
|
||||
const pid = queue.shift();
|
||||
if (pid === undefined || visited.has(pid)) {
|
||||
continue;
|
||||
}
|
||||
visited.add(pid);
|
||||
const currentRssKb = rssByPid.get(pid);
|
||||
if (currentRssKb !== undefined) {
|
||||
rssKb += currentRssKb;
|
||||
processCount += 1;
|
||||
const record = recordsByPid.get(pid);
|
||||
if (record) {
|
||||
records.push(record);
|
||||
}
|
||||
for (const childPid of childPidsByParent.get(pid) ?? []) {
|
||||
if (!visited.has(childPid)) {
|
||||
@@ -99,5 +102,21 @@ export function sampleProcessTreeRssKb(rootPid) {
|
||||
}
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
/**
 * Total the resident set size of a process tree.
 *
 * @param {number} rootPid - PID handed to getProcessTreeRecords as the tree root.
 * @returns {{ rssKb: number, processCount: number } | null} The summed `rssKb`
 *   of every record and the number of records, or null when
 *   getProcessTreeRecords returns nothing for `rootPid`.
 */
export function sampleProcessTreeRssKb(rootPid) {
  const treeRecords = getProcessTreeRecords(rootPid);
  if (!treeRecords) {
    return null;
  }

  // Fold every record into one running total instead of tracking two counters.
  return treeRecords.reduce(
    (totals, { rssKb }) => ({
      rssKb: totals.rssKb + rssKb,
      processCount: totals.processCount + 1,
    }),
    { rssKb: 0, processCount: 0 },
  );
}
|
||||
|
||||
@@ -4,7 +4,11 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { channelTestPrefixes } from "../vitest.channel-paths.mjs";
|
||||
import { isUnitConfigTestFile } from "../vitest.unit-paths.mjs";
|
||||
import { parseCompletedTestFileLines, sampleProcessTreeRssKb } from "./test-parallel-memory.mjs";
|
||||
import {
|
||||
getProcessTreeRecords,
|
||||
parseCompletedTestFileLines,
|
||||
sampleProcessTreeRssKb,
|
||||
} from "./test-parallel-memory.mjs";
|
||||
import {
|
||||
appendCapturedOutput,
|
||||
hasFatalTestRunOutput,
|
||||
@@ -725,6 +729,25 @@ const memoryTraceEnabled =
|
||||
(rawMemoryTrace !== "0" && rawMemoryTrace !== "false" && isCI));
|
||||
const memoryTracePollMs = Math.max(250, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_POLL_MS", 1000));
|
||||
const memoryTraceTopCount = Math.max(1, parseEnvNumber("OPENCLAW_TEST_MEMORY_TRACE_TOP_COUNT", 6));
|
||||
// Delay between heap snapshot requests, in milliseconds. The value comes from
// parseEnvNumber("OPENCLAW_TEST_HEAPSNAPSHOT_INTERVAL_MS", 0) and is clamped so
// it is never negative.
const heapSnapshotIntervalMs = Math.max(
  0,
  parseEnvNumber("OPENCLAW_TEST_HEAPSNAPSHOT_INTERVAL_MS", 0),
);
// Shortest interval for which periodic snapshots are turned on; anything below
// this (including the default of 0) leaves the feature disabled.
const heapSnapshotMinIntervalMs = 5000;
// Must be declared exactly once: a second `const heapSnapshotEnabled` in this
// scope is a redeclaration SyntaxError that stops the whole module from loading.
// win32 is excluded because getProcessTreeRecords returns null there, so no
// target processes could be found.
const heapSnapshotEnabled =
  process.platform !== "win32" && heapSnapshotIntervalMs >= heapSnapshotMinIntervalMs;
// Signal sent to request a snapshot; a blank or unset override falls back to SIGUSR2.
const heapSnapshotSignal = process.env.OPENCLAW_TEST_HEAPSNAPSHOT_SIGNAL?.trim() || "SIGUSR2";
// Absolute base directory for snapshot output, or null while the feature is
// disabled. Without an explicit OPENCLAW_TEST_HEAPSNAPSHOT_DIR a timestamped
// directory under the OS temp dir is used.
const heapSnapshotBaseDir = heapSnapshotEnabled
  ? path.resolve(
      process.env.OPENCLAW_TEST_HEAPSNAPSHOT_DIR?.trim() ||
        path.join(os.tmpdir(), `openclaw-heapsnapshots-${Date.now()}`),
    )
  : null;
|
||||
/**
 * Append a flag to a NODE_OPTIONS-style string unless it is already present.
 *
 * @param {string} nodeOptions - Existing option string.
 * @param {string} flagPrefix - Substring whose presence means the flag is already set.
 * @param {string} nextValue - Full flag text to append when the prefix is absent.
 * @returns {string} `nodeOptions` untouched when it contains `flagPrefix`,
 *   otherwise the two joined by a space with outer whitespace trimmed.
 */
const ensureNodeOptionFlag = (nodeOptions, flagPrefix, nextValue) => {
  if (nodeOptions.includes(flagPrefix)) {
    return nodeOptions;
  }
  const appended = `${nodeOptions} ${nextValue}`;
  return appended.trim();
};
|
||||
/**
 * Report whether a command names the Node.js executable.
 *
 * Only the part after the last "/" is inspected, and it must be exactly
 * "node" or "node.exe" in any letter case.
 *
 * @param {string} command - Command name or path to check.
 * @returns {boolean} True when the final path segment is node / node.exe.
 */
const isNodeLikeProcess = (command) => {
  const commandText = String(command);
  const executable = commandText.slice(commandText.lastIndexOf("/") + 1).toLowerCase();
  return executable === "node" || executable === "node.exe";
};
|
||||
|
||||
const runOnce = (entry, extraArgs = []) =>
|
||||
new Promise((resolve) => {
|
||||
@@ -757,23 +780,44 @@ const runOnce = (entry, extraArgs = []) =>
|
||||
(acc, flag) => (acc.includes(flag) ? acc : `${acc} ${flag}`.trim()),
|
||||
nodeOptions,
|
||||
);
|
||||
const heapFlag =
|
||||
const heapSnapshotDir =
|
||||
heapSnapshotBaseDir === null ? null : path.join(heapSnapshotBaseDir, entry.name);
|
||||
let resolvedNodeOptions =
|
||||
maxOldSpaceSizeMb && !nextNodeOptions.includes("--max-old-space-size=")
|
||||
? `--max-old-space-size=${maxOldSpaceSizeMb}`
|
||||
: null;
|
||||
const resolvedNodeOptions = heapFlag
|
||||
? `${nextNodeOptions} ${heapFlag}`.trim()
|
||||
: nextNodeOptions;
|
||||
? `${nextNodeOptions} --max-old-space-size=${maxOldSpaceSizeMb}`.trim()
|
||||
: nextNodeOptions;
|
||||
if (heapSnapshotEnabled && heapSnapshotDir) {
|
||||
try {
|
||||
fs.mkdirSync(heapSnapshotDir, { recursive: true });
|
||||
} catch (err) {
|
||||
console.error(`[test-parallel] failed to create heap snapshot dir ${heapSnapshotDir}: ${String(err)}`);
|
||||
resolve(1);
|
||||
return;
|
||||
}
|
||||
resolvedNodeOptions = ensureNodeOptionFlag(
|
||||
resolvedNodeOptions,
|
||||
"--diagnostic-dir=",
|
||||
`--diagnostic-dir=${heapSnapshotDir}`,
|
||||
);
|
||||
resolvedNodeOptions = ensureNodeOptionFlag(
|
||||
resolvedNodeOptions,
|
||||
"--heapsnapshot-signal=",
|
||||
`--heapsnapshot-signal=${heapSnapshotSignal}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
let output = "";
|
||||
let fatalSeen = false;
|
||||
let childError = null;
|
||||
let child;
|
||||
let pendingLine = "";
|
||||
let memoryPollTimer = null;
|
||||
let heapSnapshotTimer = null;
|
||||
const memoryFileRecords = [];
|
||||
let initialTreeSample = null;
|
||||
let latestTreeSample = null;
|
||||
let peakTreeSample = null;
|
||||
let heapSnapshotSequence = 0;
|
||||
const updatePeakTreeSample = (sample, reason) => {
|
||||
if (!sample) {
|
||||
return;
|
||||
@@ -782,6 +826,35 @@ const runOnce = (entry, extraArgs = []) =>
|
||||
peakTreeSample = { ...sample, reason };
|
||||
}
|
||||
};
|
||||
/**
 * Ask every node-like process in the child's process tree for a heap snapshot.
 *
 * Does nothing when snapshots are disabled, when there is no child PID or
 * snapshot directory, or when the tree contains no node-like process other
 * than this one. The sequence counter advances whenever targets exist, even if
 * every signal fails; a log line is written only when at least one succeeds.
 *
 * @param {string} reason - Label copied into the log line.
 */
const triggerHeapSnapshot = (reason) => {
  if (!(heapSnapshotEnabled && child?.pid && heapSnapshotDir)) {
    return;
  }

  // Collect node-like descendants, never signalling this runner itself.
  const nodePids = [];
  for (const { pid, command } of getProcessTreeRecords(child.pid) ?? []) {
    if (pid !== process.pid && isNodeLikeProcess(command)) {
      nodePids.push(pid);
    }
  }
  if (nodePids.length === 0) {
    return;
  }

  heapSnapshotSequence += 1;
  const deliveredCount = nodePids.filter((pid) => {
    try {
      process.kill(pid, heapSnapshotSignal);
      return true;
    } catch {
      // Process likely exited between ps sampling and signal delivery.
      return false;
    }
  }).length;
  if (deliveredCount === 0) {
    return;
  }

  const sequenceLabel = String(heapSnapshotSequence);
  const deliveredLabel = String(deliveredCount);
  const targetLabel = String(nodePids.length);
  console.log(
    `[test-parallel][heap] ${entry.name} seq=${sequenceLabel} reason=${reason} signaled=${deliveredLabel}/${targetLabel} dir=${heapSnapshotDir}`,
  );
};
|
||||
const captureTreeSample = (reason) => {
|
||||
if (!memoryTraceEnabled || !child?.pid) {
|
||||
return null;
|
||||
@@ -877,6 +950,11 @@ const runOnce = (entry, extraArgs = []) =>
|
||||
captureTreeSample("poll");
|
||||
}, memoryTracePollMs);
|
||||
}
|
||||
if (heapSnapshotEnabled) {
|
||||
heapSnapshotTimer = setInterval(() => {
|
||||
triggerHeapSnapshot("interval");
|
||||
}, heapSnapshotIntervalMs);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`[test-parallel] spawn failed: ${String(err)}`);
|
||||
resolve(1);
|
||||
@@ -905,6 +983,9 @@ const runOnce = (entry, extraArgs = []) =>
|
||||
if (memoryPollTimer) {
|
||||
clearInterval(memoryPollTimer);
|
||||
}
|
||||
if (heapSnapshotTimer) {
|
||||
clearInterval(heapSnapshotTimer);
|
||||
}
|
||||
children.delete(child);
|
||||
const resolvedCode = resolveTestRunExitCode({ code, signal, output, fatalSeen, childError });
|
||||
logMemoryTraceSummary();
|
||||
|
||||
Reference in New Issue
Block a user