mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-26 09:45:11 +00:00
fix(qa-lab): clean orphaned gateway runtimes
This commit is contained in:
@@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Codex app-server: rotate oversized native Codex threads before resume and cap dynamic tool-result text entering native Codex sessions, preventing stale oversized context from surviving OpenClaw compaction. (#82981) Thanks @hansolo949.
|
||||
- Gateway/restart: drain pending replies and active chat runs during restart shutdown before sockets and channels close, aborting timed-out chat runs through the normal cleanup path. (#69121) Thanks @alexlomt.
|
||||
- Agents/Codex: use the Codex runtime context window for OpenAI-model preflight compaction and memory flush checks, so GPT-5.5 Codex sessions compact before hitting the smaller native context limit. Fixes #82982. Thanks @vliuyt.
|
||||
- QA-Lab: clean orphaned gateway temp roots when a suite parent exits and wait on gateway plus transport readiness after config restarts, reducing stale `qa-channel` noise from interrupted runs. Fixes #65506. Thanks @100yenadmin.
|
||||
- QA-Lab: wake qa-bus long polls that arrive with stale future cursors after a bus restart, preserving reconnect readiness for harness clients. (#67142) Thanks @hxy91819.
|
||||
- QA-Lab: stage Multipass transfer scripts under OpenClaw's preferred temp root instead of raw OS temp paths, keeping the VM runner inside temp-path guardrails. (#64098) Thanks @ImLukeF.
|
||||
- Agents/replies: keep surviving reply media and append a warning when other media references fail, so partial media normalization no longer drops failures silently. Thanks @Jerry-Xin.
|
||||
|
||||
@@ -46,10 +46,12 @@ function createParams(baseEnv?: NodeJS.ProcessEnv) {
|
||||
gatewayToken: "qa-token",
|
||||
homeDir: "/tmp/openclaw-qa/home",
|
||||
stateDir: "/tmp/openclaw-qa/state",
|
||||
tempRoot: "/tmp/openclaw-qa",
|
||||
xdgConfigHome: "/tmp/openclaw-qa/xdg-config",
|
||||
xdgDataHome: "/tmp/openclaw-qa/xdg-data",
|
||||
xdgCacheHome: "/tmp/openclaw-qa/xdg-cache",
|
||||
bundledPluginsDir: "/tmp/openclaw-qa/bundled-plugins",
|
||||
stagedBundledPluginsRoot: "/repo/.artifacts/qa-runtime/openclaw-qa-suite-test",
|
||||
compatibilityHostVersion: "2026.4.8",
|
||||
baseEnv,
|
||||
};
|
||||
@@ -139,6 +141,10 @@ describe("buildQaRuntimeEnv", () => {
|
||||
|
||||
expect(env.OPENCLAW_TEST_FAST).toBe("1");
|
||||
expect(env.OPENCLAW_QA_PARENT_PID).toBe(String(process.pid));
|
||||
expect(env.OPENCLAW_QA_TEMP_ROOT).toBe("/tmp/openclaw-qa");
|
||||
expect(env.OPENCLAW_QA_STAGED_RUNTIME_ROOT).toBe(
|
||||
"/repo/.artifacts/qa-runtime/openclaw-qa-suite-test",
|
||||
);
|
||||
expect(env.OPENCLAW_QA_ALLOW_LOCAL_IMAGE_PROVIDER).toBe("1");
|
||||
expect(env.OPENCLAW_ALLOW_SLOW_REPLY_TESTS).toBe("1");
|
||||
expect(env.OPENCLAW_SKIP_STARTUP_MODEL_PREWARM).toBe("1");
|
||||
|
||||
@@ -183,10 +183,12 @@ export function buildQaRuntimeEnv(params: {
|
||||
homeDir: string;
|
||||
forwardHostHome?: boolean;
|
||||
stateDir: string;
|
||||
tempRoot: string;
|
||||
xdgConfigHome: string;
|
||||
xdgDataHome: string;
|
||||
xdgCacheHome: string;
|
||||
bundledPluginsDir?: string;
|
||||
stagedBundledPluginsRoot?: string | null;
|
||||
compatibilityHostVersion?: string;
|
||||
providerMode?: QaProviderMode;
|
||||
baseEnv?: NodeJS.ProcessEnv;
|
||||
@@ -219,6 +221,10 @@ export function buildQaRuntimeEnv(params: {
|
||||
OPENCLAW_NO_RESPAWN: "1",
|
||||
OPENCLAW_TEST_FAST: "1",
|
||||
OPENCLAW_QA_PARENT_PID: String(process.pid),
|
||||
OPENCLAW_QA_TEMP_ROOT: params.tempRoot,
|
||||
...(params.stagedBundledPluginsRoot
|
||||
? { OPENCLAW_QA_STAGED_RUNTIME_ROOT: params.stagedBundledPluginsRoot }
|
||||
: {}),
|
||||
OPENCLAW_QA_ALLOW_LOCAL_IMAGE_PROVIDER: "1",
|
||||
// QA uses the fast runtime envelope for speed, but it still exercises
|
||||
// normal config-driven heartbeats and runtime config writes.
|
||||
@@ -666,10 +672,12 @@ export async function startQaGatewayChild(params: {
|
||||
homeDir,
|
||||
forwardHostHome: params.forwardHostHome,
|
||||
stateDir,
|
||||
tempRoot,
|
||||
xdgConfigHome,
|
||||
xdgDataHome,
|
||||
xdgCacheHome,
|
||||
bundledPluginsDir: stagedPluginRuntime.bundledPluginsDir,
|
||||
stagedBundledPluginsRoot,
|
||||
compatibilityHostVersion: stagedPluginRuntime.runtimeHostVersion,
|
||||
providerMode,
|
||||
forwardHostHomeForClaudeCli: liveProviderIds.includes("claude-cli"),
|
||||
|
||||
@@ -1,10 +1,30 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
getGatewayRetryAfterMs,
|
||||
isConfigApplyNoopForSnapshot,
|
||||
isConfigHashConflict,
|
||||
isConfigPatchNoopForSnapshot,
|
||||
waitForConfigRestartSettle,
|
||||
} from "./suite-runtime-gateway.js";
|
||||
import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js";
|
||||
|
||||
const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||||
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
|
||||
}));
|
||||
|
||||
afterEach(() => {
|
||||
fetchWithSsrFGuardMock.mockReset();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
/**
 * Builds the stub environment handed to `waitForConfigRestartSettle` in the
 * tests: a gateway pinned to a fixed loopback base URL and a transport whose
 * `waitReady` is the supplied callback.
 *
 * @param waitReady - Callback installed as `transport.waitReady`.
 * @returns The stub, typed as the `gateway`/`transport` slice of the runtime env.
 */
function createRestartSettleEnv(waitReady: (params: unknown) => Promise<void>) {
  const gateway = { baseUrl: "http://127.0.0.1:43123" };
  const transport = { waitReady };
  // Cast through `unknown` because this is a hand-built object rather than a
  // real runtime env.
  const stub: unknown = { gateway, transport };
  return stub as Pick<QaSuiteRuntimeEnv, "gateway" | "transport">;
}
|
||||
|
||||
describe("qa suite gateway helpers", () => {
|
||||
it("reads retry-after from the primary gateway error before appended logs", () => {
|
||||
@@ -113,4 +133,45 @@ describe("qa suite gateway helpers", () => {
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("waits for transport readiness after gateway restart health", async () => {
|
||||
const release = vi.fn(async () => {});
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: { ok: true },
|
||||
release,
|
||||
});
|
||||
const waitReady = vi.fn(async () => {});
|
||||
|
||||
await waitForConfigRestartSettle(createRestartSettleEnv(waitReady), 0, 1_000);
|
||||
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:43123/readyz",
|
||||
auditContext: "qa-lab-suite-wait-for-gateway-healthy",
|
||||
}),
|
||||
);
|
||||
expect(waitReady).toHaveBeenCalledWith({
|
||||
gateway: { baseUrl: "http://127.0.0.1:43123" },
|
||||
timeoutMs: expect.any(Number),
|
||||
});
|
||||
expect(release).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps polling gateway health instead of sleeping blindly through restart settle", async () => {
|
||||
vi.useFakeTimers();
|
||||
const release = vi.fn(async () => {});
|
||||
fetchWithSsrFGuardMock.mockRejectedValueOnce(new Error("restart boundary")).mockResolvedValue({
|
||||
response: { ok: true },
|
||||
release,
|
||||
});
|
||||
const waitReady = vi.fn(async () => {});
|
||||
|
||||
const settling = waitForConfigRestartSettle(createRestartSettleEnv(waitReady), 500, 5_000);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1_250);
|
||||
await settling;
|
||||
|
||||
expect(fetchWithSsrFGuardMock).toHaveBeenCalledTimes(2);
|
||||
expect(waitReady).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -65,8 +65,30 @@ async function waitForConfigRestartSettle(
|
||||
restartDelayMs = 1_000,
|
||||
timeoutMs = 60_000,
|
||||
) {
|
||||
await sleep(restartDelayMs + 750);
|
||||
await waitForGatewayHealthy(env, timeoutMs);
|
||||
const startedAt = Date.now();
|
||||
const deadline = startedAt + timeoutMs;
|
||||
const readyAfterMs = restartDelayMs + 750;
|
||||
let lastHealthError: unknown = null;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
try {
|
||||
await waitForGatewayHealthy(env, Math.max(1, Math.min(1_000, deadline - Date.now())));
|
||||
if (Date.now() - startedAt >= readyAfterMs) {
|
||||
const remainingMs = Math.max(1, deadline - Date.now());
|
||||
await waitForTransportReady(env, remainingMs);
|
||||
return;
|
||||
}
|
||||
} catch (error) {
|
||||
lastHealthError = error;
|
||||
}
|
||||
await sleep(Math.min(250, Math.max(1, deadline - Date.now())));
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`timed out after ${timeoutMs}ms waiting for config restart readiness${
|
||||
lastHealthError ? `: ${formatErrorMessage(lastHealthError)}` : ""
|
||||
}`,
|
||||
);
|
||||
}
|
||||
|
||||
function formatGatewayPrimaryErrorText(error: unknown) {
|
||||
|
||||
@@ -1,6 +1,17 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
|
||||
|
||||
const fetchWithSsrFGuardMock = vi.hoisted(() => vi.fn());
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||||
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
|
||||
}));
|
||||
|
||||
afterEach(() => {
|
||||
fetchWithSsrFGuardMock.mockReset();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
describe("qa suite", () => {
|
||||
it("rejects unsupported transport ids before starting the lab", async () => {
|
||||
const startLab = vi.fn();
|
||||
@@ -23,6 +34,46 @@ describe("qa suite", () => {
|
||||
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("maybe")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("stops an owned lab when readiness never becomes healthy", async () => {
|
||||
const stop = vi.fn(async () => {});
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: { ok: false },
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
|
||||
await expect(
|
||||
qaSuiteProgressTesting.waitForQaLabReadyOrStopOwned({
|
||||
lab: {
|
||||
listenUrl: "http://127.0.0.1:43123",
|
||||
stop,
|
||||
},
|
||||
ownsLab: true,
|
||||
timeoutMs: 1,
|
||||
}),
|
||||
).rejects.toThrow("timed out after 1ms waiting for qa-lab ready");
|
||||
expect(stop).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("leaves caller-owned labs running when readiness never becomes healthy", async () => {
|
||||
const stop = vi.fn(async () => {});
|
||||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||||
response: { ok: false },
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
|
||||
await expect(
|
||||
qaSuiteProgressTesting.waitForQaLabReadyOrStopOwned({
|
||||
lab: {
|
||||
listenUrl: "http://127.0.0.1:43123",
|
||||
stop,
|
||||
},
|
||||
ownsLab: false,
|
||||
timeoutMs: 1,
|
||||
}),
|
||||
).rejects.toThrow("timed out after 1ms waiting for qa-lab ready");
|
||||
expect(stop).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("defaults progress logging from CI when no override is set", () => {
|
||||
expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "true" })).toBe(true);
|
||||
expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "false" })).toBe(false);
|
||||
|
||||
@@ -4,6 +4,7 @@ import { setTimeout as sleep } from "node:timers/promises";
|
||||
import { disposeRegisteredAgentHarnesses } from "openclaw/plugin-sdk/agent-harness";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import { startQaGatewayChild, type QaCliBackendAuthMode } from "./gateway-child.js";
|
||||
import type {
|
||||
QaLabLatestReport,
|
||||
@@ -151,6 +152,45 @@ function writeQaSuiteProgress(enabled: boolean, message: string) {
|
||||
process.stderr.write(`[qa-suite] ${message}\n`);
|
||||
}
|
||||
|
||||
/**
 * Polls `${baseUrl}/readyz` until it answers with an OK response.
 *
 * Fetch failures and non-OK responses are retried about every 100ms. The
 * pause between attempts is clamped to the time left in the budget, so a
 * small `timeoutMs` is not overshot by a full poll interval.
 *
 * @param baseUrl - Base URL of the QA lab server; `/readyz` is appended verbatim.
 * @param timeoutMs - Total polling budget in milliseconds (default 10s).
 * @throws Error once the budget is spent without an OK response. When the
 *   most recent attempt threw, its message is appended to aid diagnosis.
 */
async function waitForQaLabReady(baseUrl: string, timeoutMs = 10_000) {
  const pollIntervalMs = 100;
  const deadline = Date.now() + timeoutMs;
  let lastError: unknown;
  let lastAttemptThrew = false;

  while (Date.now() < deadline) {
    try {
      const { response, release } = await fetchWithSsrFGuard({
        url: `${baseUrl}/readyz`,
        policy: { allowPrivateNetwork: true },
        auditContext: "qa-lab-suite-wait-for-lab-ready",
      });
      try {
        if (response.ok) {
          return;
        }
      } finally {
        // Always hand the guarded connection back, ready or not.
        await release();
      }
      // The server answered (just not OK), so any earlier error is stale.
      lastError = undefined;
      lastAttemptThrew = false;
    } catch (error) {
      // Remember why the attempt failed, then retry.
      lastError = error;
      lastAttemptThrew = true;
    }

    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) {
      break;
    }
    await sleep(Math.min(pollIntervalMs, remainingMs));
  }

  const detail = lastAttemptThrew
    ? `: ${lastError instanceof Error ? lastError.message : String(lastError)}`
    : "";
  throw new Error(`timed out after ${timeoutMs}ms waiting for qa-lab ready${detail}`);
}
|
||||
|
||||
/**
 * Waits for the lab to report ready and, if that fails, stops the lab when
 * this run owns it.
 *
 * @param params.lab - Lab handle; only `listenUrl` and `stop` are used.
 * @param params.ownsLab - When true, the lab is stopped after a readiness failure.
 * @param params.timeoutMs - Optional readiness budget forwarded to `waitForQaLabReady`.
 * @throws The original readiness error. A failure while stopping an owned lab
 *   is reported on stderr instead of being thrown, so it cannot mask the
 *   readiness error the caller needs to see.
 */
async function waitForQaLabReadyOrStopOwned(params: {
  lab: Pick<QaLabServerHandle, "listenUrl" | "stop">;
  ownsLab: boolean;
  timeoutMs?: number;
}) {
  try {
    await waitForQaLabReady(params.lab.listenUrl, params.timeoutMs);
  } catch (error) {
    if (params.ownsLab) {
      try {
        await params.lab.stop();
      } catch (stopError) {
        // Best-effort cleanup: surface the failure but keep the primary error.
        const detail = stopError instanceof Error ? stopError.message : String(stopError);
        process.stderr.write(
          `[qa-suite] failed to stop owned lab after readiness failure: ${detail}\n`,
        );
      }
    }
    throw error;
  }
}
|
||||
|
||||
function sanitizeQaSuiteProgressValue(value: string): string {
|
||||
let normalized = "";
|
||||
for (const char of value) {
|
||||
@@ -1068,6 +1108,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
embeddedGateway: "disabled",
|
||||
}));
|
||||
writeQaSuiteProgress(progressEnabled, `lab ready: ${sanitizeQaSuiteProgressValue(lab.baseUrl)}`);
|
||||
await waitForQaLabReadyOrStopOwned({ lab, ownsLab });
|
||||
const transport = createQaTransportAdapter({
|
||||
id: transportId,
|
||||
state: lab.state,
|
||||
@@ -1302,4 +1343,5 @@ export const qaSuiteProgressTesting = {
|
||||
resolveQaSuiteTransportReadyTimeoutMs,
|
||||
sanitizeQaSuiteProgressValue,
|
||||
shouldLogQaSuiteProgress,
|
||||
waitForQaLabReadyOrStopOwned,
|
||||
};
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { installQaParentWatchdog, QA_PARENT_PID_ENV } from "./qa-parent-watchdog.js";
|
||||
import {
|
||||
installQaParentWatchdog,
|
||||
QA_PARENT_PID_ENV,
|
||||
QA_STAGED_RUNTIME_ROOT_ENV,
|
||||
QA_TEMP_ROOT_ENV,
|
||||
} from "./qa-parent-watchdog.js";
|
||||
|
||||
describe("installQaParentWatchdog", () => {
|
||||
it("does not install without a QA parent pid", () => {
|
||||
@@ -10,13 +15,15 @@ describe("installQaParentWatchdog", () => {
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it("exits when the QA parent process disappears", () => {
|
||||
it("exits when the QA parent process disappears", async () => {
|
||||
let tick: () => void = () => {
|
||||
throw new Error("watchdog interval was not installed");
|
||||
};
|
||||
const timer = { unref: vi.fn() };
|
||||
const chdir = vi.fn();
|
||||
const clearIntervalMock = vi.fn();
|
||||
const exit = vi.fn();
|
||||
const rm = vi.fn(async () => {});
|
||||
const logger = { warn: vi.fn() };
|
||||
const kill = vi.fn(() => {
|
||||
const error = new Error("missing") as NodeJS.ErrnoException;
|
||||
@@ -25,12 +32,19 @@ describe("installQaParentWatchdog", () => {
|
||||
});
|
||||
|
||||
const handle = installQaParentWatchdog({
|
||||
chdir,
|
||||
clearInterval: clearIntervalMock,
|
||||
env: { [QA_PARENT_PID_ENV]: "12345" },
|
||||
cwd: () => "/tmp/openclaw-qa-suite-test",
|
||||
env: {
|
||||
[QA_PARENT_PID_ENV]: "12345",
|
||||
[QA_STAGED_RUNTIME_ROOT_ENV]: "/repo/.artifacts/qa-runtime/openclaw-qa-suite-test",
|
||||
[QA_TEMP_ROOT_ENV]: "/tmp/openclaw-qa-suite-test",
|
||||
},
|
||||
exit,
|
||||
kill,
|
||||
logger,
|
||||
ownPid: 10,
|
||||
rm,
|
||||
setInterval: (callback) => {
|
||||
tick = callback;
|
||||
return timer;
|
||||
@@ -45,6 +59,46 @@ describe("installQaParentWatchdog", () => {
|
||||
"QA gateway parent pid 12345 exited; shutting down orphaned QA gateway",
|
||||
);
|
||||
expect(clearIntervalMock).toHaveBeenCalledWith(timer);
|
||||
expect(exit).toHaveBeenCalledWith(0);
|
||||
await vi.waitFor(() => {
|
||||
expect(chdir).toHaveBeenCalledWith("/tmp");
|
||||
expect(rm).toHaveBeenCalledWith("/tmp/openclaw-qa-suite-test");
|
||||
expect(rm).toHaveBeenCalledWith("/repo/.artifacts/qa-runtime/openclaw-qa-suite-test");
|
||||
expect(exit).toHaveBeenCalledWith(0);
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores unsafe QA temp root cleanup paths", async () => {
|
||||
let tick: () => void = () => {
|
||||
throw new Error("watchdog interval was not installed");
|
||||
};
|
||||
const exit = vi.fn();
|
||||
const rm = vi.fn(async () => {});
|
||||
const kill = vi.fn(() => {
|
||||
const error = new Error("missing") as NodeJS.ErrnoException;
|
||||
error.code = "ESRCH";
|
||||
throw error;
|
||||
});
|
||||
|
||||
installQaParentWatchdog({
|
||||
env: {
|
||||
[QA_PARENT_PID_ENV]: "12345",
|
||||
[QA_STAGED_RUNTIME_ROOT_ENV]: "/repo/.artifacts/qa-runtime/not-qa-suite",
|
||||
[QA_TEMP_ROOT_ENV]: "/tmp/not-qa-suite",
|
||||
},
|
||||
exit,
|
||||
kill,
|
||||
logger: { warn: vi.fn() },
|
||||
ownPid: 10,
|
||||
rm,
|
||||
setInterval: (callback) => {
|
||||
tick = callback;
|
||||
return { unref: vi.fn() };
|
||||
},
|
||||
});
|
||||
|
||||
tick();
|
||||
|
||||
await vi.waitFor(() => expect(exit).toHaveBeenCalledWith(0));
|
||||
expect(rm).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
|
||||
export const QA_PARENT_PID_ENV = "OPENCLAW_QA_PARENT_PID";
|
||||
export const QA_TEMP_ROOT_ENV = "OPENCLAW_QA_TEMP_ROOT";
|
||||
export const QA_STAGED_RUNTIME_ROOT_ENV = "OPENCLAW_QA_STAGED_RUNTIME_ROOT";
|
||||
|
||||
const DEFAULT_QA_PARENT_WATCHDOG_INTERVAL_MS = 1000;
|
||||
const QA_TEMP_ROOT_PREFIX = "openclaw-qa-suite-";
|
||||
|
||||
type QaParentWatchdogTimer =
|
||||
| number
|
||||
@@ -11,13 +16,16 @@ type QaParentWatchdogTimer =
|
||||
};
|
||||
|
||||
type QaParentWatchdogDeps = {
|
||||
chdir?: (directory: string) => void;
|
||||
clearInterval?: (timer: QaParentWatchdogTimer) => void;
|
||||
cwd?: () => string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
exit?: (code?: number) => never | void;
|
||||
intervalMs?: number;
|
||||
kill?: (pid: number, signal?: NodeJS.Signals | 0) => boolean;
|
||||
logger?: Pick<ReturnType<typeof createSubsystemLogger>, "warn">;
|
||||
ownPid?: number;
|
||||
rm?: (target: string) => Promise<void>;
|
||||
setInterval?: (callback: () => void, ms: number) => QaParentWatchdogTimer;
|
||||
};
|
||||
|
||||
@@ -38,6 +46,35 @@ function resolveQaParentPid(env: NodeJS.ProcessEnv, ownPid: number): number | nu
|
||||
return parentPid;
|
||||
}
|
||||
|
||||
/**
 * Converts a raw cleanup-root value into an absolute path that is eligible
 * for deletion.
 *
 * @param rawValue - Unvalidated value, typically read from the environment.
 * @returns The resolved absolute path, or `null` when the value is missing,
 *   blank, or its final path segment does not start with `QA_TEMP_ROOT_PREFIX`.
 */
function resolveQaCleanupRoot(rawValue: string | undefined): string | null {
  const trimmed = rawValue?.trim();
  if (trimmed === undefined || trimmed === "") {
    return null;
  }
  const absoluteRoot = path.resolve(trimmed);
  const leafName = path.basename(absoluteRoot);
  // Guardrail: only directories named like a QA suite root may be removed.
  return leafName.startsWith(QA_TEMP_ROOT_PREFIX) ? absoluteRoot : null;
}
|
||||
|
||||
/**
 * Collects the cleanup roots named by the QA temp-root and staged-runtime-root
 * environment variables.
 *
 * @param env - Environment to read the two variables from.
 * @returns The roots accepted by `resolveQaCleanupRoot`, temp root first,
 *   with duplicates removed.
 */
function resolveQaCleanupRoots(env: NodeJS.ProcessEnv): string[] {
  const roots: string[] = [];
  for (const envName of [QA_TEMP_ROOT_ENV, QA_STAGED_RUNTIME_ROOT_ENV]) {
    const root = resolveQaCleanupRoot(env[envName]);
    // Skip rejected values and anything already collected.
    if (root !== null && !roots.includes(root)) {
      roots.push(root);
    }
  }
  return roots;
}
|
||||
|
||||
/**
 * Reports whether `candidate` is `root` itself or lies somewhere beneath it.
 *
 * The comparison is lexical (via `path.relative`); symlinks are not resolved.
 *
 * @param root - Directory that may contain `candidate`.
 * @param candidate - Path to test.
 * @returns `true` when `candidate` equals `root` or is nested inside it.
 */
function pathContains(root: string, candidate: string): boolean {
  const relative = path.relative(root, candidate);
  if (relative === "") {
    return true;
  }
  // Only a leading `..` *segment* leaves the root; a child whose name merely
  // begins with two dots (e.g. `..cache`) is still inside it.
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  // An absolute result means the two paths share no common root (e.g.
  // different Windows drives).
  return !escapesRoot && !path.isAbsolute(relative);
}
|
||||
|
||||
export function installQaParentWatchdog(
|
||||
deps: QaParentWatchdogDeps = {},
|
||||
): QaParentWatchdogHandle | null {
|
||||
@@ -57,10 +94,19 @@ export function installQaParentWatchdog(
|
||||
const kill =
|
||||
deps.kill ?? ((pid: number, signal?: NodeJS.Signals | 0) => process.kill(pid, signal));
|
||||
const logger = deps.logger ?? createSubsystemLogger("gateway");
|
||||
const qaCleanupRoots = resolveQaCleanupRoots(env);
|
||||
const chdir = deps.chdir ?? ((directory: string) => process.chdir(directory));
|
||||
const cwd = deps.cwd ?? (() => process.cwd());
|
||||
const rm =
|
||||
deps.rm ??
|
||||
(async (target: string) => {
|
||||
await fs.rm(target, { recursive: true, force: true });
|
||||
});
|
||||
const setIntervalFn =
|
||||
deps.setInterval ??
|
||||
((callback: () => void, ms: number) => setInterval(callback, ms) as QaParentWatchdogTimer);
|
||||
let stopped = false;
|
||||
let exiting = false;
|
||||
let timer: QaParentWatchdogTimer;
|
||||
|
||||
const stop = () => {
|
||||
@@ -72,7 +118,7 @@ export function installQaParentWatchdog(
|
||||
};
|
||||
|
||||
timer = setIntervalFn(() => {
|
||||
if (stopped) {
|
||||
if (stopped || exiting) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
@@ -80,8 +126,36 @@ export function installQaParentWatchdog(
|
||||
} catch (error) {
|
||||
if ((error as NodeJS.ErrnoException).code === "ESRCH") {
|
||||
logger.warn(`QA gateway parent pid ${parentPid} exited; shutting down orphaned QA gateway`);
|
||||
exiting = true;
|
||||
stop();
|
||||
exit(0);
|
||||
void (async () => {
|
||||
const currentCwd = path.resolve(cwd());
|
||||
const activeCwdRoot = qaCleanupRoots.find((cleanupRoot) =>
|
||||
pathContains(cleanupRoot, currentCwd),
|
||||
);
|
||||
if (activeCwdRoot) {
|
||||
const safeCwd = path.dirname(activeCwdRoot);
|
||||
try {
|
||||
chdir(safeCwd);
|
||||
} catch (chdirError) {
|
||||
logger.warn(
|
||||
`QA gateway parent pid ${parentPid} exited; failed to leave runtime root ${activeCwdRoot}: ${
|
||||
chdirError instanceof Error ? chdirError.message : String(chdirError)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
for (const cleanupRoot of qaCleanupRoots) {
|
||||
await rm(cleanupRoot).catch((cleanupError) => {
|
||||
logger.warn(
|
||||
`QA gateway parent pid ${parentPid} exited; failed to clean runtime root ${cleanupRoot}: ${
|
||||
cleanupError instanceof Error ? cleanupError.message : String(cleanupError)
|
||||
}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
exit(0);
|
||||
})();
|
||||
}
|
||||
}
|
||||
}, deps.intervalMs ?? DEFAULT_QA_PARENT_WATCHDOG_INTERVAL_MS);
|
||||
|
||||
Reference in New Issue
Block a user