diff --git a/scripts/e2e/telegram-user-crabbox-proof.ts b/scripts/e2e/telegram-user-crabbox-proof.ts index b65c7364370..b577308ee8f 100644 --- a/scripts/e2e/telegram-user-crabbox-proof.ts +++ b/scripts/e2e/telegram-user-crabbox-proof.ts @@ -145,6 +145,9 @@ const DEFAULT_OUTPUT_ROOT = ".artifacts/qa-e2e/telegram-user-crabbox"; export const COMMAND_STDOUT_MAX_CHARS = 1024 * 1024; export const COMMAND_STDERR_TAIL_CHARS = 256 * 1024; export const COMMAND_FAILURE_STDOUT_TAIL_CHARS = 64 * 1024; +export const COMMAND_TIMEOUT_MS = 30 * 60 * 1000; +export const COMMAND_TIMEOUT_KILL_GRACE_MS = 5_000; +export const REMOTE_SETUP_COMMAND_TIMEOUT_MS = 90 * 60 * 1000; const REMOTE_ROOT = "/tmp/openclaw-telegram-user-crabbox"; const CREDENTIAL_SCRIPT = fileURLToPath(new URL("./telegram-user-credential.ts", import.meta.url)); export function readTelegramUserProofLogTailBytes(env: NodeJS.ProcessEnv = process.env): number { @@ -561,7 +564,46 @@ function commandFailureOutput(stdout: string, stderr: string): string { return `${stdoutTail}${stderr}`; } -function runCommand(params: { +function timedOutError(message: string) { + return Object.assign(new Error(message), { code: "ETIMEDOUT" }); +} + +const activeCommandChildren = new Set(); +let commandCleanupHandlersInstalled = false; + +function signalCommandTree(child: ChildProcess, signal: NodeJS.Signals) { + if (child.pid && process.platform !== "win32") { + try { + process.kill(-child.pid, signal); + return; + } catch {} + } + child.kill(signal); +} + +function signalActiveCommandChildren(signal: NodeJS.Signals) { + for (const child of activeCommandChildren) { + signalCommandTree(child, signal); + } +} + +function installCommandCleanupHandlers() { + if (commandCleanupHandlersInstalled) { + return; + } + commandCleanupHandlersInstalled = true; + process.once("exit", () => { + signalActiveCommandChildren("SIGTERM"); + }); + for (const signal of ["SIGINT", "SIGTERM"] as const) { + process.once(signal, () => { + signalActiveCommandChildren(signal); + process.kill(process.pid, signal); + }); + } +} + +export function runCommand(params: { args: string[]; command: string; cwd: string; @@ -569,6 +611,8 @@ function runCommand(params: { outputFile?: string; stdio?: "inherit" | "pipe"; stdin?: string; + timeoutKillGraceMs?: number; + timeoutMs?: number; }) { return new Promise((resolve, reject) => { if (params.outputFile) { @@ -576,12 +620,43 @@ function runCommand(params: { } const child = spawn(params.command, params.args, { cwd: params.cwd, + detached: process.platform !== "win32", env: params.env ?? process.env, stdio: ["pipe", "pipe", "pipe"], }); + activeCommandChildren.add(child); + installCommandCleanupHandlers(); let stdout = ""; let stderr = ""; + let settled = false; let stdoutLimitError: string | null = null; + let timeoutError: Error | null = null; + let killTimer: NodeJS.Timeout | undefined; + const timeoutMs = params.timeoutMs ?? COMMAND_TIMEOUT_MS; + const timeoutKillGraceMs = params.timeoutKillGraceMs ?? COMMAND_TIMEOUT_KILL_GRACE_MS; + const clearTimers = () => { + clearTimeout(timeout); + if (killTimer) { + clearTimeout(killTimer); + } + }; + const timeout = setTimeout(() => { + if (settled) { + return; + } + timeoutError = timedOutError( + `${params.command} ${params.args.join(" ")} timed out after ${timeoutMs}ms\n${commandFailureOutput( + stdout, + stderr, + )}`, + ); + signalCommandTree(child, "SIGTERM"); + killTimer = setTimeout(() => { + signalCommandTree(child, "SIGKILL"); + }, timeoutKillGraceMs); + killTimer.unref?.(); + }, timeoutMs); + timeout.unref?.(); child.stdout.on("data", (chunk: Buffer) => { const text = chunk.toString(); if (params.outputFile) { @@ -593,7 +668,7 @@ function runCommand(params: { const appended = appendCommandStdout(stdout, chunk); if (!appended.ok) { stdoutLimitError = appended.message; - child.kill("SIGKILL"); + signalCommandTree(child, "SIGKILL"); } else { stdout = appended.value; } @@ -612,8 +687,28 @@ function runCommand(params: { process.stderr.write(text); } }); - child.on("error", reject); + child.on("error", (error) => { + if (settled) { + return; + } + settled = true; + activeCommandChildren.delete(child); + clearTimers(); + reject(error); + }); child.on("close", (code, signal) => { + if (settled) { + return; + } + settled = true; + activeCommandChildren.delete(child); + if (timeoutError) { + signalCommandTree(child, "SIGKILL"); + clearTimers(); + reject(timeoutError); + return; + } + clearTimers(); if (stdoutLimitError) { reject(new Error(`${params.command} ${params.args.join(" ")} failed: ${stdoutLimitError}`)); return; @@ -1212,6 +1307,7 @@ async function runRemoteCommand(params: { cwd: string; outputFile?: string; stdio?: "inherit" | "pipe"; + timeoutMs?: number; }) { let lastError: unknown; for (let attempt = 1; attempt <= 4; attempt += 1) { @@ -1254,7 +1350,7 @@ async function sshRun( root: string, inspect: CrabboxInspect, remoteCommand: string, - options: { outputFile?: string } = {}, + options: { outputFile?: string; timeoutMs?: number } = {}, ) { const ssh = sshArgs(inspect); return await runRemoteCommand({ @@ -1263,6 +1359,7 @@ async function sshRun( cwd: root, outputFile: options.outputFile, stdio: "inherit", + timeoutMs: options.timeoutMs, }); } @@ -1713,7 +1810,9 @@ async function writeRemoteSessionScripts(params: { selectChatScript, `${REMOTE_ROOT}/select-desktop-chat.sh`, ); - await sshRun(params.root, params.inspect, `bash ${REMOTE_ROOT}/remote-setup.sh`); + await sshRun(params.root, params.inspect, `bash ${REMOTE_ROOT}/remote-setup.sh`, { + timeoutMs: REMOTE_SETUP_COMMAND_TIMEOUT_MS, + }); await sshRun(params.root, params.inspect, `bash ${REMOTE_ROOT}/launch-desktop.sh`); await sshRun(params.root, params.inspect, `bash ${REMOTE_ROOT}/authorize-desktop.sh`); await sshRun(params.root, params.inspect, `bash ${REMOTE_ROOT}/select-desktop-chat.sh`); @@ -2387,7 +2486,9 @@ async function main() { await scpToRemote(root, inspect, authorizeScript, `${REMOTE_ROOT}/authorize-desktop.sh`); await scpToRemote(root, inspect, selectChatScript, `${REMOTE_ROOT}/select-desktop-chat.sh`); await scpToRemote(root, inspect, probeScript, `${REMOTE_ROOT}/remote-probe.sh`); - await sshRun(root, inspect, `bash ${REMOTE_ROOT}/remote-setup.sh`); + await sshRun(root, inspect, `bash ${REMOTE_ROOT}/remote-setup.sh`, { + timeoutMs: REMOTE_SETUP_COMMAND_TIMEOUT_MS, + }); const sutRuntime = await startLocalSut({ gatewayPort: opts.gatewayPort, diff --git a/test/scripts/telegram-user-crabbox-proof.test.ts b/test/scripts/telegram-user-crabbox-proof.test.ts index 332b9f42f37..e94e6ce99d2 100644 --- a/test/scripts/telegram-user-crabbox-proof.test.ts +++ b/test/scripts/telegram-user-crabbox-proof.test.ts @@ -1,15 +1,20 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import { setTimeout as delay } from "node:timers/promises"; import { afterEach, describe, expect, it, vi } from "vitest"; import { + COMMAND_TIMEOUT_MS, createOpenClawGatewaySpawnSpec, readLogTail, readTelegramUserProofLogTailBytes, + REMOTE_SETUP_COMMAND_TIMEOUT_MS, + runCommand, waitForLog, } from "../../scripts/e2e/telegram-user-crabbox-proof.ts"; const tempDirs: string[] = []; +const posixIt = process.platform === "win32" ? it.skip : it; function makeTempDir(): string { const dir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-telegram-proof-")); @@ -17,6 +22,26 @@ function makeTempDir(): string { return dir; } +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +async function waitFor(predicate: () => boolean, timeoutMs = 5_000): Promise { + const started = Date.now(); + while (Date.now() - started < timeoutMs) { + if (predicate()) { + return; + } + await delay(25); + } + throw new Error("condition was not met before timeout"); +} + afterEach(() => { vi.restoreAllMocks(); for (const dir of tempDirs.splice(0)) { @@ -45,6 +70,11 @@ describe("telegram user Crabbox proof log polling", () => { expect(spec.options.shell).toBe(false); }); + it("allows cold remote setup to outlive ordinary command timeouts", () => { + expect(REMOTE_SETUP_COMMAND_TIMEOUT_MS).toBeGreaterThan(COMMAND_TIMEOUT_MS); + expect(REMOTE_SETUP_COMMAND_TIMEOUT_MS).toBeGreaterThanOrEqual(90 * 60 * 1000); + }); + it("rejects loose numeric log tail limits instead of parsing prefixes", () => { expect(() => readTelegramUserProofLogTailBytes({ @@ -118,4 +148,54 @@ describe("telegram user Crabbox proof log polling", () => { expect(message).toContain("recent failure"); expect(message).not.toContain("old-secret"); }); + + posixIt("kills timed-out command process groups when the leader exits first", async () => { + const root = makeTempDir(); + const scriptPath = path.join(root, "trap-term.mjs"); + const grandchildPidPath = path.join(root, "grandchild.pid"); + let grandchildPid = 0; + + fs.writeFileSync( + scriptPath, + ` +import { spawn } from "node:child_process"; +import fs from "node:fs"; + +const grandchild = spawn(process.execPath, [ + "-e", + "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);", +], { stdio: "ignore" }); +fs.writeFileSync(process.argv[2], String(grandchild.pid)); +process.on("SIGTERM", () => process.exit(0)); +setInterval(() => {}, 1000); +`, + "utf8", + ); + + const runPromise = runCommand({ + args: [scriptPath, grandchildPidPath], + command: process.execPath, + cwd: root, + timeoutKillGraceMs: 25, + timeoutMs: 100, + }); + + try { + await waitFor(() => fs.existsSync(grandchildPidPath)); + grandchildPid = Number.parseInt(fs.readFileSync(grandchildPidPath, "utf8"), 10); + expect(Number.isInteger(grandchildPid)).toBe(true); + expect(isProcessAlive(grandchildPid)).toBe(true); + + await expect(runPromise).rejects.toMatchObject({ + code: "ETIMEDOUT", + message: expect.stringContaining("timed out after 100ms"), + }); + await waitFor(() => !isProcessAlive(grandchildPid)); + } finally { + await runPromise.catch(() => {}); + if (grandchildPid && isProcessAlive(grandchildPid)) { + process.kill(grandchildPid, "SIGKILL"); + } + } + }); });