From c3dcc4a2995fe0e9c851347b109f13b8cdfd2663 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 1 May 2026 19:04:21 +0100 Subject: [PATCH] test(release): harden docker release validation --- .../e2e/commitments-safety-docker-client.ts | 3 +- scripts/e2e/cron-mcp-cleanup-docker-client.ts | 13 +++ .../runtime-smoke.mjs | 61 ++++++++++-- .../e2e/lib/doctor-install-switch/scenario.sh | 4 +- scripts/e2e/lib/gateway-network/client.mjs | 97 +++++++++++++------ .../lib/upgrade-survivor/probe-gateway.mjs | 75 ++++++++------ scripts/e2e/lib/upgrade-survivor/run.sh | 11 ++- src/plugins/loader-records.test.ts | 18 ++++ src/plugins/loader-records.ts | 3 +- src/plugins/loader.ts | 4 + src/plugins/registry.ts | 4 +- 11 files changed, 215 insertions(+), 78 deletions(-) create mode 100644 src/plugins/loader-records.test.ts diff --git a/scripts/e2e/commitments-safety-docker-client.ts b/scripts/e2e/commitments-safety-docker-client.ts index 92620b2398a..8874ebabe4c 100644 --- a/scripts/e2e/commitments-safety-docker-client.ts +++ b/scripts/e2e/commitments-safety-docker-client.ts @@ -4,7 +4,6 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { DEFAULT_COMMITMENT_EXTRACTION_QUEUE_MAX_ITEMS } from "../../dist/commitments/config.js"; import { configureCommitmentExtractionRuntime, drainCommitmentExtractionQueue, @@ -17,6 +16,8 @@ import { resolveCommitmentStorePath, } from "../../dist/commitments/store.js"; +const DEFAULT_COMMITMENT_EXTRACTION_QUEUE_MAX_ITEMS = 64; + function assert(condition: unknown, message: string): asserts condition { if (!condition) { throw new Error(message); diff --git a/scripts/e2e/cron-mcp-cleanup-docker-client.ts b/scripts/e2e/cron-mcp-cleanup-docker-client.ts index 4806afdf16b..364ee90685d 100644 --- a/scripts/e2e/cron-mcp-cleanup-docker-client.ts +++ b/scripts/e2e/cron-mcp-cleanup-docker-client.ts @@ -225,6 +225,19 @@ async function runSubagentCleanupScenario(params: { `agent did not accept subagent cleanup run: ${JSON.stringify(run)}`, ); + const finished = await gateway.request<{ status?: string }>( + "agent.wait", + { + runId: run.runId, + timeoutMs: 240_000, + }, + { timeoutMs: 250_000 }, + ); + assert( + finished.status === "ok", + `subagent cleanup run did not finish ok: ${JSON.stringify(finished)}`, + ); + const exitedPid = await waitForAnyProbeExit({ pidsPath, label: "subagent", diff --git a/scripts/e2e/lib/bundled-plugin-install-uninstall/runtime-smoke.mjs b/scripts/e2e/lib/bundled-plugin-install-uninstall/runtime-smoke.mjs index 71eeaf539c0..758f13580fb 100644 --- a/scripts/e2e/lib/bundled-plugin-install-uninstall/runtime-smoke.mjs +++ b/scripts/e2e/lib/bundled-plugin-install-uninstall/runtime-smoke.mjs @@ -9,9 +9,13 @@ const TOKEN = "bundled-plugin-runtime-smoke-token"; const WATCHDOG_MS = readPositiveInt(process.env.OPENCLAW_BUNDLED_PLUGIN_RUNTIME_WATCHDOG_MS, 1000); const READY_TIMEOUT_MS = readPositiveInt( process.env.OPENCLAW_BUNDLED_PLUGIN_RUNTIME_READY_MS, - 180000, + 420000, ); const RPC_TIMEOUT_MS = readPositiveInt(process.env.OPENCLAW_BUNDLED_PLUGIN_RUNTIME_RPC_MS, 60000); +const RPC_READY_TIMEOUT_MS = readPositiveInt( + process.env.OPENCLAW_BUNDLED_PLUGIN_RUNTIME_RPC_READY_MS, + 90000, +); function readPositiveInt(raw, fallback) { const parsed = Number.parseInt(String(raw || ""), 10); @@ -296,6 +300,35 @@ async function rpcCall(method, params, options) { return unwrapRpcPayload(parseJsonOutput(stdout)); } +async function retryRpcCall(method, params, options) { + const started = Date.now(); + let lastError; + while (Date.now() - started < RPC_READY_TIMEOUT_MS) { + try { + return await rpcCall(method, params, options); + } catch (error) { + lastError = error; + if (!isRetryableGatewayCallError(error)) { + throw error; + } + await delay(500); + } + } + throw lastError ?? new Error(`gateway RPC ${method} timed out before retry`); +} + +function isRetryableGatewayCallError(error) { + const text = error instanceof Error ? error.message : String(error); + return ( + text.includes("gateway starting") || + text.includes("gateway closed") || + text.includes("handshake timeout") || + text.includes("GatewayTransportError") || + text.includes("ECONNREFUSED") || + text.includes("fetch failed") + ); +} + function parseJsonOutput(stdout) { const trimmed = stdout.trim(); if (!trimmed) { @@ -402,12 +435,16 @@ async function smokePlugin(pluginId, pluginDir, requiresConfig, pluginIndex) { async function assertBaseGatewayProbes(options) { await assertHttpOk(options.port, "/healthz"); await assertReadyzProbe(options); - await rpcCall("health", {}, options); + await retryRpcCall("health", {}, options); } async function runManifestProbes(plan, options) { for (const channel of plan.channels) { - const status = await rpcCall("channels.status", { probe: false, timeoutMs: 2000 }, options); + const status = await retryRpcCall( + "channels.status", + { probe: false, timeoutMs: 2000 }, + options, + ); if (!isChannelVisible(status, channel)) { console.log( `Runtime channel status smoke skipped for ${options.pluginId}: ${channel} is not visible in dry channels.status`, @@ -415,7 +452,11 @@ async function runManifestProbes(plan, options) { } } if (plan.runtimeSlashAliases.length > 0 && plan.activeInThisProbe) { - const commands = await rpcCall("commands.list", { scope: "both", includeArgs: true }, options); + const commands = await retryRpcCall( + "commands.list", + { scope: "both", includeArgs: true }, + options, + ); for (const alias of plan.runtimeSlashAliases) { assertCommandVisible(commands, alias); } @@ -425,7 +466,7 @@ async function runManifestProbes(plan, options) { ); } if (plan.tools.length > 0 && plan.activeInThisProbe) { - const catalog = await rpcCall("tools.catalog", { includePlugins: true }, options); + const catalog = await retryRpcCall("tools.catalog", { includePlugins: true }, options); for (const tool of plan.tools) { assertToolVisible(catalog, tool); } @@ -435,8 +476,8 @@ async function runManifestProbes(plan, options) { ); } if (plan.speechProviders.length > 0) { - const providers = await rpcCall("tts.providers", {}, options); - const status = await rpcCall("tts.status", {}, options); + const providers = await retryRpcCall("tts.providers", {}, options); + const status = await retryRpcCall("tts.status", {}, options); const provider = plan.speechProviders[0]; assertSpeechProviderVisible(providers, provider, "tts.providers"); assertSpeechProviderVisible(status, provider, "tts.status"); @@ -508,7 +549,7 @@ async function runWatchdog(options) { `gateway exited after ready for ${options.pluginId}\n${tailFile(options.logPath)}`, ); } - await rpcCall("health", {}, options); + await retryRpcCall("health", {}, options); assertNoPostReadyRuntimeDepsWork(options.logPath, readyIndex); assertNoRuntimeDepsLocks(); await assertNoPackageManagerChildren(options.child.pid); @@ -650,7 +691,7 @@ async function smokeTtsGlobalDisable(pluginId, pluginDir, provider, pluginIndex) try { await waitForReady({ child, port, logPath }); await assertBaseGatewayProbes({ entrypoint, port, env }); - const providers = await rpcCall("tts.providers", {}, { entrypoint, port, env }); + const providers = await retryRpcCall("tts.providers", {}, { entrypoint, port, env }); assertSpeechProviderVisible(providers, selectedProvider, "tts.providers global-disable"); await runWatchdog({ child, @@ -713,7 +754,7 @@ async function smokeOpenAiTts(pluginIndex) { try { await waitForReady({ child, port, logPath }); await assertBaseGatewayProbes({ entrypoint, port, env }); - const result = await rpcCall( + const result = await retryRpcCall( "tts.convert", { text: "ok", provider: "openai" }, { entrypoint, port, env }, diff --git a/scripts/e2e/lib/doctor-install-switch/scenario.sh b/scripts/e2e/lib/doctor-install-switch/scenario.sh index f752513d3af..faf572e5fca 100644 --- a/scripts/e2e/lib/doctor-install-switch/scenario.sh +++ b/scripts/e2e/lib/doctor-install-switch/scenario.sh @@ -161,14 +161,14 @@ run_flow \ "npm-to-git" \ "$npm_bin daemon install --force" \ "$npm_entry" \ - "node $git_cli doctor --repair --force --yes" \ + "OPENCLAW_UPDATE_IN_PROGRESS=1 node $git_cli doctor --repair --force --yes --non-interactive" \ "$git_entry" run_flow \ "git-to-npm" \ "node $git_cli daemon install --force" \ "$git_entry" \ - "$npm_bin doctor --repair --force --yes" \ + "OPENCLAW_UPDATE_IN_PROGRESS=1 $npm_bin doctor --repair --force --yes --non-interactive" \ "$npm_entry" run_proxy_env_flow() { diff --git a/scripts/e2e/lib/gateway-network/client.mjs b/scripts/e2e/lib/gateway-network/client.mjs index 80befe7f4f9..89b70e80819 100644 --- a/scripts/e2e/lib/gateway-network/client.mjs +++ b/scripts/e2e/lib/gateway-network/client.mjs @@ -8,16 +8,28 @@ if (!url || !token) { throw new Error("missing GW_URL/GW_TOKEN"); } -const ws = new WebSocket(url); -await new Promise((resolve, reject) => { - const timer = setTimeout(() => reject(new Error("ws open timeout")), 30_000); - ws.once("open", () => { - clearTimeout(timer); - resolve(); - }); -}); +const CONNECT_READY_TIMEOUT_MS = Number.parseInt( + process.env.OPENCLAW_GATEWAY_NETWORK_CONNECT_READY_TIMEOUT_MS || "60000", + 10, +); -function onceFrame(filter, timeoutMs = 30_000) { +async function openSocket() { + const ws = new WebSocket(url); + await new Promise((resolve, reject) => { + const timer = setTimeout(() => reject(new Error("ws open timeout")), 30_000); + ws.once("open", () => { + clearTimeout(timer); + resolve(); + }); + ws.once("error", (error) => { + clearTimeout(timer); + reject(error); + }); + }); + return ws; +} + +function onceFrame(ws, filter, timeoutMs = 30_000) { return new Promise((resolve, reject) => { const timer = setTimeout(() => reject(new Error("timeout")), timeoutMs); const handler = (data) => { @@ -33,31 +45,52 @@ function onceFrame(filter, timeoutMs = 30_000) { }); } -ws.send( - JSON.stringify({ - type: "req", - id: "c1", - method: "connect", - params: { - minProtocol: PROTOCOL_VERSION, - maxProtocol: PROTOCOL_VERSION, - client: { - id: "test", - displayName: "docker-net-e2e", - version: "dev", - platform: process.platform, - mode: "test", +async function attemptConnect() { + const ws = await openSocket(); + ws.send( + JSON.stringify({ + type: "req", + id: "c1", + method: "connect", + params: { + minProtocol: PROTOCOL_VERSION, + maxProtocol: PROTOCOL_VERSION, + client: { + id: "test", + displayName: "docker-net-e2e", + version: "dev", + platform: process.platform, + mode: "test", + }, + caps: [], + auth: { token }, }, - caps: [], - auth: { token }, - }, - }), -); + }), + ); -const connectRes = await onceFrame((frame) => frame?.type === "res" && frame?.id === "c1"); -if (!connectRes.ok) { + const connectRes = await onceFrame(ws, (frame) => frame?.type === "res" && frame?.id === "c1"); + if (connectRes.ok) { + ws.close(); + return; + } + ws.close(); throw new Error(`connect failed: ${connectRes.error?.message ?? "unknown"}`); } -ws.close(); -console.log("ok"); +const startedAt = Date.now(); +let lastError; +while (Date.now() - startedAt < CONNECT_READY_TIMEOUT_MS) { + try { + await attemptConnect(); + console.log("ok"); + process.exit(0); + } catch (error) { + lastError = error; + if (!String(error).includes("gateway starting")) { + throw error; + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } +} + +throw lastError ?? new Error("connect failed"); diff --git a/scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs b/scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs index b39f6d75285..aec17fb432d 100644 --- a/scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs +++ b/scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs @@ -27,37 +27,52 @@ const probePath = option("--path"); const expectKind = option("--expect"); const out = option("--out"); const url = new URL(probePath, baseUrl).toString(); +const timeoutMs = Number.parseInt( + process.env.OPENCLAW_UPGRADE_SURVIVOR_PROBE_TIMEOUT_MS || "60000", + 10, +); const startedAt = Date.now(); -const response = await fetch(url, { method: "GET" }); -const text = await response.text(); -let body; -try { - body = text ? JSON.parse(text) : null; -} catch (error) { - throw new Error(`${url} returned non-JSON probe body: ${String(error)}`, { cause: error }); -} -const elapsedMs = Date.now() - startedAt; +let lastError; +while (Date.now() - startedAt < timeoutMs) { + const attemptStartedAt = Date.now(); + try { + const response = await fetch(url, { method: "GET" }); + const text = await response.text(); + let body; + try { + body = text ? JSON.parse(text) : null; + } catch (error) { + throw new Error(`${url} returned non-JSON probe body: ${String(error)}`, { cause: error }); + } -if (!response.ok) { - throw new Error(`${url} probe failed with HTTP ${response.status}: ${text}`); -} -if (expectKind === "live") { - if (body?.ok !== true || body?.status !== "live") { - throw new Error(`${url} did not report live status: ${text}`); - } -} else if (expectKind === "ready") { - if (body?.ready !== true) { - throw new Error(`${url} did not report ready status: ${text}`); - } -} else { - throw new Error(`unknown probe expectation: ${expectKind}`); -} + if (!response.ok) { + throw new Error(`${url} probe failed with HTTP ${response.status}: ${text}`); + } + if (expectKind === "live") { + if (body?.ok !== true || body?.status !== "live") { + throw new Error(`${url} did not report live status: ${text}`); + } + } else if (expectKind === "ready") { + if (body?.ready !== true) { + throw new Error(`${url} did not report ready status: ${text}`); + } + } else { + throw new Error(`unknown probe expectation: ${expectKind}`); + } -writeJson(out, { - body, - elapsedMs, - path: probePath, - status: response.status, - url, -}); + writeJson(out, { + body, + elapsedMs: Date.now() - startedAt, + path: probePath, + status: response.status, + url, + }); + process.exit(0); + } catch (error) { + lastError = error; + const elapsedMs = Date.now() - attemptStartedAt; + await new Promise((resolve) => setTimeout(resolve, Math.max(100, 500 - elapsedMs))); + } +} +throw lastError ?? new Error(`${url} probe timed out`); diff --git a/scripts/e2e/lib/upgrade-survivor/run.sh b/scripts/e2e/lib/upgrade-survivor/run.sh index 9546cda575a..4e87d1380b1 100644 --- a/scripts/e2e/lib/upgrade-survivor/run.sh +++ b/scripts/e2e/lib/upgrade-survivor/run.sh @@ -317,8 +317,17 @@ storage_preflight() { df -h "$ARTIFACT_ROOT" "$TMPDIR" /tmp || true } +rm_rf_retry() { + local attempt + for attempt in 1 2 3 4 5; do + rm -rf "$@" && return 0 + sleep "$attempt" + done + rm -rf "$@" +} + reset_run_state() { - rm -rf "$npm_config_prefix" "$TMPDIR" "$ARTIFACT_ROOT/state-home" + rm_rf_retry "$npm_config_prefix" "$TMPDIR" "$ARTIFACT_ROOT/state-home" mkdir -p "$npm_config_prefix" "$npm_config_cache" "$TMPDIR" } diff --git a/src/plugins/loader-records.test.ts b/src/plugins/loader-records.test.ts new file mode 100644 index 00000000000..5395df84abf --- /dev/null +++ b/src/plugins/loader-records.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from "vitest"; +import { createPluginRecord } from "./loader-records.js"; + +describe("plugin loader records", () => { + it("preserves manifest-declared channel ids before runtime registration", () => { + const record = createPluginRecord({ + id: "kitchen-sink", + name: "Kitchen Sink", + source: "/tmp/kitchen-sink/index.js", + origin: "external", + enabled: true, + channelIds: ["kitchen-sink-channel"], + configSchema: false, + }); + + expect(record.channelIds).toEqual(["kitchen-sink-channel"]); + }); +}); diff --git a/src/plugins/loader-records.ts b/src/plugins/loader-records.ts index 167873c84df..76fa05ef25f 100644 --- a/src/plugins/loader-records.ts +++ b/src/plugins/loader-records.ts @@ -22,6 +22,7 @@ export function createPluginRecord(params: { compat?: readonly PluginCompatCode[]; activationState?: PluginActivationState; syntheticAuthRefs?: string[]; + channelIds?: readonly string[]; configSchema: boolean; contracts?: PluginManifestContracts; }): PluginRecord { @@ -47,7 +48,7 @@ export function createPluginRecord(params: { status: params.enabled ? "loaded" : "disabled", toolNames: [], hookNames: [], - channelIds: [], + channelIds: [...(params.channelIds ?? [])], cliBackendIds: [], providerIds: [], speechProviderIds: [], diff --git a/src/plugins/loader.ts b/src/plugins/loader.ts index 1d3f5a0faf1..a7e51880f02 100644 --- a/src/plugins/loader.ts +++ b/src/plugins/loader.ts @@ -1459,6 +1459,7 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi compat: collectPluginManifestCompatCodes(manifestRecord), activationState, syntheticAuthRefs: manifestRecord.syntheticAuthRefs, + channelIds: manifestRecord.channels, configSchema: Boolean(manifestRecord.configSchema), contracts: manifestRecord.contracts, }); @@ -1494,6 +1495,7 @@ export function loadOpenClawPlugins(options: PluginLoadOptions = {}): PluginRegi compat: collectPluginManifestCompatCodes(manifestRecord), activationState, syntheticAuthRefs: manifestRecord.syntheticAuthRefs, + channelIds: manifestRecord.channels, configSchema: Boolean(manifestRecord.configSchema), contracts: manifestRecord.contracts, }); @@ -2302,6 +2304,7 @@ export async function loadOpenClawPluginCliRegistry( compat: collectPluginManifestCompatCodes(manifestRecord), activationState, syntheticAuthRefs: manifestRecord.syntheticAuthRefs, + channelIds: manifestRecord.channels, configSchema: Boolean(manifestRecord.configSchema), contracts: manifestRecord.contracts, }); @@ -2337,6 +2340,7 @@ export async function loadOpenClawPluginCliRegistry( compat: collectPluginManifestCompatCodes(manifestRecord), activationState, syntheticAuthRefs: manifestRecord.syntheticAuthRefs, + channelIds: manifestRecord.channels, configSchema: Boolean(manifestRecord.configSchema), contracts: manifestRecord.contracts, }); diff --git a/src/plugins/registry.ts b/src/plugins/registry.ts index 99817a34770..cac720b8c1a 100644 --- a/src/plugins/registry.ts +++ b/src/plugins/registry.ts @@ -767,7 +767,9 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { pluginsWithChannelRegistrationConflict.add(record.id); return; } - record.channelIds.push(id); + if (!record.channelIds.includes(id)) { + record.channelIds.push(id); + } registry.channelSetups.push({ pluginId: record.id, pluginName: record.name,