From 17085ec1a4b5cf90072020561dd86c19429e5ca0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 7 Apr 2026 09:04:05 +0100 Subject: [PATCH] fix: make qa lab docker boot resilient --- Dockerfile | 14 ++- .../qa-lab/src/docker-up.runtime.test.ts | 76 +++++++++++- extensions/qa-lab/src/docker-up.runtime.ts | 115 +++++++++++++++++- src/dockerfile.test.ts | 7 +- 4 files changed, 201 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index a431c361687..c7f8ced9e52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -103,7 +103,19 @@ RUN pnpm qa:lab:build # Prune dev dependencies and strip build-only metadata before copying # runtime assets into the final image. FROM build AS runtime-assets -RUN CI=true pnpm prune --prod && \ +ARG OPENCLAW_EXTENSIONS +ARG OPENCLAW_BUNDLED_PLUGIN_DIR +# Keep the install layer frozen, but allow prune to run against the full copied +# workspace tree subset used during `pnpm install`. The build stage only copied +# the root, `ui`, and opted-in plugin manifests into the install layer, so +# prune must not rediscover unrelated workspaces from the later full source +# copy. 
+RUN printf 'packages:\n - .\n - ui\n' > /tmp/pnpm-workspace.runtime.yaml && \ + for ext in $OPENCLAW_EXTENSIONS; do \ + printf ' - %s/%s\n' "$OPENCLAW_BUNDLED_PLUGIN_DIR" "$ext" >> /tmp/pnpm-workspace.runtime.yaml; \ + done && \ + cp /tmp/pnpm-workspace.runtime.yaml pnpm-workspace.yaml && \ + CI=true NPM_CONFIG_FROZEN_LOCKFILE=false pnpm prune --prod && \ find dist -type f \( -name '*.d.ts' -o -name '*.d.mts' -o -name '*.d.cts' -o -name '*.map' \) -delete # ── Runtime base images ───────────────────────────────────────── diff --git a/extensions/qa-lab/src/docker-up.runtime.test.ts b/extensions/qa-lab/src/docker-up.runtime.test.ts index b1ba1516e20..c8d7e793ae9 100644 --- a/extensions/qa-lab/src/docker-up.runtime.test.ts +++ b/extensions/qa-lab/src/docker-up.runtime.test.ts @@ -9,7 +9,7 @@ describe("runQaDockerUp", () => { it("builds the QA UI, writes the harness, starts compose, and waits for health", async () => { const calls: string[] = []; const fetchCalls: string[] = []; - const responseQueue = [false, true, false, true]; + const responseQueue = [false, true, true]; const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-up-")); try { @@ -23,6 +23,9 @@ describe("runQaDockerUp", () => { { async runCommand(command, args, cwd) { calls.push([command, ...args, `@${cwd}`].join(" ")); + if (args.join(" ").includes("ps --format json openclaw-qa-gateway")) { + return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" }; + } return { stdout: "", stderr: "" }; }, fetchImpl: vi.fn(async (input: string) => { @@ -39,12 +42,12 @@ describe("runQaDockerUp", () => { expect.stringContaining( `docker compose -f ${outputDir}/docker-compose.qa.yml up --build -d @/repo/openclaw`, ), + `docker compose -f ${outputDir}/docker-compose.qa.yml ps --format json openclaw-qa-gateway @/repo/openclaw`, ]); expect(fetchCalls).toEqual([ "http://127.0.0.1:43124/healthz", "http://127.0.0.1:43124/healthz", "http://127.0.0.1:18889/healthz", - "http://127.0.0.1:18889/healthz", 
]); expect(result.qaLabUrl).toBe("http://127.0.0.1:43124"); expect(result.gatewayUrl).toBe("http://127.0.0.1:18889/"); @@ -70,6 +73,9 @@ describe("runQaDockerUp", () => { { async runCommand(command, args, cwd) { calls.push([command, ...args, `@${cwd}`].join(" ")); + if (args.join(" ").includes("ps --format json openclaw-qa-gateway")) { + return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" }; + } return { stdout: "", stderr: "" }; }, fetchImpl: vi.fn(async () => ({ ok: true })), @@ -80,6 +86,7 @@ describe("runQaDockerUp", () => { expect(calls).toEqual([ `docker compose -f ${outputDir}/docker-compose.qa.yml down --remove-orphans @/repo/openclaw`, `docker compose -f ${outputDir}/docker-compose.qa.yml up -d @/repo/openclaw`, + `docker compose -f ${outputDir}/docker-compose.qa.yml ps --format json openclaw-qa-gateway @/repo/openclaw`, ]); } finally { await rm(outputDir, { recursive: true, force: true }); @@ -104,7 +111,10 @@ describe("runQaDockerUp", () => { }, { async runCommand() { - return { stdout: "", stderr: "" }; + return { + stdout: '{"Health":"healthy","State":"running"}\n', + stderr: "", + }; }, fetchImpl: vi.fn(async () => ({ ok: true })), sleepImpl: vi.fn(async () => {}), @@ -123,4 +133,64 @@ describe("runQaDockerUp", () => { await rm(outputDir, { recursive: true, force: true }); } }); + + it("falls back to the container IP when the host gateway port is unreachable", async () => { + const calls: string[] = []; + const fetchCalls: string[] = []; + const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-up-")); + + try { + const result = await runQaDockerUp( + { + repoRoot: "/repo/openclaw", + outputDir, + gatewayPort: 18889, + qaLabPort: 43124, + skipUiBuild: true, + usePrebuiltImage: true, + }, + { + async runCommand(command, args, cwd) { + calls.push([command, ...args, `@${cwd}`].join(" ")); + const joined = args.join(" "); + if (joined.includes("ps --format json openclaw-qa-gateway")) { + return { stdout: 
/**
 * Probes a health endpoint exactly once.
 *
 * @param url - Absolute URL of the endpoint to request.
 * @param fetchImpl - Fetch implementation used to issue the request.
 * @returns The response's `ok` flag, or `false` when the request rejects.
 */
async function isHealthy(url: string, fetchImpl: FetchLike) {
  try {
    const response = await fetchImpl(url);
    return response.ok;
  } catch {
    // A rejected fetch (for example a refused connection) just means "not healthy".
    return false;
  }
}

/**
 * Extracts a single status string from one `docker compose ps --format json` row.
 *
 * `Health` wins when it is a non-empty string; otherwise `State` is used.
 * Compose reports `"Health": ""` for containers without a healthcheck, so an
 * empty string must fall through to `State` instead of masking it.
 *
 * @param row - One parsed JSON row (shape is not trusted).
 * @returns The health or state string, or `"unknown"` when neither is present.
 */
function readComposeServiceStatus(row: unknown): string {
  if (typeof row !== "object" || row === null) {
    return "unknown";
  }
  const { Health, State } = row as { Health?: unknown; State?: unknown };
  if (typeof Health === "string" && Health !== "") {
    return Health;
  }
  if (typeof State === "string" && State !== "") {
    return State;
  }
  return "unknown";
}

/**
 * Polls `docker compose ps --format json <service>` until the service reports
 * `healthy` (or `running` when it exposes no health status).
 *
 * @param service - Compose service name to inspect.
 * @param composeFile - Path passed to `docker compose -f`.
 * @param repoRoot - Working directory for the docker command.
 * @param runCommand - Command runner; its `stdout` is parsed as JSON.
 * @param sleepImpl - Delay function awaited between polls.
 * @param timeoutMs - Overall deadline in milliseconds (default 360 000).
 * @param pollMs - Delay between polls in milliseconds (default 1 000).
 * @throws Error when the deadline passes; the message carries the last
 *   observed status (or the last command/parse error) and a logs hint.
 */
async function waitForDockerServiceHealth(
  service: string,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  sleepImpl: (ms: number) => Promise<unknown>,
  timeoutMs = 360_000,
  pollMs = 1_000,
): Promise<void> {
  const startMs = Date.now();
  const deadline = startMs + timeoutMs;
  let lastStatus = "unknown";

  while (Date.now() < deadline) {
    try {
      const { stdout } = await runCommand(
        "docker",
        ["compose", "-f", composeFile, "ps", "--format", "json", service],
        repoRoot,
      );
      // Current Compose prints one JSON object per line; older v2 releases
      // print a single JSON array. Flatten so both shapes yield row objects.
      const rows = stdout
        .split("\n")
        .map((line) => line.trim())
        .filter(Boolean)
        .flatMap((line): unknown[] => {
          const parsed: unknown = JSON.parse(line);
          return Array.isArray(parsed) ? parsed : [parsed];
        });
      lastStatus = readComposeServiceStatus(rows[0]);
      if (lastStatus === "healthy" || lastStatus === "running") {
        return;
      }
    } catch (error) {
      // Command or parse failures are retried; keep the reason for the
      // timeout message. `describeError` is the file-level helper.
      lastStatus = describeError(error);
    }
    await sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startMs) / 1000);
  throw new Error(
    [
      `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`,
      `Last status: ${lastStatus}`,
      `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`,
    ].join("\n"),
  );
}

/**
 * Builds an `http://<container-ip>:<port>/` URL for a compose service by
 * resolving its container ID (`docker compose ps -q`) and then its network
 * address (`docker inspect`).
 *
 * @param service - Compose service name.
 * @param port - Port to place in the returned URL.
 * @param composeFile - Path passed to `docker compose -f`.
 * @param repoRoot - Working directory for the docker commands.
 * @param runCommand - Command runner; rejections propagate to the caller.
 * @returns The URL, or `null` when no container ID or IP address is reported.
 */
async function resolveComposeServiceUrl(
  service: string,
  port: number,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
): Promise<string | null> {
  const { stdout: containerStdout } = await runCommand(
    "docker",
    ["compose", "-f", composeFile, "ps", "-q", service],
    repoRoot,
  );
  // `ps -q` prints one ID per line; use the first so a service with several
  // containers does not hand `docker inspect` a multi-line argument.
  const [containerId = ""] = containerStdout.trim().split(/\s+/);
  if (!containerId) {
    return null;
  }
  const { stdout: ipStdout } = await runCommand(
    "docker",
    [
      "inspect",
      "--format",
      "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
      containerId,
    ],
    repoRoot,
  );
  // NOTE(review): the template prints addresses back-to-back with no
  // separator, so a container on several networks yields one fused token.
  const [ip = ""] = ipStdout.trim().split(/\s+/);
  if (!ip) {
    return null;
  }
  return `http://${ip}:${port}/`;
}
sleepImpl(3_000); const qaLabUrl = `http://127.0.0.1:${qaLabPort}`; - const gatewayUrl = `http://127.0.0.1:${gatewayPort}/`; + const hostGatewayUrl = `http://127.0.0.1:${gatewayPort}/`; await waitForHealth(`${qaLabUrl}/healthz`, { label: "QA Lab", @@ -230,12 +319,26 @@ export async function runQaDockerUp( sleepImpl, composeFile, }); - await waitForHealth(`${gatewayUrl}healthz`, { - label: "Gateway", - fetchImpl, - sleepImpl, + await waitForDockerServiceHealth( + "openclaw-qa-gateway", composeFile, - }); + repoRoot, + runCommand, + sleepImpl, + ); + let gatewayUrl = hostGatewayUrl; + if (!(await isHealthy(`${hostGatewayUrl}healthz`, fetchImpl))) { + const containerGatewayUrl = await resolveComposeServiceUrl( + "openclaw-qa-gateway", + 18789, + composeFile, + repoRoot, + runCommand, + ); + if (containerGatewayUrl && (await isHealthy(`${containerGatewayUrl}healthz`, fetchImpl))) { + gatewayUrl = containerGatewayUrl; + } + } return { outputDir, diff --git a/src/dockerfile.test.ts b/src/dockerfile.test.ts index 1384942866c..1adae61f634 100644 --- a/src/dockerfile.test.ts +++ b/src/dockerfile.test.ts @@ -45,7 +45,12 @@ describe("Dockerfile", () => { it("prunes runtime dependencies after the build stage", async () => { const dockerfile = await readFile(dockerfilePath, "utf8"); expect(dockerfile).toContain("FROM build AS runtime-assets"); - expect(dockerfile).toContain("CI=true pnpm prune --prod"); + expect(dockerfile).toContain("ARG OPENCLAW_EXTENSIONS"); + expect(dockerfile).toContain("ARG OPENCLAW_BUNDLED_PLUGIN_DIR"); + expect(dockerfile).toContain("pnpm-workspace.runtime.yaml"); + expect(dockerfile).toContain(" - ui\\n"); + expect(dockerfile).toContain("CI=true NPM_CONFIG_FROZEN_LOCKFILE=false pnpm prune --prod"); + expect(dockerfile).toContain("prune must not rediscover unrelated workspaces"); expect(dockerfile).not.toContain( `npm install --prefix "${BUNDLED_PLUGIN_ROOT_DIR}/$ext" --omit=dev --silent`, );