fix: make qa lab docker boot resilient

This commit is contained in:
Peter Steinberger
2026-04-07 09:04:05 +01:00
parent 25fae3d722
commit 17085ec1a4
4 changed files with 201 additions and 11 deletions

View File

@@ -103,7 +103,19 @@ RUN pnpm qa:lab:build
# Prune dev dependencies and strip build-only metadata before copying
# runtime assets into the final image.
FROM build AS runtime-assets
RUN CI=true pnpm prune --prod && \
ARG OPENCLAW_EXTENSIONS
ARG OPENCLAW_BUNDLED_PLUGIN_DIR
# Keep the install layer frozen, but let prune run against only the subset of
# the workspace tree that was used during `pnpm install`. The build stage only copied
# the root, `ui`, and opted-in plugin manifests into the install layer, so
# prune must not rediscover unrelated workspaces from the later full source
# copy.
RUN printf 'packages:\n - .\n - ui\n' > /tmp/pnpm-workspace.runtime.yaml && \
for ext in $OPENCLAW_EXTENSIONS; do \
printf ' - %s/%s\n' "$OPENCLAW_BUNDLED_PLUGIN_DIR" "$ext" >> /tmp/pnpm-workspace.runtime.yaml; \
done && \
cp /tmp/pnpm-workspace.runtime.yaml pnpm-workspace.yaml && \
CI=true NPM_CONFIG_FROZEN_LOCKFILE=false pnpm prune --prod && \
find dist -type f \( -name '*.d.ts' -o -name '*.d.mts' -o -name '*.d.cts' -o -name '*.map' \) -delete
# ── Runtime base images ─────────────────────────────────────────

View File

@@ -9,7 +9,7 @@ describe("runQaDockerUp", () => {
it("builds the QA UI, writes the harness, starts compose, and waits for health", async () => {
const calls: string[] = [];
const fetchCalls: string[] = [];
const responseQueue = [false, true, false, true];
const responseQueue = [false, true, true];
const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-up-"));
try {
@@ -23,6 +23,9 @@ describe("runQaDockerUp", () => {
{
async runCommand(command, args, cwd) {
calls.push([command, ...args, `@${cwd}`].join(" "));
if (args.join(" ").includes("ps --format json openclaw-qa-gateway")) {
return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" };
}
return { stdout: "", stderr: "" };
},
fetchImpl: vi.fn(async (input: string) => {
@@ -39,12 +42,12 @@ describe("runQaDockerUp", () => {
expect.stringContaining(
`docker compose -f ${outputDir}/docker-compose.qa.yml up --build -d @/repo/openclaw`,
),
`docker compose -f ${outputDir}/docker-compose.qa.yml ps --format json openclaw-qa-gateway @/repo/openclaw`,
]);
expect(fetchCalls).toEqual([
"http://127.0.0.1:43124/healthz",
"http://127.0.0.1:43124/healthz",
"http://127.0.0.1:18889/healthz",
"http://127.0.0.1:18889/healthz",
]);
expect(result.qaLabUrl).toBe("http://127.0.0.1:43124");
expect(result.gatewayUrl).toBe("http://127.0.0.1:18889/");
@@ -70,6 +73,9 @@ describe("runQaDockerUp", () => {
{
async runCommand(command, args, cwd) {
calls.push([command, ...args, `@${cwd}`].join(" "));
if (args.join(" ").includes("ps --format json openclaw-qa-gateway")) {
return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" };
}
return { stdout: "", stderr: "" };
},
fetchImpl: vi.fn(async () => ({ ok: true })),
@@ -80,6 +86,7 @@ describe("runQaDockerUp", () => {
expect(calls).toEqual([
`docker compose -f ${outputDir}/docker-compose.qa.yml down --remove-orphans @/repo/openclaw`,
`docker compose -f ${outputDir}/docker-compose.qa.yml up -d @/repo/openclaw`,
`docker compose -f ${outputDir}/docker-compose.qa.yml ps --format json openclaw-qa-gateway @/repo/openclaw`,
]);
} finally {
await rm(outputDir, { recursive: true, force: true });
@@ -104,7 +111,10 @@ describe("runQaDockerUp", () => {
},
{
async runCommand() {
return { stdout: "", stderr: "" };
return {
stdout: '{"Health":"healthy","State":"running"}\n',
stderr: "",
};
},
fetchImpl: vi.fn(async () => ({ ok: true })),
sleepImpl: vi.fn(async () => {}),
@@ -123,4 +133,64 @@ describe("runQaDockerUp", () => {
await rm(outputDir, { recursive: true, force: true });
}
});
// Scenario: docker reports the gateway service as healthy, but the health
// probe against the host-published gateway port fails. runQaDockerUp is then
// expected to look up the container's IP and return a gateway URL built from it.
it("falls back to the container IP when the host gateway port is unreachable", async () => {
const calls: string[] = [];
const fetchCalls: string[] = [];
const outputDir = await mkdtemp(path.join(os.tmpdir(), "qa-docker-up-"));
try {
const result = await runQaDockerUp(
{
repoRoot: "/repo/openclaw",
outputDir,
gatewayPort: 18889,
qaLabPort: 43124,
skipUiBuild: true,
usePrebuiltImage: true,
},
{
// Fake command runner: records every invocation and returns canned stdout
// for the three docker queries this scenario exercises.
async runCommand(command, args, cwd) {
calls.push([command, ...args, `@${cwd}`].join(" "));
const joined = args.join(" ");
// Service status query: report the gateway as healthy straight away.
if (joined.includes("ps --format json openclaw-qa-gateway")) {
return { stdout: '{"Health":"healthy","State":"running"}\n', stderr: "" };
}
// Container ID lookup for the gateway service.
if (joined.includes("ps -q openclaw-qa-gateway")) {
return { stdout: "gateway-container\n", stderr: "" };
}
// IP lookup for the container ID returned above.
if (command === "docker" && args[0] === "inspect") {
return { stdout: "192.168.165.4\n", stderr: "" };
}
return { stdout: "", stderr: "" };
},
// Fake fetch: only the QA Lab URL and the container-IP gateway URL respond
// ok, so the host gateway URL (127.0.0.1:18889) is reported as not ok.
fetchImpl: vi.fn(async (input: string) => {
fetchCalls.push(input);
return {
ok:
input === "http://127.0.0.1:43124/healthz" ||
input === "http://192.168.165.4:18789/healthz",
};
}),
sleepImpl: vi.fn(async () => {}),
},
);
// Exact command sequence: compose down/up, the service status poll, then the
// two lookups (container ID, container IP) used for the fallback URL.
expect(calls).toEqual([
`docker compose -f ${outputDir}/docker-compose.qa.yml down --remove-orphans @/repo/openclaw`,
`docker compose -f ${outputDir}/docker-compose.qa.yml up -d @/repo/openclaw`,
`docker compose -f ${outputDir}/docker-compose.qa.yml ps --format json openclaw-qa-gateway @/repo/openclaw`,
`docker compose -f ${outputDir}/docker-compose.qa.yml ps -q openclaw-qa-gateway @/repo/openclaw`,
"docker inspect --format {{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}} gateway-container @/repo/openclaw",
]);
// Probe order: QA Lab, then the host gateway URL (not ok), then the
// container-IP gateway URL (ok).
expect(fetchCalls).toEqual([
"http://127.0.0.1:43124/healthz",
"http://127.0.0.1:18889/healthz",
"http://192.168.165.4:18789/healthz",
]);
// The returned gateway URL is the container-IP one, not the host one.
expect(result.gatewayUrl).toBe("http://192.168.165.4:18789/");
} finally {
// Always remove the temporary output directory, even when an assertion fails.
await rm(outputDir, { recursive: true, force: true });
}
});
});

View File

@@ -149,6 +149,95 @@ async function waitForHealth(
throw new Error(lines.filter(Boolean).join("\n"));
}
/**
 * Performs a single, non-retrying health probe against `url`.
 *
 * @param url - Address handed to `fetchImpl` as-is.
 * @param fetchImpl - Fetch-like function used to issue the request.
 * @returns The `ok` flag of the response; `false` when `fetchImpl` throws or
 *   rejects, or when the response cannot be read.
 */
async function isHealthy(url: string, fetchImpl: FetchLike) {
  let reachable = false;
  try {
    const { ok } = await fetchImpl(url);
    reachable = ok;
  } catch {
    // Any failure while probing counts as "not healthy"; the caller decides
    // what to do next.
  }
  return reachable;
}
/**
 * Parses the stdout of `docker compose ps --format json` into status rows.
 *
 * Accepts both output shapes: one JSON object per line, and a single JSON
 * array containing all rows. Values that are not objects are dropped.
 *
 * @throws SyntaxError when the output is neither valid JSON as a whole nor
 *   valid JSON line by line.
 */
function parseComposePsRows(stdout: string): Array<{ Health?: string; State?: string }> {
  const text = stdout.trim();
  if (!text) {
    return [];
  }
  let documents: unknown[];
  try {
    // A single object or a single array parses as one document.
    documents = [JSON.parse(text)];
  } catch {
    // Several objects, one per line, do not parse as a whole; parse each line.
    documents = text
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean)
      .map((line) => JSON.parse(line) as unknown);
  }
  const rows: Array<{ Health?: string; State?: string }> = [];
  for (const doc of documents) {
    const entries: unknown[] = Array.isArray(doc) ? doc : [doc];
    for (const entry of entries) {
      if (typeof entry === "object" && entry !== null) {
        rows.push(entry as { Health?: string; State?: string });
      }
    }
  }
  return rows;
}

/**
 * Polls `docker compose ps --format json <service>` until the first reported
 * row is `healthy` or `running`, sleeping `pollMs` between attempts.
 *
 * The status is the row's `Health`, falling back to `State` when `Health` is
 * missing or empty, so a running container without a health status is
 * accepted instead of polling until the timeout.
 *
 * @param service - Compose service name to query.
 * @param composeFile - Path passed to `docker compose -f`.
 * @param repoRoot - Working directory handed to `runCommand`.
 * @param runCommand - Command runner; failures are recorded as the last status
 *   and polling continues.
 * @param sleepImpl - Sleep function awaited between polls.
 * @param timeoutMs - Overall time budget in milliseconds.
 * @param pollMs - Delay between polls in milliseconds.
 * @throws Error describing the elapsed time, the last observed status and a
 *   log hint when the service is not ready before the deadline.
 */
async function waitForDockerServiceHealth(
  service: string,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
  sleepImpl: (ms: number) => Promise<unknown>,
  timeoutMs = 360_000,
  pollMs = 1_000,
) {
  const startMs = Date.now();
  const deadline = startMs + timeoutMs;
  let lastStatus = "unknown";

  while (Date.now() < deadline) {
    try {
      const { stdout } = await runCommand(
        "docker",
        ["compose", "-f", composeFile, "ps", "--format", "json", service],
        repoRoot,
      );
      const row = parseComposePsRows(stdout)[0];
      // `||` rather than `??`: an empty `Health` string must fall through to
      // `State`, otherwise the "running" check below can never match.
      lastStatus = row?.Health || row?.State || "unknown";
      if (lastStatus === "healthy" || lastStatus === "running") {
        return;
      }
    } catch (error) {
      // Keep polling; the failure is surfaced in the timeout message.
      lastStatus = describeError(error);
    }
    await sleepImpl(pollMs);
  }

  const elapsedSec = Math.round((Date.now() - startMs) / 1000);
  throw new Error(
    [
      `${service} did not become healthy within ${elapsedSec}s (limit ${Math.round(timeoutMs / 1000)}s).`,
      `Last status: ${lastStatus}`,
      `Hint: check container logs with \`docker compose -f ${composeFile} logs ${service}\`.`,
    ].join("\n"),
  );
}
/**
 * Builds an `http://<container-ip>:<port>/` URL for a compose service by
 * asking docker for the service's container ID and then that container's IP.
 *
 * @param service - Compose service name to look up.
 * @param port - Port to place in the returned URL.
 * @param composeFile - Path passed to `docker compose -f`.
 * @param repoRoot - Working directory handed to `runCommand`.
 * @param runCommand - Command runner; its rejections propagate to the caller.
 * @returns The URL, or `null` when no container ID or no IP address is
 *   reported.
 */
async function resolveComposeServiceUrl(
  service: string,
  port: number,
  composeFile: string,
  repoRoot: string,
  runCommand: RunCommand,
): Promise<string | null> {
  const { stdout: containerStdout } = await runCommand(
    "docker",
    ["compose", "-f", composeFile, "ps", "-q", service],
    repoRoot,
  );
  // `ps -q` prints one ID per line. Use the first one so that a service with
  // several containers does not produce a multi-line argument for `inspect`.
  const containerId = containerStdout
    .split("\n")
    .map((line) => line.trim())
    .find(Boolean);
  if (!containerId) {
    return null;
  }

  const { stdout: ipStdout } = await runCommand(
    "docker",
    [
      "inspect",
      "--format",
      // NOTE(review): this template emits the addresses with no separator, so
      // a container attached to more than one network yields a run-together
      // value. Confirm the service only ever joins a single network.
      "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
      containerId,
    ],
    repoRoot,
  );
  const ip = ipStdout.trim();
  if (!ip) {
    return null;
  }
  return `http://${ip}:${port}/`;
}
export async function runQaDockerUp(
params: {
repoRoot?: string;
@@ -222,7 +311,7 @@ export async function runQaDockerUp(
await sleepImpl(3_000);
const qaLabUrl = `http://127.0.0.1:${qaLabPort}`;
const gatewayUrl = `http://127.0.0.1:${gatewayPort}/`;
const hostGatewayUrl = `http://127.0.0.1:${gatewayPort}/`;
await waitForHealth(`${qaLabUrl}/healthz`, {
label: "QA Lab",
@@ -230,12 +319,26 @@ export async function runQaDockerUp(
sleepImpl,
composeFile,
});
await waitForHealth(`${gatewayUrl}healthz`, {
label: "Gateway",
fetchImpl,
sleepImpl,
await waitForDockerServiceHealth(
"openclaw-qa-gateway",
composeFile,
});
repoRoot,
runCommand,
sleepImpl,
);
let gatewayUrl = hostGatewayUrl;
if (!(await isHealthy(`${hostGatewayUrl}healthz`, fetchImpl))) {
const containerGatewayUrl = await resolveComposeServiceUrl(
"openclaw-qa-gateway",
18789,
composeFile,
repoRoot,
runCommand,
);
if (containerGatewayUrl && (await isHealthy(`${containerGatewayUrl}healthz`, fetchImpl))) {
gatewayUrl = containerGatewayUrl;
}
}
return {
outputDir,

View File

@@ -45,7 +45,12 @@ describe("Dockerfile", () => {
it("prunes runtime dependencies after the build stage", async () => {
const dockerfile = await readFile(dockerfilePath, "utf8");
expect(dockerfile).toContain("FROM build AS runtime-assets");
expect(dockerfile).toContain("CI=true pnpm prune --prod");
expect(dockerfile).toContain("ARG OPENCLAW_EXTENSIONS");
expect(dockerfile).toContain("ARG OPENCLAW_BUNDLED_PLUGIN_DIR");
expect(dockerfile).toContain("pnpm-workspace.runtime.yaml");
expect(dockerfile).toContain(" - ui\\n");
expect(dockerfile).toContain("CI=true NPM_CONFIG_FROZEN_LOCKFILE=false pnpm prune --prod");
expect(dockerfile).toContain("prune must not rediscover unrelated workspaces");
expect(dockerfile).not.toContain(
`npm install --prefix "${BUNDLED_PLUGIN_ROOT_DIR}/$ext" --omit=dev --silent`,
);