test: harden Docker lanes for 10-way runs

This commit is contained in:
Peter Steinberger
2026-04-24 17:08:10 +01:00
parent cbfc21badb
commit 23c7a7d557
8 changed files with 66 additions and 16 deletions

View File

@@ -14,6 +14,9 @@ RUN useradd --create-home --shell /bin/bash appuser \
ENV HOME="/home/appuser"
ENV NODE_OPTIONS="--disable-warning=ExperimentalWarning"
# Docker E2E lanes start many loopback gateways concurrently; mDNS advertising
# is unrelated to those checks and can flap under container CPU/network load.
ENV OPENCLAW_DISABLE_BONJOUR="1"
USER appuser
WORKDIR /app

View File

@@ -107,7 +107,7 @@ echo "Checking initial RPC status..."
docker exec "$CONTAINER_NAME" bash -lc "
entry=dist/index.mjs
[ -f \"\$entry\" ] || entry=dist/index.js
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 5000 >/tmp/config-reload-status-before.log
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-before.log
"
echo "Mutating plugin install timestamp metadata..."
@@ -135,7 +135,7 @@ echo "Checking post-write RPC status..."
docker exec "$CONTAINER_NAME" bash -lc "
entry=dist/index.mjs
[ -f \"\$entry\" ] || entry=dist/index.js
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 5000 >/tmp/config-reload-status-after.log
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-after.log
"
echo "Checking reload log..."

View File

@@ -54,7 +54,7 @@ async function describeProbePid(pid: number): Promise<string | undefined> {
async function waitForProbePid(pidPath: string): Promise<number | undefined> {
const startedAt = Date.now();
while (Date.now() - startedAt < 60_000) {
while (Date.now() - startedAt < 240_000) {
const pid = await readProbePid(pidPath);
if (pid) {
return pid;
@@ -182,7 +182,7 @@ async function runCronCleanupScenario(params: {
entry.payload.jobId === job.id &&
entry.payload.action === "finished",
)?.payload,
150_000,
240_000,
);
assert(finished, "missing cron finished event");
@@ -223,7 +223,7 @@ async function runSubagentCleanupScenario(params: {
const exitedPid = await waitForAnyProbeExit({
pidsPath,
label: "subagent",
timeoutMs: 90_000,
timeoutMs: 240_000,
});
return {
runId: run.runId,

View File

@@ -53,7 +53,7 @@ docker run --rm \
trap cleanup_inner EXIT
trap dump_gateway_log_on_error ERR
gateway_ready=0
for _ in \$(seq 1 160); do
for _ in \$(seq 1 300); do
if grep -q '\[gateway\] ready' /tmp/cron-mcp-cleanup-gateway.log 2>/dev/null; then
gateway_ready=1
break

View File

@@ -36,7 +36,7 @@ docker run -d \
echo "Waiting for gateway to come up..."
ready=0
for _ in $(seq 1 40); do
for _ in $(seq 1 180); do
if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" != "true" ]; then
break
fi
@@ -94,14 +94,14 @@ if (!url || !token) throw new Error(\"missing GW_URL/GW_TOKEN\");
const ws = new WebSocket(url);
await new Promise((resolve, reject) => {
const t = setTimeout(() => reject(new Error(\"ws open timeout\")), 5000);
const t = setTimeout(() => reject(new Error(\"ws open timeout\")), 30000);
ws.once(\"open\", () => {
clearTimeout(t);
resolve();
});
});
function onceFrame(filter, timeoutMs = 5000) {
function onceFrame(filter, timeoutMs = 30000) {
return new Promise((resolve, reject) => {
const t = setTimeout(() => reject(new Error(\"timeout\")), timeoutMs);
const handler = (data) => {

View File

@@ -41,9 +41,10 @@ export type McpClientHandle = {
rawMessages: unknown[];
};
const GATEWAY_WS_OPEN_TIMEOUT_MS = 15_000;
const GATEWAY_RPC_TIMEOUT_MS = 30_000;
const GATEWAY_CONNECT_RETRY_WINDOW_MS = 240_000;
const GATEWAY_WS_OPEN_TIMEOUT_MS = 45_000;
const GATEWAY_RPC_TIMEOUT_MS = 60_000;
const GATEWAY_REQUEST_TIMEOUT_MS = 45_000;
const GATEWAY_CONNECT_RETRY_WINDOW_MS = 420_000;
export function assert(condition: unknown, message: string): asserts condition {
if (!condition) {
@@ -285,7 +286,7 @@ async function connectGatewayOnce(params: {
const timeout = setTimeout(() => {
pending.delete(id);
reject(new Error(`gateway request timeout: ${method}`));
}, 10_000);
}, GATEWAY_REQUEST_TIMEOUT_MS);
timeout.unref?.();
pending.set(id, {
resolve: (value) => {

View File

@@ -186,6 +186,44 @@ if [ "$ow_ready" -ne 1 ]; then
exit 1
fi
# Wait until the gateway answers GET /v1/models with HTTP 200 before running
# the Open WebUI smoke probe. Polls up to 90 times, sleeping 5s after each
# failed attempt, and records success in gateway_model_ready.
echo "Waiting for gateway model endpoint after Open WebUI startup..."
gateway_model_ready=0
for _ in $(seq 1 90); do
# Stop polling early if the gateway container is no longer running; the
# failure branch below then dumps its state and logs.
if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" != "true" ]; then
break
fi
# Probe from inside the gateway container with an inline Node ES module.
# The bash -lc argument is double-quoted, so $GW_NAME, $PORT and $TOKEN are
# expanded by this script before the command is sent to the container.
# The fetch is aborted after 8000 ms; the node process exits 0 only when the
# response status is 200 and exits 1 on any other status or thrown error.
if docker exec "$GW_NAME" bash -lc "node --input-type=module -e '
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 8000);
try {
const res = await fetch(\"http://$GW_NAME:$PORT/v1/models\", {
headers: { authorization: \"Bearer $TOKEN\" },
signal: controller.signal,
});
process.exit(res.status === 200 ? 0 : 1);
} catch {
process.exit(1);
} finally {
clearTimeout(timeout);
}
' >/dev/null 2>&1"; then
gateway_model_ready=1
break
fi
sleep 5
done
# The endpoint never returned 200 (or the container stopped): print
# diagnostics and fail. Every diagnostic command is best-effort (|| true) so a
# failing one cannot prevent the remaining output or the final exit 1.
if [ "$gateway_model_ready" -ne 1 ]; then
echo "Gateway model endpoint did not stay reachable after Open WebUI startup"
docker inspect "$GW_NAME" --format '{{json .State}}' 2>/dev/null || true
# Only tail the in-container gateway log while the container is still running.
if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" = "true" ]; then
docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true
fi
docker logs "$GW_NAME" 2>&1 | tail -n 200 || true
docker logs "$OW_NAME" 2>&1 | tail -n 200 || true
exit 1
fi
echo "Running Open WebUI -> OpenClaw smoke..."
if ! docker exec \
-e "OPENWEBUI_BASE_URL=http://$OW_NAME:$WEBUI_PORT" \
@@ -193,11 +231,17 @@ if ! docker exec \
-e "OPENWEBUI_ADMIN_PASSWORD=$ADMIN_PASSWORD" \
-e "OPENWEBUI_EXPECTED_NONCE=$PROMPT_NONCE" \
-e "OPENWEBUI_PROMPT=$PROMPT" \
-e "OPENWEBUI_MODEL_ATTEMPTS=72" \
-e "OPENWEBUI_MODEL_RETRY_MS=5000" \
"$GW_NAME" \
node /app/scripts/e2e/openwebui-probe.mjs >/tmp/openwebui-probe.log 2>&1; then
cat /tmp/openwebui-probe.log 2>/dev/null || true
echo "Open WebUI probe failed; gateway log tail:"
docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true
docker inspect "$GW_NAME" --format '{{json .State}}' 2>/dev/null || true
if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" = "true" ]; then
docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true
fi
docker logs "$GW_NAME" 2>&1 | tail -n 200 || true
echo "Open WebUI container logs:"
docker logs "$OW_NAME" 2>&1 | tail -n 200 || true
exit 1

View File

@@ -3,6 +3,8 @@ const email = process.env.OPENWEBUI_ADMIN_EMAIL ?? "";
const password = process.env.OPENWEBUI_ADMIN_PASSWORD ?? "";
const expectedNonce = process.env.OPENWEBUI_EXPECTED_NONCE ?? "";
const prompt = process.env.OPENWEBUI_PROMPT ?? "";
/**
 * Parse a base-10 integer setting taken from an environment variable.
 *
 * Returns `fallback` when the value is unset, empty, not numeric, or below
 * `min`. Without this guard an empty or malformed value parses to NaN, and a
 * loop bounded by `attempt <= NaN` silently runs zero times.
 *
 * @param raw - Raw environment value (may be undefined or empty).
 * @param fallback - Value to use when `raw` is unusable.
 * @param min - Smallest accepted parsed value (inclusive).
 * @returns The parsed integer, or `fallback`.
 */
function readIntEnv(raw = "", fallback = 0, min = 0) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
}

// Upper bound for the model-list polling loop; at least one attempt is required.
const modelAttempts = readIntEnv(process.env.OPENWEBUI_MODEL_ATTEMPTS, 72, 1);
// Delay in milliseconds passed to sleep() between model-list polls; 0 is allowed.
const modelRetryMs = readIntEnv(process.env.OPENWEBUI_MODEL_RETRY_MS, 5000, 0);
if (!baseUrl || !email || !password || !expectedNonce || !prompt) {
throw new Error("Missing required OPENWEBUI_* environment variables");
@@ -72,7 +74,7 @@ const authHeaders = {
let modelIds = [];
let targetModel = "";
let lastModelsError = "";
for (let attempt = 1; attempt <= 24; attempt += 1) {
for (let attempt = 1; attempt <= modelAttempts; attempt += 1) {
const modelsRes = await fetch(`${baseUrl}/api/models`, { headers: authHeaders }).catch(
(error) => {
lastModelsError = error instanceof Error ? error.message : String(error);
@@ -91,7 +93,7 @@ for (let attempt = 1; attempt <= 24; attempt += 1) {
} else if (modelsRes) {
lastModelsError = `HTTP ${modelsRes.status} ${await modelsRes.text()}`;
}
await sleep(5_000);
await sleep(modelRetryMs);
}
if (!targetModel) {
throw new Error(