diff --git a/scripts/e2e/Dockerfile b/scripts/e2e/Dockerfile index fe6368540f4..fef5fa10f12 100644 --- a/scripts/e2e/Dockerfile +++ b/scripts/e2e/Dockerfile @@ -14,6 +14,9 @@ RUN useradd --create-home --shell /bin/bash appuser \ ENV HOME="/home/appuser" ENV NODE_OPTIONS="--disable-warning=ExperimentalWarning" +# Docker E2E lanes start many loopback gateways concurrently; mDNS advertising +# is unrelated to those checks and can flap under container CPU/network load. +ENV OPENCLAW_DISABLE_BONJOUR="1" USER appuser WORKDIR /app diff --git a/scripts/e2e/config-reload-source-docker.sh b/scripts/e2e/config-reload-source-docker.sh index 9351b175569..fefdf543481 100755 --- a/scripts/e2e/config-reload-source-docker.sh +++ b/scripts/e2e/config-reload-source-docker.sh @@ -107,7 +107,7 @@ echo "Checking initial RPC status..." docker exec "$CONTAINER_NAME" bash -lc " entry=dist/index.mjs [ -f \"\$entry\" ] || entry=dist/index.js -node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 5000 >/tmp/config-reload-status-before.log +node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-before.log " echo "Mutating plugin install timestamp metadata..." @@ -135,7 +135,7 @@ echo "Checking post-write RPC status..." docker exec "$CONTAINER_NAME" bash -lc " entry=dist/index.mjs [ -f \"\$entry\" ] || entry=dist/index.js -node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 5000 >/tmp/config-reload-status-after.log +node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-after.log " echo "Checking reload log..." diff --git a/scripts/e2e/cron-mcp-cleanup-docker-client.ts b/scripts/e2e/cron-mcp-cleanup-docker-client.ts index b67cc531d41..b43f4a52641 100644 --- a/scripts/e2e/cron-mcp-cleanup-docker-client.ts +++ b/scripts/e2e/cron-mcp-cleanup-docker-client.ts @@ -54,7 +54,7 @@ async function describeProbePid(pid: number): Promise { async function waitForProbePid(pidPath: string): Promise { const startedAt = Date.now(); - while (Date.now() - startedAt < 60_000) { + while (Date.now() - startedAt < 240_000) { const pid = await readProbePid(pidPath); if (pid) { return pid; @@ -182,7 +182,7 @@ async function runCronCleanupScenario(params: { entry.payload.jobId === job.id && entry.payload.action === "finished", )?.payload, - 150_000, + 240_000, ); assert(finished, "missing cron finished event"); @@ -223,7 +223,7 @@ async function runSubagentCleanupScenario(params: { const exitedPid = await waitForAnyProbeExit({ pidsPath, label: "subagent", - timeoutMs: 90_000, + timeoutMs: 240_000, }); return { runId: run.runId, diff --git a/scripts/e2e/cron-mcp-cleanup-docker.sh b/scripts/e2e/cron-mcp-cleanup-docker.sh index d71b645a2d1..a5241f945fe 100644 --- a/scripts/e2e/cron-mcp-cleanup-docker.sh +++ b/scripts/e2e/cron-mcp-cleanup-docker.sh @@ -53,7 +53,7 @@ docker run --rm \ trap cleanup_inner EXIT trap dump_gateway_log_on_error ERR gateway_ready=0 - for _ in \$(seq 1 160); do + for _ in \$(seq 1 300); do if grep -q '\[gateway\] ready' /tmp/cron-mcp-cleanup-gateway.log 2>/dev/null; then gateway_ready=1 break diff --git a/scripts/e2e/gateway-network-docker.sh b/scripts/e2e/gateway-network-docker.sh index b4816bb0716..3c7c80a7e46 100644 --- a/scripts/e2e/gateway-network-docker.sh +++ b/scripts/e2e/gateway-network-docker.sh @@ -36,7 +36,7 @@ docker run -d \ echo "Waiting for gateway to come up..." ready=0 -for _ in $(seq 1 40); do +for _ in $(seq 1 180); do if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" != "true" ]; then break fi @@ -94,14 +94,14 @@ if (!url || !token) throw new Error(\"missing GW_URL/GW_TOKEN\"); const ws = new WebSocket(url); await new Promise((resolve, reject) => { - const t = setTimeout(() => reject(new Error(\"ws open timeout\")), 5000); + const t = setTimeout(() => reject(new Error(\"ws open timeout\")), 30000); ws.once(\"open\", () => { clearTimeout(t); resolve(); }); }); -function onceFrame(filter, timeoutMs = 5000) { +function onceFrame(filter, timeoutMs = 30000) { return new Promise((resolve, reject) => { const t = setTimeout(() => reject(new Error(\"timeout\")), timeoutMs); const handler = (data) => { diff --git a/scripts/e2e/mcp-channels-harness.ts b/scripts/e2e/mcp-channels-harness.ts index 43973601e75..ea90aef1b41 100644 --- a/scripts/e2e/mcp-channels-harness.ts +++ b/scripts/e2e/mcp-channels-harness.ts @@ -41,9 +41,10 @@ export type McpClientHandle = { rawMessages: unknown[]; }; -const GATEWAY_WS_OPEN_TIMEOUT_MS = 15_000; -const GATEWAY_RPC_TIMEOUT_MS = 30_000; -const GATEWAY_CONNECT_RETRY_WINDOW_MS = 240_000; +const GATEWAY_WS_OPEN_TIMEOUT_MS = 45_000; +const GATEWAY_RPC_TIMEOUT_MS = 60_000; +const GATEWAY_REQUEST_TIMEOUT_MS = 45_000; +const GATEWAY_CONNECT_RETRY_WINDOW_MS = 420_000; export function assert(condition: unknown, message: string): asserts condition { if (!condition) { @@ -285,7 +286,7 @@ async function connectGatewayOnce(params: { const timeout = setTimeout(() => { pending.delete(id); reject(new Error(`gateway request timeout: ${method}`)); - }, 10_000); + }, GATEWAY_REQUEST_TIMEOUT_MS); timeout.unref?.(); pending.set(id, { resolve: (value) => { diff --git a/scripts/e2e/openwebui-docker.sh b/scripts/e2e/openwebui-docker.sh index dcb6089f58c..ca9876bef24 100755 --- a/scripts/e2e/openwebui-docker.sh +++ b/scripts/e2e/openwebui-docker.sh @@ -186,6 +186,44 @@ if [ "$ow_ready" -ne 1 ]; then exit 1 fi +echo "Waiting for gateway model endpoint after Open WebUI startup..." +gateway_model_ready=0 +for _ in $(seq 1 90); do + if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" != "true" ]; then + break + fi + if docker exec "$GW_NAME" bash -lc "node --input-type=module -e ' + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 8000); + try { + const res = await fetch(\"http://$GW_NAME:$PORT/v1/models\", { + headers: { authorization: \"Bearer $TOKEN\" }, + signal: controller.signal, + }); + process.exit(res.status === 200 ? 0 : 1); + } catch { + process.exit(1); + } finally { + clearTimeout(timeout); + } + ' >/dev/null 2>&1"; then + gateway_model_ready=1 + break + fi + sleep 5 +done + +if [ "$gateway_model_ready" -ne 1 ]; then + echo "Gateway model endpoint did not stay reachable after Open WebUI startup" + docker inspect "$GW_NAME" --format '{{json .State}}' 2>/dev/null || true + if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" = "true" ]; then + docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true + fi + docker logs "$GW_NAME" 2>&1 | tail -n 200 || true + docker logs "$OW_NAME" 2>&1 | tail -n 200 || true + exit 1 +fi + echo "Running Open WebUI -> OpenClaw smoke..." if ! docker exec \ -e "OPENWEBUI_BASE_URL=http://$OW_NAME:$WEBUI_PORT" \ @@ -193,11 +231,17 @@ if ! docker exec \ -e "OPENWEBUI_ADMIN_PASSWORD=$ADMIN_PASSWORD" \ -e "OPENWEBUI_EXPECTED_NONCE=$PROMPT_NONCE" \ -e "OPENWEBUI_PROMPT=$PROMPT" \ + -e "OPENWEBUI_MODEL_ATTEMPTS=72" \ + -e "OPENWEBUI_MODEL_RETRY_MS=5000" \ "$GW_NAME" \ node /app/scripts/e2e/openwebui-probe.mjs >/tmp/openwebui-probe.log 2>&1; then cat /tmp/openwebui-probe.log 2>/dev/null || true echo "Open WebUI probe failed; gateway log tail:" - docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true + docker inspect "$GW_NAME" --format '{{json .State}}' 2>/dev/null || true + if [ "$(docker inspect -f '{{.State.Running}}' "$GW_NAME" 2>/dev/null || echo false)" = "true" ]; then + docker exec "$GW_NAME" bash -lc 'tail -n 200 /tmp/openwebui-gateway.log' || true + fi + docker logs "$GW_NAME" 2>&1 | tail -n 200 || true echo "Open WebUI container logs:" docker logs "$OW_NAME" 2>&1 | tail -n 200 || true exit 1 diff --git a/scripts/e2e/openwebui-probe.mjs b/scripts/e2e/openwebui-probe.mjs index 30195715a90..4f36a8991c7 100644 --- a/scripts/e2e/openwebui-probe.mjs +++ b/scripts/e2e/openwebui-probe.mjs @@ -3,6 +3,8 @@ const email = process.env.OPENWEBUI_ADMIN_EMAIL ?? ""; const password = process.env.OPENWEBUI_ADMIN_PASSWORD ?? ""; const expectedNonce = process.env.OPENWEBUI_EXPECTED_NONCE ?? ""; const prompt = process.env.OPENWEBUI_PROMPT ?? ""; +const modelAttempts = Number.parseInt(process.env.OPENWEBUI_MODEL_ATTEMPTS ?? "72", 10); +const modelRetryMs = Number.parseInt(process.env.OPENWEBUI_MODEL_RETRY_MS ?? "5000", 10); if (!baseUrl || !email || !password || !expectedNonce || !prompt) { throw new Error("Missing required OPENWEBUI_* environment variables"); @@ -72,7 +74,7 @@ const authHeaders = { let modelIds = []; let targetModel = ""; let lastModelsError = ""; -for (let attempt = 1; attempt <= 24; attempt += 1) { +for (let attempt = 1; attempt <= modelAttempts; attempt += 1) { const modelsRes = await fetch(`${baseUrl}/api/models`, { headers: authHeaders }).catch( (error) => { lastModelsError = error instanceof Error ? error.message : String(error); @@ -91,7 +93,7 @@ for (let attempt = 1; attempt <= 24; attempt += 1) { } else if (modelsRes) { lastModelsError = `HTTP ${modelsRes.status} ${await modelsRes.text()}`; } - await sleep(5_000); + await sleep(modelRetryMs); } if (!targetModel) { throw new Error(