fix: harden release Docker gateway smokes

This commit is contained in:
Peter Steinberger
2026-05-02 08:00:48 +01:00
parent 85c29d1562
commit 414ed21aba
5 changed files with 69 additions and 20 deletions

View File

@@ -18,6 +18,26 @@ trap cleanup EXIT
docker_e2e_build_or_reuse "$IMAGE_NAME" config-reload "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" "" "$SKIP_BUILD"
OPENCLAW_TEST_STATE_SCRIPT_B64="$(docker_e2e_test_state_shell_b64 config-reload empty)"
check_rpc_status() {
local out_file="$1"
docker_e2e_docker_cmd exec "$CONTAINER_NAME" bash -lc "
source /tmp/openclaw-test-state-env
source scripts/lib/openclaw-e2e-instance.sh
entry=\"\$(openclaw_e2e_resolve_entrypoint)\"
deadline=\$((SECONDS + 120))
last_status=1
while [ \"\$SECONDS\" -lt \"\$deadline\" ]; do
if node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >'$out_file' 2>'$out_file.err'; then
exit 0
fi
last_status=\$?
sleep 1
done
cat '$out_file.err' >&2 || true
exit \"\$last_status\"
"
}
echo "Starting gateway container..."
docker_e2e_run_detached_with_harness \
--name "$CONTAINER_NAME" \
@@ -47,12 +67,7 @@ if ! docker_e2e_wait_container_bash "$CONTAINER_NAME" 180 0.5 "source scripts/li
fi
echo "Checking initial RPC status..."
docker_e2e_docker_cmd exec "$CONTAINER_NAME" bash -lc "
source /tmp/openclaw-test-state-env
source scripts/lib/openclaw-e2e-instance.sh
entry=\"\$(openclaw_e2e_resolve_entrypoint)\"
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-before.log
"
check_rpc_status /tmp/config-reload-status-before.log
echo "Mutating hot-reload gateway metadata..."
docker_e2e_docker_cmd exec "$CONTAINER_NAME" bash -lc "source /tmp/openclaw-test-state-env
@@ -67,12 +82,7 @@ if [ "$(docker_e2e_docker_cmd inspect -f '{{.State.Running}}' "$CONTAINER_NAME"
fi
echo "Checking post-write RPC status..."
docker_e2e_docker_cmd exec "$CONTAINER_NAME" bash -lc "
source /tmp/openclaw-test-state-env
source scripts/lib/openclaw-e2e-instance.sh
entry=\"\$(openclaw_e2e_resolve_entrypoint)\"
node \"\$entry\" gateway status --url ws://127.0.0.1:$PORT --token '$TOKEN' --require-rpc --timeout 30000 >/tmp/config-reload-status-after.log
"
check_rpc_status /tmp/config-reload-status-after.log
echo "Checking reload log..."
docker_e2e_docker_cmd exec "$CONTAINER_NAME" bash -lc "node scripts/e2e/lib/config-reload/assert-log.mjs"

View File

@@ -96,11 +96,27 @@ while (Date.now() < deadline) {
const message = connectRes.error?.message ?? "unknown";
lastError = new Error(`connect failed: ${message}`);
if (!message.includes("gateway starting")) {
if (
!message.includes("gateway starting") &&
!message.includes("ws open timeout") &&
!message.includes("ECONNREFUSED") &&
!message.includes("ECONNRESET") &&
!message.includes("timeout")
) {
throw lastError;
}
} catch (error) {
lastError = error instanceof Error ? error : new Error(String(error));
const message = lastError.message;
if (
!message.includes("gateway starting") &&
!message.includes("ws open timeout") &&
!message.includes("ECONNREFUSED") &&
!message.includes("ECONNRESET") &&
!message.includes("timeout")
) {
throw lastError;
}
} finally {
ws?.close();
}

View File

@@ -26,6 +26,12 @@ const baseUrl = option("--base-url");
const probePath = option("--path");
const expectKind = option("--expect");
const out = option("--out");
const allowFailing = new Set(
option("--allow-failing", "")
.split(",")
.map((entry) => entry.trim())
.filter(Boolean),
);
const timeoutMs = Number.parseInt(
option("--timeout-ms", process.env.OPENCLAW_UPGRADE_SURVIVOR_PROBE_TIMEOUT_MS || "60000"),
10,
@@ -43,7 +49,15 @@ function matchesExpectation(body) {
if (expectKind === "live") {
return body?.ok === true && body?.status === "live";
}
return body?.ready === true;
if (body?.ready === true) {
return true;
}
const failing = Array.isArray(body?.failing) ? body.failing : [];
return (
failing.length > 0 &&
allowFailing.size > 0 &&
failing.every((entry) => allowFailing.has(String(entry)))
);
}
const startedAt = Date.now();
@@ -65,7 +79,8 @@ while (Date.now() - startedAt <= timeoutMs) {
status: response.status,
text,
};
if (response.ok && matchesExpectation(body)) {
const expectationMet = matchesExpectation(body);
if ((response.ok || expectKind === "ready") && expectationMet) {
writeJson(out, {
body,
elapsedMs: Date.now() - startedAt,

View File

@@ -607,12 +607,17 @@ probe_gateway_endpoint() {
local out_file="$3"
local start_epoch
local end_epoch
local args=(
--base-url "http://127.0.0.1:18789"
--path "$path"
--expect "$expect_kind"
)
if [ -n "${OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING:-}" ]; then
args+=(--allow-failing "$OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING")
fi
args+=(--out "$out_file")
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
node scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs \
--base-url "http://127.0.0.1:18789" \
--path "$path" \
--expect "$expect_kind" \
--out "$out_file"
node scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs "${args[@]}"
end_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
printf '%s\n' "$(((end_epoch - start_epoch + 999) / 1000))"
}
@@ -637,7 +642,9 @@ start_gateway() {
check_gateway_probes() {
healthz_seconds="$(probe_gateway_endpoint /healthz live "$HEALTHZ_JSON")"
export OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING="discord,telegram,whatsapp,feishu"
readyz_seconds="$(probe_gateway_endpoint /readyz ready "$READYZ_JSON")"
unset OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING
}
check_gateway_status() {

View File

@@ -220,6 +220,7 @@ node scripts/e2e/lib/upgrade-survivor/probe-gateway.mjs \
--base-url "http://127.0.0.1:$PORT" \
--path /readyz \
--expect ready \
--allow-failing discord,telegram,whatsapp,feishu \
--out /tmp/openclaw-upgrade-survivor-readyz.json
echo "Checking gateway RPC status..."