test(update): cover authenticated restart updates

This commit is contained in:
Vincent Koc
2026-05-04 21:57:57 -07:00
parent 70f34bf177
commit 2de0113608
19 changed files with 838 additions and 57 deletions

View File

@@ -37,6 +37,7 @@ BASELINE_RAW="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE:?missing OPENCLAW_UPGRADE_SUR
CANDIDATE_KIND="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND:-tarball}"
CANDIDATE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC:-${OPENCLAW_CURRENT_PACKAGE_TGZ:-}}"
SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
CURRENT_PHASE="setup"
FAILURE_PHASE=""
FAILURE_MESSAGE=""
@@ -51,6 +52,7 @@ start_seconds=""
status_seconds=""
healthz_seconds=""
readyz_seconds=""
update_restart_seconds=""
BASELINE_INSTALL_LOG="$ARTIFACT_ROOT/baseline-install.log"
UPDATE_JSON="$ARTIFACT_ROOT/update.json"
@@ -63,6 +65,11 @@ READYZ_JSON="$ARTIFACT_ROOT/readyz.json"
STATUS_JSON="$ARTIFACT_ROOT/status.json"
STATUS_ERR="$ARTIFACT_ROOT/status.err"
BASELINE_CONFIG_VALIDATE_LOG="$ARTIFACT_ROOT/baseline-config-validate.log"
# Artifacts for the update-restart probe: stdout/stderr captured from the
# gateway service install, plus the systemctl shim's invocation log, pid file,
# and the log of the gateway process the shim launches.
BASELINE_SERVICE_INSTALL_JSON="$ARTIFACT_ROOT/baseline-service-install.json"
BASELINE_SERVICE_INSTALL_ERR="$ARTIFACT_ROOT/baseline-service-install.err"
SYSTEMCTL_SHIM_LOG="$ARTIFACT_ROOT/systemctl-shim.log"
SYSTEMCTL_SHIM_PID_FILE="$ARTIFACT_ROOT/systemctl-shim.pid"
SYSTEMCTL_SHIM_DAEMON_LOG="$ARTIFACT_ROOT/systemctl-shim-gateway.log"
CONFIG_COVERAGE_JSON="$ARTIFACT_ROOT/config-recipe.json"
export OPENCLAW_UPGRADE_SURVIVOR_CONFIG_COVERAGE_JSON="$CONFIG_COVERAGE_JSON"
rm -f "$SUMMARY_JSON" "$CONFIG_COVERAGE_JSON"
@@ -113,6 +120,17 @@ normalize_baseline() {
validate_baseline_package_spec "$baseline_spec"
}
# Guard against unsupported restart modes.
# Reads UPDATE_RESTART_MODE. Returns 0 for "manual" or "auto-auth"; for any
# other value prints a diagnostic to stderr and returns 1.
validate_update_restart_mode() {
  if [ "$UPDATE_RESTART_MODE" = "manual" ] || [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
    return 0
  fi
  echo "OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE must be manual or auto-auth; got: $UPDATE_RESTART_MODE" >&2
  return 1
}
json_event() {
local phase="$1"
local status="$2"
@@ -139,7 +157,9 @@ write_summary() {
SUMMARY_CANDIDATE_VERSION="$candidate_version" \
SUMMARY_INSTALLED_VERSION="$installed_version" \
SUMMARY_SCENARIO="$SCENARIO" \
SUMMARY_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
SUMMARY_START_SECONDS="$start_seconds" \
SUMMARY_UPDATE_RESTART_SECONDS="$update_restart_seconds" \
SUMMARY_HEALTHZ_SECONDS="$healthz_seconds" \
SUMMARY_READYZ_SECONDS="$readyz_seconds" \
SUMMARY_STATUS_SECONDS="$status_seconds" \
@@ -173,8 +193,10 @@ const summary = {
version: process.env.SUMMARY_CANDIDATE_VERSION || null,
},
installedVersion: process.env.SUMMARY_INSTALLED_VERSION || null,
updateRestartMode: process.env.SUMMARY_UPDATE_RESTART_MODE || "manual",
timings: {
startupSeconds: numberOrNull(process.env.SUMMARY_START_SECONDS),
updateRestartSeconds: numberOrNull(process.env.SUMMARY_UPDATE_RESTART_SECONDS),
healthzSeconds: numberOrNull(process.env.SUMMARY_HEALTHZ_SECONDS),
readyzSeconds: numberOrNull(process.env.SUMMARY_READYZ_SECONDS),
statusSeconds: numberOrNull(process.env.SUMMARY_STATUS_SECONDS),
@@ -197,6 +219,13 @@ cleanup() {
kill "$plugin_registry_pid" >/dev/null 2>&1 || true
fi
openclaw_e2e_terminate_gateways "${gateway_pid:-}"
if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then
local shim_pid
shim_pid="$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)"
if [[ "$shim_pid" =~ ^[0-9]+$ ]] && [ "$shim_pid" -gt 1 ]; then
openclaw_e2e_terminate_gateways "$shim_pid"
fi
fi
}
on_error() {
@@ -612,6 +641,7 @@ rm_rf_retry() {
# Clear per-run state: remove the npm prefix, temp dir, and state-home tree,
# drop any leftover systemctl-shim pid file and daemon log, then recreate the
# prefix, cache, and temp directories.
reset_run_state() {
  local stale_shim_file
  rm_rf_retry "$npm_config_prefix" "$TMPDIR" "$ARTIFACT_ROOT/state-home"
  for stale_shim_file in "$SYSTEMCTL_SHIM_PID_FILE" "$SYSTEMCTL_SHIM_DAEMON_LOG"; do
    rm -f "$stale_shim_file"
  done
  mkdir -p "$npm_config_prefix" "$npm_config_cache" "$TMPDIR"
}
@@ -670,6 +700,296 @@ validate_baseline_config() {
fi
}
# Install a fake `systemctl` executable into "$npm_config_prefix/bin" and put
# that directory first on PATH.
#
# The shim manages a single gateway process through a pid file: start/restart
# launch the ExecStart command found in the user's openclaw-gateway.service
# unit, stop terminates it, and is-active/show report on it. Every invocation
# is appended to the shim log. The log, pid-file, and daemon-log locations are
# exported from SYSTEMCTL_SHIM_LOG, SYSTEMCTL_SHIM_PID_FILE, and
# SYSTEMCTL_SHIM_DAEMON_LOG so the shim picks them up.
install_update_restart_systemctl_shim() {
  local shim_dir="$npm_config_prefix/bin"
  mkdir -p "$shim_dir"
  cat >"$shim_dir/systemctl" <<'SHIM'
#!/usr/bin/env bash
# Test-only stand-in for `systemctl` that tracks one gateway process through a
# pid file instead of talking to a real service manager.
set -euo pipefail
log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}"
pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}"
daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}"
# Record the arguments of every invocation in the shim log.
printf '%s\n' "$*" >>"$log_file"

# Drop flags the shim ignores; `--property` also consumes its value. The first
# remaining word is the command, defaulting to `status`.
filtered=()
for ((i = 1; i <= $#; i++)); do
  arg="${!i}"
  case "$arg" in
    --user | --quiet | --no-page | --now)
      ;;
    --property)
      i=$((i + 1))
      ;;
    *)
      filtered+=("$arg")
      ;;
  esac
done
command="${filtered[0]:-status}"

# Print the recorded gateway PID. Fails when the pid file is missing or empty,
# or when its content is not a plain decimal PID greater than 1, so that no
# caller ever signals PID 0/1 or a non-numeric value.
read_gateway_pid() {
  [ -s "$pid_file" ] || return 1
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] || return 1
  printf '%s\n' "$pid"
}

# Succeeds only when the pid file holds a valid PID that is still alive.
is_running() {
  local pid
  pid="$(read_gateway_pid)" || return 1
  kill -0 "$pid" >/dev/null 2>&1
}

# Terminate the recorded gateway: SIGTERM first, poll every 0.1s for up to 100
# iterations, then SIGKILL only if the process is still alive. The pid file is
# removed afterwards.
stop_gateway() {
  local pid
  if pid="$(read_gateway_pid)" && kill -0 "$pid" >/dev/null 2>&1; then
    kill "$pid" >/dev/null 2>&1 || true
    for _ in $(seq 1 100); do
      kill -0 "$pid" >/dev/null 2>&1 || break
      sleep 0.1
    done
    if kill -0 "$pid" >/dev/null 2>&1; then
      kill -9 "$pid" >/dev/null 2>&1 || true
    fi
  fi
  rm -f "$pid_file"
}

# Path of the user-level gateway unit file under $HOME.
unit_path() {
  printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}"
}

# Export the environment described by the unit file: source every existing
# file named by EnvironmentFile= lines and export each Environment= assignment
# after stripping one pair of surrounding double quotes.
load_unit_environment() {
  local unit="$1"
  local line token
  while IFS= read -r line; do
    case "$line" in
      EnvironmentFile=*)
        local spec="${line#EnvironmentFile=}"
        for token in $spec; do
          # Strip a leading "-" prefix from the path and skip missing files.
          local file="${token#-}"
          [ -f "$file" ] || continue
          set -a
          # shellcheck disable=SC1090
          . "$file"
          set +a
        done
        ;;
      Environment=*)
        local assignment="${line#Environment=}"
        assignment="${assignment#\"}"
        assignment="${assignment%\"}"
        export "$assignment"
        ;;
    esac
  done <"$unit"
}

# Launch the last ExecStart= command of the unit in the background, with the
# unit's environment loaded, append its output to the daemon log, and record
# its PID in the pid file.
start_gateway() {
  local unit
  local exec_start
  unit="$(unit_path)"
  exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)"
  [ -n "$exec_start" ] || {
    echo "systemctl shim could not find ExecStart in $unit" >&2
    return 1
  }
  # Subshell keeps the unit's environment out of the shim's own environment.
  (
    load_unit_environment "$unit"
    nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 &
    printf '%s\n' "$!" >"$pid_file"
  )
}

case "$command" in
  daemon-reload | enable | disable)
    exit 0
    ;;
  status)
    # Always succeeds, whether or not the gateway is running.
    # NOTE(review): presumably callers use `status` only as an availability
    # probe - confirm before making this reflect the gateway state.
    exit 0
    ;;
  stop)
    stop_gateway
    exit 0
    ;;
  restart | start)
    stop_gateway
    start_gateway
    exit 0
    ;;
  is-enabled)
    exit 0
    ;;
  is-active)
    is_running && exit 0
    exit 3
    ;;
  show)
    if is_running; then
      printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(read_gateway_pid)"
    else
      printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n'
    fi
    exit 0
    ;;
  *)
    echo "systemctl shim unsupported command: $*" >&2
    exit 1
    ;;
esac
SHIM
  chmod +x "$shim_dir/systemctl"
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG"
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE"
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG"
  export PATH="$shim_dir:$PATH"
}
# Run `openclaw gateway install --force --json` with OPENCLAW_GATEWAY_TOKEN and
# OPENCLAW_GATEWAY_PASSWORD removed from the environment. stdout and stderr are
# captured in BASELINE_SERVICE_INSTALL_JSON / BASELINE_SERVICE_INSTALL_ERR; on
# failure both captures are replayed on stderr and the function returns 1.
install_update_restart_service_unit() {
  env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD \
    openclaw gateway install --force --json \
    >"$BASELINE_SERVICE_INSTALL_JSON" 2>"$BASELINE_SERVICE_INSTALL_ERR" && return 0
  echo "baseline gateway service install failed" >&2
  cat "$BASELINE_SERVICE_INSTALL_ERR" >&2 || true
  cat "$BASELINE_SERVICE_INSTALL_JSON" >&2 || true
  return 1
}
# Seed a pre-paired probe device under OPENCLAW_STATE_DIR using an inline Node
# ES-module script.
#
# The script generates an ed25519 key pair, derives the device id as the
# SHA-256 hex digest of the raw public key, and creates a random 32-byte
# base64url token with the single scope "operator.read". It then writes four
# JSON files (mode 0600):
#   identity/device.json       - device id plus PEM-encoded key pair
#   identity/device-auth.json  - the operator token for that device
#   devices/paired.json        - the device recorded as paired, same token
#   devices/pending.json       - an empty object
# The script throws when OPENCLAW_STATE_DIR is unset or empty.
seed_update_restart_probe_device_auth() {
node --input-type=module <<'NODE'
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
const stateDir = process.env.OPENCLAW_STATE_DIR;
if (!stateDir) {
throw new Error("missing OPENCLAW_STATE_DIR");
}
const base64UrlEncode = (buf) =>
buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, "");
const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex");
const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
const publicKeyPem = publicKey.export({ type: "spki", format: "pem" });
const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" });
const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" });
const rawPublicKey =
spki.length === ed25519SpkiPrefix.length + 32 &&
spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix)
? spki.subarray(ed25519SpkiPrefix.length)
: spki;
const publicKeyRaw = base64UrlEncode(rawPublicKey);
const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex");
const token = base64UrlEncode(crypto.randomBytes(32));
const now = Date.now();
const scopes = ["operator.read"];
function writeJson(filePath, value) {
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 });
try {
fs.chmodSync(filePath, 0o600);
} catch {
// best-effort inside Docker
}
}
writeJson(path.join(stateDir, "identity", "device.json"), {
version: 1,
deviceId,
publicKeyPem,
privateKeyPem,
createdAtMs: now,
});
writeJson(path.join(stateDir, "identity", "device-auth.json"), {
version: 1,
deviceId,
tokens: {
operator: {
token,
role: "operator",
scopes,
updatedAtMs: now,
},
},
});
writeJson(path.join(stateDir, "devices", "paired.json"), {
[deviceId]: {
deviceId,
publicKey: publicKeyRaw,
displayName: "upgrade survivor restart probe",
platform: process.platform,
clientId: "upgrade-survivor",
clientMode: "probe",
role: "operator",
roles: ["operator"],
scopes,
approvedScopes: scopes,
tokens: {
operator: {
token,
role: "operator",
scopes,
createdAtMs: now,
},
},
createdAtMs: now,
approvedAtMs: now,
},
});
writeJson(path.join(stateDir, "devices", "pending.json"), {});
NODE
}
# Record GATEWAY_AUTH_TOKEN_REF in "$OPENCLAW_STATE_DIR/.env".
# Any existing GATEWAY_AUTH_TOKEN_REF= line is dropped, all other lines are
# kept, and the new entry is appended. The file is staged next to the target
# and moved into place.
write_update_restart_service_secretref_env() {
  local env_file="$OPENCLAW_STATE_DIR/.env"
  local staged_file="$env_file.tmp.$$"
  mkdir -p "$OPENCLAW_STATE_DIR"
  : >"$staged_file"
  if [ -f "$env_file" ]; then
    # grep exits non-zero when no line survives the filter; that is fine here.
    grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" >"$staged_file" || true
  fi
  # Managed restarts resolve SecretRefs from service-owned durable env, not the update caller.
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$staged_file"
  mv "$staged_file" "$env_file"
}
# Record GATEWAY_AUTH_TOKEN_REF in two places under OPENCLAW_STATE_DIR:
#   .env                 - existing lines kept, stale entry replaced
#   gateway.systemd.env  - overwritten with just this entry
write_update_restart_service_auth_env() {
  local env_file="$OPENCLAW_STATE_DIR/.env"
  local staged_file="$env_file.tmp.$$"
  local unit_env_file="$OPENCLAW_STATE_DIR/gateway.systemd.env"
  mkdir -p "$OPENCLAW_STATE_DIR"
  : >"$staged_file"
  if [ -f "$env_file" ]; then
    # grep exits non-zero when no line survives the filter; that is fine here.
    grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" >"$staged_file" || true
  fi
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$staged_file"
  mv "$staged_file" "$env_file"
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$unit_env_file"
}
# Set up the baseline for an automatic restart during update. Does nothing
# unless UPDATE_RESTART_MODE is "auto-auth". Otherwise runs, in order: shim
# install, probe device seeding, gateway start, SecretRef env write, and
# service unit install.
prepare_update_restart_probe() {
  [ "$UPDATE_RESTART_MODE" = "auto-auth" ] || return 0
  echo "Preparing configured-auth gateway for automatic update restart."
  local step
  for step in \
    install_update_restart_systemctl_shim \
    seed_update_restart_probe_device_auth \
    start_gateway \
    write_update_restart_service_secretref_env \
    install_update_restart_service_unit; do
    "$step"
  done
}
# Variant of the restart-probe setup that writes the auth env files via
# write_update_restart_service_auth_env. Does nothing unless
# UPDATE_RESTART_MODE is "auto-auth". Otherwise runs, in order: shim install,
# probe device seeding, gateway start, auth env write, and service unit
# install.
prepare_update_restart_probe_current_install() {
  [ "$UPDATE_RESTART_MODE" = "auto-auth" ] || return 0
  echo "Preparing candidate-auth gateway for automatic update restart."
  local step
  for step in \
    install_update_restart_systemctl_shim \
    seed_update_restart_probe_device_auth \
    start_gateway \
    write_update_restart_service_auth_env \
    install_update_restart_service_unit; do
    "$step"
  done
}
assert_baseline_state() {
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline \
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
@@ -714,12 +1034,32 @@ resolve_candidate_version() {
# Run `openclaw update` from the installed baseline to the candidate spec.
#
# In "manual" mode the update is invoked with --no-restart. In any other mode
# the flag is omitted, the wall-clock duration of the update command is stored
# in update_restart_seconds (whole seconds, rounded up), and in "auto-auth"
# mode the JSON written to UPDATE_JSON must report status "ok". stdout/stderr
# of the update go to UPDATE_JSON / UPDATE_ERR and are replayed on stderr when
# the command fails. On success installed_version is refreshed.
update_candidate() {
echo "Updating baseline $baseline_spec to candidate $CANDIDATE_KIND:$CANDIDATE_SPEC ($candidate_version)"
# NOTE(review): this line and the `if !` further down both open an `if` around
# an update invocation; this one looks like the pre-change form kept by the
# diff rendering - confirm which of the two is live.
if ! openclaw update --tag "$CANDIDATE_SPEC" --yes --json --no-restart >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then
local update_start=""
local update_end=""
local update_args=(update --tag "$CANDIDATE_SPEC" --yes --json)
if [ "$UPDATE_RESTART_MODE" = "manual" ]; then
update_args+=(--no-restart)
else
# Millisecond timestamp taken just before the update starts.
update_start="$(node -e "process.stdout.write(String(Date.now()))")"
fi
# The gateway token/password variables are removed from the update's environment.
if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then
echo "openclaw update failed" >&2
cat "$UPDATE_ERR" >&2 || true
cat "$UPDATE_JSON" >&2 || true
return 1
fi
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
update_end="$(node -e "process.stdout.write(String(Date.now()))")"
# Convert elapsed milliseconds to seconds, rounding up.
update_restart_seconds=$(((update_end - update_start + 999) / 1000))
# Fail unless the update's JSON output reports status "ok".
node -e '
const fs = require("node:fs");
const file = process.argv[1];
const result = JSON.parse(fs.readFileSync(file, "utf8"));
if (!result || result.status !== "ok") {
throw new Error(`update JSON did not report ok status: ${JSON.stringify(result)}`);
}
' "$UPDATE_JSON"
fi
installed_version="$(read_installed_version)"
}
@@ -776,8 +1116,11 @@ start_gateway() {
local start_epoch
local ready_epoch
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
gateway_pid="$!"
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
printf '%s\n' "$gateway_pid" >"$SYSTEMCTL_SHIM_PID_FILE"
fi
openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
@@ -788,6 +1131,13 @@ start_gateway() {
fi
}
# Start the gateway unless UPDATE_RESTART_MODE is "auto-auth", in which case
# this is a no-op that returns 0.
ensure_gateway_started() {
  if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then
    start_gateway
  fi
}
check_gateway_probes() {
healthz_seconds="$(probe_gateway_endpoint /healthz live "$HEALTHZ_JSON")"
export OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING="discord,telegram,whatsapp,feishu,matrix"
@@ -818,6 +1168,7 @@ check_gateway_status() {
}
phase storage-preflight storage_preflight
phase validate-update-restart-mode validate_update_restart_mode
phase reset-run-state reset_run_state
phase install-baseline install_baseline
phase seed-state seed_state
@@ -830,6 +1181,7 @@ phase seed-source-only-plugin-shadow seed_source_only_plugin_shadow
phase assert-baseline assert_baseline_state
phase seed-legacy-runtime-deps-symlink seed_legacy_runtime_deps_symlink
phase resolve-candidate resolve_candidate_version
phase prepare-update-restart-probe prepare_update_restart_probe
phase update-candidate update_candidate
phase assert-legacy-plugin-dependency-debris-before-doctor assert_legacy_plugin_dependency_debris_before_doctor
phase configure-configured-plugin-install-fixture-registry configure_configured_plugin_install_fixture_registry
@@ -838,8 +1190,8 @@ phase assert-legacy-plugin-dependency-debris-cleaned assert_legacy_plugin_depend
phase assert-legacy-runtime-deps-symlink-repaired assert_legacy_runtime_deps_symlink_repaired
phase validate-post-doctor-config validate_post_doctor_config
phase assert-survival assert_survival
phase gateway-start start_gateway
phase gateway-start ensure_gateway_started
phase gateway-probes check_gateway_probes
phase gateway-status check_gateway_status
echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} startup=${start_seconds}s healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s."
echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s updateRestart=${update_restart_seconds:-manual}s healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s."

View File

@@ -0,0 +1,264 @@
#!/usr/bin/env bash
# Install a fake `systemctl` executable into "$npm_config_prefix/bin" and put
# that directory first on PATH.
#
# The shim manages a single gateway process through a pid file: start/restart
# launch the ExecStart command found in the user's openclaw-gateway.service
# unit, stop terminates it, and is-active/show report on it. Every invocation
# is appended to the shim log. The shim reads its log, pid-file, and daemon-log
# locations from the OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_* environment
# variables and falls back to /tmp paths when they are unset.
install_update_restart_systemctl_shim() {
  local shim_dir="$npm_config_prefix/bin"
  mkdir -p "$shim_dir"
  cat >"$shim_dir/systemctl" <<'SHIM'
#!/usr/bin/env bash
# Test-only stand-in for `systemctl` that tracks one gateway process through a
# pid file instead of talking to a real service manager.
set -euo pipefail
log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}"
pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}"
daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}"
# Record the arguments of every invocation in the shim log.
printf '%s\n' "$*" >>"$log_file"

# Drop flags the shim ignores; `--property` also consumes its value. The first
# remaining word is the command, defaulting to `status`.
filtered=()
for ((i = 1; i <= $#; i++)); do
  arg="${!i}"
  case "$arg" in
    --user | --quiet | --no-page | --now)
      ;;
    --property)
      i=$((i + 1))
      ;;
    *)
      filtered+=("$arg")
      ;;
  esac
done
command="${filtered[0]:-status}"

# Print the recorded gateway PID. Fails when the pid file is missing or empty,
# or when its content is not a plain decimal PID greater than 1, so that no
# caller ever signals PID 0/1 or a non-numeric value.
read_gateway_pid() {
  [ -s "$pid_file" ] || return 1
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] || return 1
  printf '%s\n' "$pid"
}

# Succeeds only when the pid file holds a valid PID that is still alive.
is_running() {
  local pid
  pid="$(read_gateway_pid)" || return 1
  kill -0 "$pid" >/dev/null 2>&1
}

# Terminate the recorded gateway: SIGTERM first, poll every 0.1s for up to 100
# iterations, then SIGKILL only if the process is still alive. The pid file is
# removed afterwards.
stop_gateway() {
  local pid
  if pid="$(read_gateway_pid)" && kill -0 "$pid" >/dev/null 2>&1; then
    kill "$pid" >/dev/null 2>&1 || true
    for _ in $(seq 1 100); do
      kill -0 "$pid" >/dev/null 2>&1 || break
      sleep 0.1
    done
    if kill -0 "$pid" >/dev/null 2>&1; then
      kill -9 "$pid" >/dev/null 2>&1 || true
    fi
  fi
  rm -f "$pid_file"
}

# Path of the user-level gateway unit file under $HOME.
unit_path() {
  printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}"
}

# Export the environment described by the unit file: source every existing
# file named by EnvironmentFile= lines and export each Environment= assignment
# after stripping one pair of surrounding double quotes.
load_unit_environment() {
  local unit="$1"
  local line token
  while IFS= read -r line; do
    case "$line" in
      EnvironmentFile=*)
        local spec="${line#EnvironmentFile=}"
        for token in $spec; do
          # Strip a leading "-" prefix from the path and skip missing files.
          local file="${token#-}"
          [ -f "$file" ] || continue
          set -a
          # shellcheck disable=SC1090
          . "$file"
          set +a
        done
        ;;
      Environment=*)
        local assignment="${line#Environment=}"
        assignment="${assignment#\"}"
        assignment="${assignment%\"}"
        export "$assignment"
        ;;
    esac
  done <"$unit"
}

# Launch the last ExecStart= command of the unit in the background, with the
# unit's environment loaded, append its output to the daemon log, and record
# its PID in the pid file.
start_gateway() {
  local unit
  local exec_start
  unit="$(unit_path)"
  exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)"
  [ -n "$exec_start" ] || {
    echo "systemctl shim could not find ExecStart in $unit" >&2
    return 1
  }
  # Subshell keeps the unit's environment out of the shim's own environment.
  (
    load_unit_environment "$unit"
    nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 &
    printf '%s\n' "$!" >"$pid_file"
  )
}

case "$command" in
  daemon-reload | enable | disable)
    exit 0
    ;;
  status)
    # Always succeeds, whether or not the gateway is running.
    # NOTE(review): presumably callers use `status` only as an availability
    # probe - confirm before making this reflect the gateway state.
    exit 0
    ;;
  stop)
    stop_gateway
    exit 0
    ;;
  restart | start)
    stop_gateway
    start_gateway
    exit 0
    ;;
  is-enabled)
    exit 0
    ;;
  is-active)
    is_running && exit 0
    exit 3
    ;;
  show)
    if is_running; then
      printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(read_gateway_pid)"
    else
      printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n'
    fi
    exit 0
    ;;
  *)
    echo "systemctl shim unsupported command: $*" >&2
    exit 1
    ;;
esac
SHIM
  chmod +x "$shim_dir/systemctl"
  export PATH="$shim_dir:$PATH"
}
# Seed a pre-paired probe device under OPENCLAW_STATE_DIR using an inline Node
# ES-module script.
#
# The script generates an ed25519 key pair, derives the device id as the
# SHA-256 hex digest of the raw public key, and creates a random 32-byte
# base64url token with the single scope "operator.read". It then writes four
# JSON files (mode 0600; a failing chmod after the write is ignored):
#   identity/device.json       - device id plus PEM-encoded key pair
#   identity/device-auth.json  - the operator token for that device
#   devices/paired.json        - the device recorded as paired, same token
#   devices/pending.json       - an empty object
# The script throws when OPENCLAW_STATE_DIR is unset or empty.
seed_update_restart_probe_device_auth() {
node --input-type=module <<'NODE'
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
const stateDir = process.env.OPENCLAW_STATE_DIR;
if (!stateDir) {
throw new Error("missing OPENCLAW_STATE_DIR");
}
const base64UrlEncode = (buf) =>
buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, "");
const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex");
const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
const publicKeyPem = publicKey.export({ type: "spki", format: "pem" });
const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" });
const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" });
const rawPublicKey =
spki.length === ed25519SpkiPrefix.length + 32 &&
spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix)
? spki.subarray(ed25519SpkiPrefix.length)
: spki;
const publicKeyRaw = base64UrlEncode(rawPublicKey);
const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex");
const token = base64UrlEncode(crypto.randomBytes(32));
const now = Date.now();
const scopes = ["operator.read"];
function writeJson(filePath, value) {
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 });
try {
fs.chmodSync(filePath, 0o600);
} catch {
}
}
writeJson(path.join(stateDir, "identity", "device.json"), {
version: 1,
deviceId,
publicKeyPem,
privateKeyPem,
createdAtMs: now,
});
writeJson(path.join(stateDir, "identity", "device-auth.json"), {
version: 1,
deviceId,
tokens: {
operator: {
token,
role: "operator",
scopes,
updatedAtMs: now,
},
},
});
writeJson(path.join(stateDir, "devices", "paired.json"), {
[deviceId]: {
deviceId,
publicKey: publicKeyRaw,
displayName: "upgrade survivor restart probe",
platform: process.platform,
clientId: "openclaw-cli",
clientMode: "probe",
role: "operator",
roles: ["operator"],
scopes,
approvedScopes: scopes,
tokens: {
operator: {
token,
role: "operator",
scopes,
createdAtMs: now,
},
},
createdAtMs: now,
approvedAtMs: now,
},
});
writeJson(path.join(stateDir, "devices", "pending.json"), {});
NODE
}
# Record GATEWAY_AUTH_TOKEN_REF in two places under OPENCLAW_STATE_DIR:
#   .env                 - existing lines kept, stale entry replaced
#   gateway.systemd.env  - overwritten with just this entry
write_update_restart_service_auth_env() {
  local env_file="$OPENCLAW_STATE_DIR/.env"
  local staged_file="$env_file.tmp.$$"
  mkdir -p "$OPENCLAW_STATE_DIR"
  : >"$staged_file"
  if [ -f "$env_file" ]; then
    # grep exits non-zero when no line survives the filter; that is fine here.
    grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" >"$staged_file" || true
  fi
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$staged_file"
  mv "$staged_file" "$env_file"
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$OPENCLAW_STATE_DIR/gateway.systemd.env"
}
# Bring up a gateway from the current install and register it as a service so
# a later update can restart it.
#
# Arguments:
#   $1 - port the gateway listens on (bound to loopback)
#   $2 - file receiving the gateway's stdout/stderr
# Side effects: installs the systemctl shim, seeds the probe device, sets the
# globals gateway_pid and start_seconds, writes the gateway PID to the shim pid
# file, writes the auth env files, and runs `openclaw gateway install`.
# Returns 1 (after replaying the captured output on stderr) when the service
# install fails.
prepare_update_restart_probe_current_install() {
  local port="$1"
  local log_file="$2"
  local launched_at_ms
  local ready_at_ms
  echo "Preparing candidate-auth gateway for automatic update restart."
  install_update_restart_systemctl_shim
  seed_update_restart_probe_device_auth
  launched_at_ms="$(node -e "process.stdout.write(String(Date.now()))")"
  env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD \
    openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$log_file" 2>&1 &
  gateway_pid="$!"
  # Hand the running gateway to the shim so its stop/restart act on this PID.
  printf '%s\n' "$gateway_pid" >"$OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE"
  openclaw_e2e_wait_gateway_ready "$gateway_pid" "$log_file" 360
  ready_at_ms="$(node -e "process.stdout.write(String(Date.now()))")"
  # Elapsed milliseconds converted to whole seconds, rounded up.
  start_seconds=$(((ready_at_ms - launched_at_ms + 999) / 1000))
  write_update_restart_service_auth_env
  env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD \
    openclaw gateway install --force --json \
    >"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" \
    2>"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR" && return 0
  echo "gateway service install failed" >&2
  cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR" >&2 || true
  cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" >&2 || true
  return 1
}

View File

@@ -13,6 +13,7 @@ SKIP_BUILD="${OPENCLAW_UPGRADE_SURVIVOR_E2E_SKIP_BUILD:-0}"
DOCKER_RUN_TIMEOUT="${OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT:-900s}"
BASELINE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC:-}"
SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
LANE_ARTIFACT_SUFFIX="${OPENCLAW_DOCKER_ALL_LANE_NAME:-default}"
LANE_ARTIFACT_SUFFIX="${LANE_ARTIFACT_SUFFIX//[^A-Za-z0-9_.-]/_}"
ARTIFACT_DIR="${OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_DIR:-$ROOT_DIR/.artifacts/upgrade-survivor/$LANE_ARTIFACT_SUFFIX}"
@@ -86,6 +87,7 @@ if [ "${OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE:-0}" = "1" ]; then
-e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND="$CANDIDATE_KIND" \
-e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC="$CANDIDATE_SPEC" \
-e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \
-e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
-e OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK="${OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK:-}" \
-e OPENCLAW_UPGRADE_SURVIVOR_SUMMARY_JSON=/tmp/openclaw-upgrade-survivor-artifacts/summary.json \
-e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \
@@ -111,6 +113,7 @@ docker_e2e_run_with_harness \
-e OPENCLAW_TEST_STATE_SCRIPT_B64="$OPENCLAW_TEST_STATE_SCRIPT_B64" \
-e OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT=/tmp/openclaw-upgrade-survivor-artifacts \
-e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \
-e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
-e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \
-e OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}" \
-v "$ARTIFACT_DIR:/tmp/openclaw-upgrade-survivor-artifacts" \
@@ -145,6 +148,22 @@ export TELEGRAM_BOT_TOKEN="123456:upgrade-survivor-telegram-token"
export FEISHU_APP_SECRET="upgrade-survivor-feishu-secret"
export BRAVE_API_KEY="BSA_upgrade_survivor_brave_key"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
PORT=18789
START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}"
STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}"
GATEWAY_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/gateway.log"
SYSTEMCTL_SHIM_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.log"
SYSTEMCTL_SHIM_PID_FILE="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.pid"
SYSTEMCTL_SHIM_DAEMON_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim-gateway.log"
BASELINE_SERVICE_INSTALL_JSON="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.json"
BASELINE_SERVICE_INSTALL_ERR="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.err"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG"
export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON="$BASELINE_SERVICE_INSTALL_JSON"
export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR="$BASELINE_SERVICE_INSTALL_ERR"
gateway_pid=""
plugin_registry_pid=""
cleanup() {
@@ -152,6 +171,9 @@ cleanup() {
kill "$plugin_registry_pid" >/dev/null 2>&1 || true
fi
openclaw_e2e_terminate_gateways "${gateway_pid:-}"
if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then
openclaw_e2e_terminate_gateways "$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)"
fi
}
trap cleanup EXIT
@@ -255,10 +277,19 @@ export OPENCLAW_PACKAGE_ACCEPTANCE_LEGACY_COMPAT
echo "Checking dirty-state config before update..."
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
# shellcheck disable=SC1091
source scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh
prepare_update_restart_probe_current_install "$PORT" "$GATEWAY_LOG"
fi
echo "Running package update against the mounted tarball..."
update_args=(update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json)
if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then
update_args+=(--no-restart)
fi
set +e
openclaw update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json --no-restart >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err
env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err
update_status=$?
set -e
if [ "$update_status" -ne 0 ]; then
@@ -268,38 +299,42 @@ if [ "$update_status" -ne 0 ]; then
exit "$update_status"
fi
echo "Running non-interactive doctor repair..."
configure_configured_plugin_install_fixture_registry
if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "openclaw doctor failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "post-doctor config validation failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
echo "Skipping doctor repair until after restart proof."
else
echo "Running non-interactive doctor repair..."
configure_configured_plugin_install_fixture_registry
if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "openclaw doctor failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "post-doctor config validation failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
fi
echo "Verifying config and state survived update/doctor..."
echo "Verifying config and state survived update..."
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state
PORT=18789
START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}"
STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}"
echo "Starting gateway from upgraded state..."
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >/tmp/openclaw-upgrade-survivor-gateway.log 2>&1 &
gateway_pid="$!"
openclaw_e2e_wait_gateway_ready "$gateway_pid" /tmp/openclaw-upgrade-survivor-gateway.log 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
if [ "$start_seconds" -gt "$START_BUDGET" ]; then
echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2
cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true
exit 1
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
echo "Gateway restart was handled by openclaw update."
else
echo "Starting gateway from upgraded state..."
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
gateway_pid="$!"
openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
if [ "$start_seconds" -gt "$START_BUDGET" ]; then
echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2
cat "$GATEWAY_LOG" >&2 || true
exit 1
fi
fi
echo "Checking gateway HTTP probes..."
@@ -320,7 +355,8 @@ status_start="$(node -e "process.stdout.write(String(Date.now()))")"
if ! openclaw gateway status --url "ws://127.0.0.1:$PORT" --token "$GATEWAY_AUTH_TOKEN_REF" --require-rpc --timeout 30000 --json >/tmp/openclaw-upgrade-survivor-status.json 2>/tmp/openclaw-upgrade-survivor-status.err; then
echo "gateway status failed" >&2
cat /tmp/openclaw-upgrade-survivor-status.err >&2 || true
cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true
cat "$GATEWAY_LOG" >&2 || true
cat "$SYSTEMCTL_SHIM_DAEMON_LOG" >&2 || true
exit 1
fi
status_end="$(node -e "process.stdout.write(String(Date.now()))")"
@@ -332,5 +368,5 @@ if [ "$status_seconds" -gt "$STATUS_BUDGET" ]; then
fi
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-status-json /tmp/openclaw-upgrade-survivor-status.json
echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} startup=${start_seconds}s status=${status_seconds}s."
echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s status=${status_seconds}s."
'

View File

@@ -9,6 +9,8 @@ const LIVE_PROFILE_TIMEOUT_MS = 20 * 60 * 1000;
const OPENWEBUI_TIMEOUT_MS = 20 * 60 * 1000;
export const BUNDLED_PLUGIN_INSTALL_UNINSTALL_SHARDS = 24;
const upgradeSurvivorCommand = "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:upgrade-survivor";
// Shell command for the "update-restart-auth" lanes; like
// upgradeSurvivorCommand it sets OPENCLAW_SKIP_DOCKER_BUILD=1 before invoking
// the pnpm script.
const updateRestartAuthCommand =
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-restart-auth";
const LIVE_RETRY_PATTERNS = [
/529\b/i,
@@ -238,6 +240,11 @@ export const mainLanes = [
weight: 3,
},
),
npmLane("update-restart-auth", updateRestartAuthCommand, {
stateScenario: "upgrade-survivor",
timeoutMs: 25 * 60 * 1000,
weight: 3,
}),
npmLane("update-migration", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-migration", {
stateScenario: "upgrade-survivor",
timeoutMs: 30 * 60 * 1000,
@@ -536,6 +543,11 @@ const releasePathPackageUpdateCoreLanes = [
weight: 3,
},
),
npmLane("update-restart-auth", updateRestartAuthCommand, {
stateScenario: "upgrade-survivor",
timeoutMs: 25 * 60 * 1000,
weight: 3,
}),
];
const primaryReleasePathChunks = {