test(update): cover authenticated restart updates

This commit is contained in:
Vincent Koc
2026-05-04 21:57:57 -07:00
parent 70f34bf177
commit 2de0113608
19 changed files with 838 additions and 57 deletions

View File

@@ -34,7 +34,7 @@ on:
default: 1
type: number
published_upgrade_survivor_baseline:
description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lane
description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lanes
required: false
default: openclaw@latest
type: string
@@ -129,7 +129,7 @@ on:
default: 1
type: number
published_upgrade_survivor_baseline:
description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lane
description: Published OpenClaw package baseline for the published-upgrade-survivor/update-restart-auth/update-migration Docker lanes
required: false
default: openclaw@latest
type: string

View File

@@ -558,7 +558,7 @@ jobs:
artifact_name: ${{ needs.prepare_release_package.outputs.artifact_name }}
package_sha256: ${{ needs.prepare_release_package.outputs.package_sha256 }}
suite_profile: custom
docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update
docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update
published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'last-stable-4 2026.4.23 2026.5.2 2026.4.15' || '' }}
published_upgrade_survivor_scenarios: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'reported-issues' || '' }}
telegram_mode: mock-openai

View File

@@ -386,10 +386,10 @@ jobs:
docker_lanes="npm-onboard-channel-agent gateway-network config-reload"
;;
package)
docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update"
docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update"
;;
product)
docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins plugin-update mcp-channels cron-mcp-cleanup openai-web-search-minimal openwebui"
docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins plugin-update mcp-channels cron-mcp-cleanup openai-web-search-minimal openwebui"
include_openwebui=true
;;
full)

View File

@@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Update/restart: probe managed Gateway restarts with the service environment and add a Docker product lane that exercises candidate-owned `openclaw update --yes --json` restarts, so SecretRef-backed local gateway auth cannot regress behind mocked restart checks. Thanks @vincentkoc.
- Plugins/install: honor the beta update channel for onboarding and doctor-managed plugin installs by requesting floating npm and ClawHub specs with `@beta` while keeping persistent install records on the catalog default. Thanks @vincentkoc.
- WhatsApp/onboarding: canonicalize setup and pairing allowlist entries to WhatsApp's digit-only phone ids while still accepting E.164, JID, and `whatsapp:` inputs, so personal-phone allowlists match WhatsApp Web sender ids after setup. Thanks @vincentkoc.
- Gateway/startup: load provider plugins that own explicitly configured image, video, or music generation defaults so generation tools become live after gateway restart instead of remaining catalog-only. Fixes #77244. Thanks @buyuangtampan, @Nikoxx99, and @vincentkoc.

View File

@@ -78,6 +78,7 @@ pnpm test:docker:plugin-lifecycle-matrix
pnpm test:docker:plugin-update
pnpm test:docker:upgrade-survivor
pnpm test:docker:published-upgrade-survivor
pnpm test:docker:update-restart-auth
pnpm test:docker:update-migration
```
@@ -103,6 +104,10 @@ Important lanes:
configures it through a baked `openclaw config set` recipe, updates it to the
candidate tarball, runs doctor, checks legacy cleanup, starts the Gateway, and
probes `/healthz`, `/readyz`, and RPC status.
- `test:docker:update-restart-auth` installs the candidate package, starts a
managed token-auth Gateway, unsets caller gateway auth env for
`openclaw update --yes --json`, and requires the candidate update command to
restart the Gateway before the normal probes.
- `test:docker:update-migration` is the cleanup-heavy published-update lane. It
starts from a configured Discord/Telegram-style user state, runs baseline
doctor so configured plugin dependencies have a chance to materialize, seeds
@@ -164,10 +169,10 @@ resolved release SHA. For post-publish proof, pass
`package_acceptance_package_spec=openclaw@YYYY.M.D` so the same upgrade matrix
targets the shipped npm package instead.
Release checks call Package Acceptance with the package/update/plugin set:
Release checks call Package Acceptance with the package/update/restart/plugin set:
```text
doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update
doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update
```
When release soak is enabled, they also pass:
@@ -224,7 +229,7 @@ For release candidates, the default proof stack is:
1. `pnpm check:changed` and `pnpm test:changed` for source-level regressions.
2. `pnpm release:check` for package artifact integrity.
3. Package Acceptance `package` profile or the release-check custom package
lanes for install/update/plugin contracts.
lanes for install/update/restart/plugin contracts.
4. Cross-OS release checks for OS-specific installer, onboarding, and platform
behavior.
5. Live suites only when the changed surface touches provider or hosted-service
@@ -245,7 +250,8 @@ Compatibility leniency is narrow and time boxed:
warning or skipping.
Do not add new startup migrations for these old shapes. Add or extend a doctor
repair, then prove it with `upgrade-survivor` or `published-upgrade-survivor`.
repair, then prove it with `upgrade-survivor`, `published-upgrade-survivor`, or
`update-restart-auth` when the update command owns the restart.
## Adding coverage
@@ -257,6 +263,7 @@ can fail for the right reason:
checker test.
- CLI install/update behavior: Docker lane assertion or fixture.
- Published-release migration behavior: `published-upgrade-survivor` scenario.
- Update-owned restart behavior: `update-restart-auth`.
- Registry/package source behavior: `test:docker:plugins` fixture or ClawHub
fixture server.
- Dependency layout or cleanup behavior: assert both runtime execution and the

View File

@@ -141,11 +141,13 @@ the maintainer-only release runbook.
`telegram_mode=mock-openai` or `telegram_mode=live-frontier`. When the
selected Docker lanes include `published-upgrade-survivor`, the package
artifact is the candidate and `published_upgrade_survivor_baseline` selects
the published baseline.
the published baseline. `update-restart-auth` uses the candidate package as
both the installed CLI and the package-under-test so it exercises the
candidate update command's managed restart path.
Example: `gh workflow run package-acceptance.yml --ref main -f workflow_ref=main -f source=npm -f package_spec=openclaw@beta -f suite_profile=product -f published_upgrade_survivor_baseline=openclaw@2026.4.26 -f telegram_mode=mock-openai`
Common profiles:
- `smoke`: install/channel/agent, gateway network, and config reload lanes
- `package`: artifact-native package/update/plugin lanes without OpenWebUI or live ClawHub
- `package`: artifact-native package/update/restart/plugin lanes without OpenWebUI or live ClawHub
- `product`: package profile plus MCP channels, cron/subagent cleanup,
OpenAI web search, and OpenWebUI
- `full`: Docker release-path chunks with OpenWebUI
@@ -486,11 +488,12 @@ Supported candidate sources:
`OpenClaw Release Checks` runs Package Acceptance with `source=artifact`, the
prepared release package artifact, `suite_profile=custom`,
`docker_lanes=doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update`,
`telegram_mode=mock-openai`. Package Acceptance keeps migration, update, stale
plugin dependency cleanup, offline plugin fixtures, plugin update, and Telegram
package QA against the same resolved tarball. Blocking release checks use the
default latest published package baseline; `run_release_soak=true` or
`docker_lanes=doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update`,
and `telegram_mode=mock-openai`. Package Acceptance keeps migration, update,
configured-auth update restart, stale plugin dependency cleanup, offline plugin
fixtures, plugin update, and Telegram package QA against the same resolved
tarball. Blocking release checks use the default latest published package
baseline; `run_release_soak=true` or
`release_profile=full` expands to every stable npm-published baseline from
`2026.4.23` through `latest` plus reported-issue fixtures. Use
Package Acceptance with `source=npm` for an already shipped candidate, or
@@ -536,8 +539,8 @@ Common package profiles:
- `smoke`: quick package install/channel/agent, gateway network, and config
reload lanes
- `package`: install/update/plugin package contracts without live ClawHub; this is the release-check
default
- `package`: install/update/restart/plugin package contracts without live
ClawHub; this is the release-check default
- `product`: `package` plus MCP channels, cron/subagent cleanup, OpenAI web
search, and OpenWebUI
- `full`: Docker release-path chunks with OpenWebUI

View File

@@ -1570,6 +1570,7 @@
"test:docker:timings": "node scripts/docker-e2e-timings.mjs",
"test:docker:update-channel-switch": "bash scripts/e2e/update-channel-switch-docker.sh",
"test:docker:update-migration": "env OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE=1 OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC=${OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC:-openclaw@2026.4.23} OPENCLAW_UPGRADE_SURVIVOR_SCENARIO=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-plugin-deps-cleanup} bash scripts/e2e/upgrade-survivor-docker.sh",
"test:docker:update-restart-auth": "env OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE=auto-auth OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT=${OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT:-1500s} bash scripts/e2e/upgrade-survivor-docker.sh",
"test:docker:upgrade-survivor": "bash scripts/e2e/upgrade-survivor-docker.sh",
"test:e2e": "node scripts/run-vitest.mjs run --config test/vitest/vitest.e2e.config.ts",
"test:e2e:openshell": "OPENCLAW_E2E_OPENSHELL=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.e2e.config.ts extensions/openshell/src/backend.e2e.test.ts",

View File

@@ -37,6 +37,7 @@ BASELINE_RAW="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE:?missing OPENCLAW_UPGRADE_SUR
CANDIDATE_KIND="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND:-tarball}"
CANDIDATE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC:-${OPENCLAW_CURRENT_PACKAGE_TGZ:-}}"
SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
CURRENT_PHASE="setup"
FAILURE_PHASE=""
FAILURE_MESSAGE=""
@@ -51,6 +52,7 @@ start_seconds=""
status_seconds=""
healthz_seconds=""
readyz_seconds=""
update_restart_seconds=""
BASELINE_INSTALL_LOG="$ARTIFACT_ROOT/baseline-install.log"
UPDATE_JSON="$ARTIFACT_ROOT/update.json"
@@ -63,6 +65,11 @@ READYZ_JSON="$ARTIFACT_ROOT/readyz.json"
STATUS_JSON="$ARTIFACT_ROOT/status.json"
STATUS_ERR="$ARTIFACT_ROOT/status.err"
BASELINE_CONFIG_VALIDATE_LOG="$ARTIFACT_ROOT/baseline-config-validate.log"
BASELINE_SERVICE_INSTALL_JSON="$ARTIFACT_ROOT/baseline-service-install.json"
BASELINE_SERVICE_INSTALL_ERR="$ARTIFACT_ROOT/baseline-service-install.err"
SYSTEMCTL_SHIM_LOG="$ARTIFACT_ROOT/systemctl-shim.log"
SYSTEMCTL_SHIM_PID_FILE="$ARTIFACT_ROOT/systemctl-shim.pid"
SYSTEMCTL_SHIM_DAEMON_LOG="$ARTIFACT_ROOT/systemctl-shim-gateway.log"
CONFIG_COVERAGE_JSON="$ARTIFACT_ROOT/config-recipe.json"
export OPENCLAW_UPGRADE_SURVIVOR_CONFIG_COVERAGE_JSON="$CONFIG_COVERAGE_JSON"
rm -f "$SUMMARY_JSON" "$CONFIG_COVERAGE_JSON"
@@ -113,6 +120,17 @@ normalize_baseline() {
validate_baseline_package_spec "$baseline_spec"
}
# Reject any update-restart mode the harness does not implement.
# Reads the global UPDATE_RESTART_MODE. Returns 0 for "manual" or "auto-auth";
# otherwise prints a diagnostic to stderr and returns 1.
validate_update_restart_mode() {
  if [ "$UPDATE_RESTART_MODE" = "manual" ] || [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
    return 0
  fi
  echo "OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE must be manual or auto-auth; got: $UPDATE_RESTART_MODE" >&2
  return 1
}
json_event() {
local phase="$1"
local status="$2"
@@ -139,7 +157,9 @@ write_summary() {
SUMMARY_CANDIDATE_VERSION="$candidate_version" \
SUMMARY_INSTALLED_VERSION="$installed_version" \
SUMMARY_SCENARIO="$SCENARIO" \
SUMMARY_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
SUMMARY_START_SECONDS="$start_seconds" \
SUMMARY_UPDATE_RESTART_SECONDS="$update_restart_seconds" \
SUMMARY_HEALTHZ_SECONDS="$healthz_seconds" \
SUMMARY_READYZ_SECONDS="$readyz_seconds" \
SUMMARY_STATUS_SECONDS="$status_seconds" \
@@ -173,8 +193,10 @@ const summary = {
version: process.env.SUMMARY_CANDIDATE_VERSION || null,
},
installedVersion: process.env.SUMMARY_INSTALLED_VERSION || null,
updateRestartMode: process.env.SUMMARY_UPDATE_RESTART_MODE || "manual",
timings: {
startupSeconds: numberOrNull(process.env.SUMMARY_START_SECONDS),
updateRestartSeconds: numberOrNull(process.env.SUMMARY_UPDATE_RESTART_SECONDS),
healthzSeconds: numberOrNull(process.env.SUMMARY_HEALTHZ_SECONDS),
readyzSeconds: numberOrNull(process.env.SUMMARY_READYZ_SECONDS),
statusSeconds: numberOrNull(process.env.SUMMARY_STATUS_SECONDS),
@@ -197,6 +219,13 @@ cleanup() {
kill "$plugin_registry_pid" >/dev/null 2>&1 || true
fi
openclaw_e2e_terminate_gateways "${gateway_pid:-}"
if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then
local shim_pid
shim_pid="$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)"
if [[ "$shim_pid" =~ ^[0-9]+$ ]] && [ "$shim_pid" -gt 1 ]; then
openclaw_e2e_terminate_gateways "$shim_pid"
fi
fi
}
on_error() {
@@ -612,6 +641,7 @@ rm_rf_retry() {
reset_run_state() {
rm_rf_retry "$npm_config_prefix" "$TMPDIR" "$ARTIFACT_ROOT/state-home"
rm -f "$SYSTEMCTL_SHIM_PID_FILE" "$SYSTEMCTL_SHIM_DAEMON_LOG"
mkdir -p "$npm_config_prefix" "$npm_config_cache" "$TMPDIR"
}
@@ -670,6 +700,296 @@ validate_baseline_config() {
fi
}
# Write a fake `systemctl` into "$npm_config_prefix/bin" and put that directory
# first on PATH, so service-manager calls made during the test are answered by
# a small script that tracks one gateway process through a pid file.
#
# Reads:   npm_config_prefix, SYSTEMCTL_SHIM_LOG, SYSTEMCTL_SHIM_PID_FILE,
#          SYSTEMCTL_SHIM_DAEMON_LOG
# Exports: OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_{LOG,PID_FILE,DAEMON_LOG}, PATH
install_update_restart_systemctl_shim() {
  local shim_dir="$npm_config_prefix/bin"
  mkdir -p "$shim_dir"
  cat >"$shim_dir/systemctl" <<'SHIM'
#!/usr/bin/env bash
# systemctl stand-in for the upgrade-survivor Docker lane.
set -euo pipefail

log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}"
pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}"
daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}"

# Record every invocation so the test artifacts show what was asked of systemctl.
printf '%s\n' "$*" >>"$log_file"

# Strip the flags the shim ignores; `--property` also swallows its value.
filtered=()
for ((i = 1; i <= $#; i++)); do
  arg="${!i}"
  case "$arg" in
    --user | --quiet | --no-page | --now)
      ;;
    --property)
      i=$((i + 1))
      ;;
    *)
      filtered+=("$arg")
      ;;
  esac
done
# The first remaining word is the verb; a bare `systemctl` is treated as `status`.
command="${filtered[0]:-status}"

# True when the pid file names a live process.
is_running() {
  [ -s "$pid_file" ] || return 1
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  # Same validation as stop_gateway: a non-numeric value, 0, or 1 must never be
  # probed (`kill -0 0` succeeds for the caller's own process group).
  if ! [[ "$pid" =~ ^[0-9]+$ ]] || [ "$pid" -le 1 ]; then
    return 1
  fi
  kill -0 "$pid" >/dev/null 2>&1
}

# Terminate the tracked process: SIGTERM, wait up to ~10s, then SIGKILL.
stop_gateway() {
  [ -s "$pid_file" ] || return 0
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  if [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] && kill -0 "$pid" >/dev/null 2>&1; then
    kill "$pid" >/dev/null 2>&1 || true
    for _ in $(seq 1 100); do
      kill -0 "$pid" >/dev/null 2>&1 || break
      sleep 0.1
    done
    kill -9 "$pid" >/dev/null 2>&1 || true
  fi
  rm -f "$pid_file"
}

# Location of the user-level gateway unit file under $HOME.
unit_path() {
  printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}"
}

# Export the environment described by the unit's EnvironmentFile=/Environment= lines.
load_unit_environment() {
  local unit="$1"
  # `|| [ -n "$line" ]` keeps the final line when the unit file lacks a
  # trailing newline; plain `read` would return non-zero and drop it.
  while IFS= read -r line || [ -n "$line" ]; do
    case "$line" in
      EnvironmentFile=*)
        local spec="${line#EnvironmentFile=}"
        # Unquoted on purpose: the value may list several files.
        for token in $spec; do
          # A leading "-" marks the file as optional; strip it to get the path.
          local file="${token#-}"
          [ -f "$file" ] || continue
          set -a
          # shellcheck disable=SC1090
          . "$file"
          set +a
        done
        ;;
      Environment=*)
        local assignment="${line#Environment=}"
        assignment="${assignment#\"}"
        assignment="${assignment%\"}"
        export "$assignment"
        ;;
    esac
  done <"$unit"
}

# Launch the unit's last ExecStart command in the background and record its pid.
start_gateway() {
  local unit
  local exec_start
  unit="$(unit_path)"
  exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)"
  [ -n "$exec_start" ] || {
    echo "systemctl shim could not find ExecStart in $unit" >&2
    return 1
  }
  # Subshell: the unit environment is exported only for the launched process.
  (
    load_unit_environment "$unit"
    nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 &
    printf '%s\n' "$!" >"$pid_file"
  )
}

case "$command" in
  daemon-reload | enable | disable)
    exit 0
    ;;
  status)
    # Always succeeds, whether or not the gateway is running.
    # NOTE(review): presumably callers use `status` only to detect that the
    # service manager is reachable -- confirm before returning 3 when stopped.
    exit 0
    ;;
  stop)
    stop_gateway
    exit 0
    ;;
  restart | start)
    stop_gateway
    start_gateway
    exit 0
    ;;
  is-enabled)
    exit 0
    ;;
  is-active)
    is_running && exit 0
    exit 3
    ;;
  show)
    if is_running; then
      printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(cat "$pid_file")"
    else
      printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n'
    fi
    exit 0
    ;;
  *)
    echo "systemctl shim unsupported command: $*" >&2
    exit 1
    ;;
esac
SHIM
  chmod +x "$shim_dir/systemctl"
  # The shim runs as a separate process, so its file locations travel via the environment.
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG"
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE"
  export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG"
  export PATH="$shim_dir:$PATH"
}
# Run `openclaw gateway install --force --json` with the caller's gateway
# token/password variables removed from the environment. stdout and stderr are
# captured in BASELINE_SERVICE_INSTALL_JSON / BASELINE_SERVICE_INSTALL_ERR.
# Returns 0 on success; on failure replays both captures to stderr and returns 1.
install_update_restart_service_unit() {
  env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD \
    openclaw gateway install --force --json \
    >"$BASELINE_SERVICE_INSTALL_JSON" 2>"$BASELINE_SERVICE_INSTALL_ERR" && return 0

  echo "baseline gateway service install failed" >&2
  cat "$BASELINE_SERVICE_INSTALL_ERR" >&2 || true
  cat "$BASELINE_SERVICE_INSTALL_JSON" >&2 || true
  return 1
}
# Seed a device identity and a matching paired-device record under
# $OPENCLAW_STATE_DIR by running an inline Node (ES module) script.
#
# The script generates a fresh ed25519 key pair and a random 32-byte
# base64url token, derives the device id as the SHA-256 hex digest of the raw
# public key, and writes four JSON files:
#   identity/device.json       - key pair (PEM) and device id
#   identity/device-auth.json  - the operator token with scope "operator.read"
#   devices/paired.json        - the paired-device entry carrying the same token
#   devices/pending.json       - an empty object
# Each file is written with mode 0600; a failing chmod is ignored.
# The script throws when OPENCLAW_STATE_DIR is not set.
seed_update_restart_probe_device_auth() {
node --input-type=module <<'NODE'
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
const stateDir = process.env.OPENCLAW_STATE_DIR;
if (!stateDir) {
throw new Error("missing OPENCLAW_STATE_DIR");
}
const base64UrlEncode = (buf) =>
buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, "");
const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex");
const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
const publicKeyPem = publicKey.export({ type: "spki", format: "pem" });
const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" });
const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" });
const rawPublicKey =
spki.length === ed25519SpkiPrefix.length + 32 &&
spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix)
? spki.subarray(ed25519SpkiPrefix.length)
: spki;
const publicKeyRaw = base64UrlEncode(rawPublicKey);
const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex");
const token = base64UrlEncode(crypto.randomBytes(32));
const now = Date.now();
const scopes = ["operator.read"];
function writeJson(filePath, value) {
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 });
try {
fs.chmodSync(filePath, 0o600);
} catch {
// best-effort inside Docker
}
}
writeJson(path.join(stateDir, "identity", "device.json"), {
version: 1,
deviceId,
publicKeyPem,
privateKeyPem,
createdAtMs: now,
});
writeJson(path.join(stateDir, "identity", "device-auth.json"), {
version: 1,
deviceId,
tokens: {
operator: {
token,
role: "operator",
scopes,
updatedAtMs: now,
},
},
});
writeJson(path.join(stateDir, "devices", "paired.json"), {
[deviceId]: {
deviceId,
publicKey: publicKeyRaw,
displayName: "upgrade survivor restart probe",
platform: process.platform,
clientId: "upgrade-survivor",
clientMode: "probe",
role: "operator",
roles: ["operator"],
scopes,
approvedScopes: scopes,
tokens: {
operator: {
token,
role: "operator",
scopes,
createdAtMs: now,
},
},
createdAtMs: now,
approvedAtMs: now,
},
});
writeJson(path.join(stateDir, "devices", "pending.json"), {});
NODE
}
# Rewrite "$OPENCLAW_STATE_DIR/.env" so it carries exactly one
# GATEWAY_AUTH_TOKEN_REF line holding the current value of that variable.
# Every other existing line is kept; the new line is appended last. The file
# is assembled in a sibling temp file and moved into place.
write_update_restart_service_secretref_env() {
  mkdir -p "$OPENCLAW_STATE_DIR"
  local env_file="$OPENCLAW_STATE_DIR/.env"
  local staging="$env_file.tmp.$$"
  : >"$staging"
  if [ -f "$env_file" ]; then
    # grep exits non-zero when nothing survives the filter; that is not an error here.
    grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" >"$staging" || true
  fi
  # Managed restarts resolve SecretRefs from service-owned durable env, not the update caller.
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$staging"
  mv "$staging" "$env_file"
}
# Persist GATEWAY_AUTH_TOKEN_REF in two files under "$OPENCLAW_STATE_DIR":
#   .env                - existing lines kept, any previous
#                         GATEWAY_AUTH_TOKEN_REF line replaced by the current one
#   gateway.systemd.env - overwritten with only the GATEWAY_AUTH_TOKEN_REF line
write_update_restart_service_auth_env() {
  local state_dir="$OPENCLAW_STATE_DIR"
  local env_file="$state_dir/.env"
  local staging="$env_file.tmp.$$"
  mkdir -p "$state_dir"
  # Build the replacement in one redirected group, then move it into place.
  {
    if [ -f "$env_file" ]; then
      # grep exits non-zero when every line is filtered out; tolerate that.
      grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" || true
    fi
    printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF"
  } >"$staging"
  mv "$staging" "$env_file"
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$state_dir/gateway.systemd.env"
}
# In "auto-auth" mode, stand up everything the automatic-restart check needs
# before the update runs: the systemctl shim, seeded device auth, a running
# gateway, the SecretRef env file, and the installed service unit.
# A no-op returning 0 in any other mode.
prepare_update_restart_probe() {
  [ "$UPDATE_RESTART_MODE" = "auto-auth" ] || return 0

  echo "Preparing configured-auth gateway for automatic update restart."
  install_update_restart_systemctl_shim
  seed_update_restart_probe_device_auth
  start_gateway
  write_update_restart_service_secretref_env
  install_update_restart_service_unit
}
# Counterpart of prepare_update_restart_probe that writes the auth env through
# write_update_restart_service_auth_env (both .env and gateway.systemd.env)
# instead of the SecretRef-only variant. Does nothing and returns 0 unless
# UPDATE_RESTART_MODE is "auto-auth".
prepare_update_restart_probe_current_install() {
  if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
    echo "Preparing candidate-auth gateway for automatic update restart."
    install_update_restart_systemctl_shim
    seed_update_restart_probe_device_auth
    start_gateway
    write_update_restart_service_auth_env
    install_update_restart_service_unit
  fi
}
assert_baseline_state() {
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline \
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
@@ -714,12 +1034,32 @@ resolve_candidate_version() {
update_candidate() {
echo "Updating baseline $baseline_spec to candidate $CANDIDATE_KIND:$CANDIDATE_SPEC ($candidate_version)"
if ! openclaw update --tag "$CANDIDATE_SPEC" --yes --json --no-restart >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then
local update_start=""
local update_end=""
local update_args=(update --tag "$CANDIDATE_SPEC" --yes --json)
if [ "$UPDATE_RESTART_MODE" = "manual" ]; then
update_args+=(--no-restart)
else
update_start="$(node -e "process.stdout.write(String(Date.now()))")"
fi
if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then
echo "openclaw update failed" >&2
cat "$UPDATE_ERR" >&2 || true
cat "$UPDATE_JSON" >&2 || true
return 1
fi
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
update_end="$(node -e "process.stdout.write(String(Date.now()))")"
update_restart_seconds=$(((update_end - update_start + 999) / 1000))
node -e '
const fs = require("node:fs");
const file = process.argv[1];
const result = JSON.parse(fs.readFileSync(file, "utf8"));
if (!result || result.status !== "ok") {
throw new Error(`update JSON did not report ok status: ${JSON.stringify(result)}`);
}
' "$UPDATE_JSON"
fi
installed_version="$(read_installed_version)"
}
@@ -776,8 +1116,11 @@ start_gateway() {
local start_epoch
local ready_epoch
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
gateway_pid="$!"
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
printf '%s\n' "$gateway_pid" >"$SYSTEMCTL_SHIM_PID_FILE"
fi
openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
@@ -788,6 +1131,13 @@ start_gateway() {
fi
}
# Start the gateway for the post-update probes, except in "auto-auth" mode,
# where this function returns 0 without starting anything.
ensure_gateway_started() {
  case "$UPDATE_RESTART_MODE" in
    auto-auth)
      return 0
      ;;
    *)
      start_gateway
      ;;
  esac
}
check_gateway_probes() {
healthz_seconds="$(probe_gateway_endpoint /healthz live "$HEALTHZ_JSON")"
export OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING="discord,telegram,whatsapp,feishu,matrix"
@@ -818,6 +1168,7 @@ check_gateway_status() {
}
phase storage-preflight storage_preflight
phase validate-update-restart-mode validate_update_restart_mode
phase reset-run-state reset_run_state
phase install-baseline install_baseline
phase seed-state seed_state
@@ -830,6 +1181,7 @@ phase seed-source-only-plugin-shadow seed_source_only_plugin_shadow
phase assert-baseline assert_baseline_state
phase seed-legacy-runtime-deps-symlink seed_legacy_runtime_deps_symlink
phase resolve-candidate resolve_candidate_version
phase prepare-update-restart-probe prepare_update_restart_probe
phase update-candidate update_candidate
phase assert-legacy-plugin-dependency-debris-before-doctor assert_legacy_plugin_dependency_debris_before_doctor
phase configure-configured-plugin-install-fixture-registry configure_configured_plugin_install_fixture_registry
@@ -838,8 +1190,8 @@ phase assert-legacy-plugin-dependency-debris-cleaned assert_legacy_plugin_depend
phase assert-legacy-runtime-deps-symlink-repaired assert_legacy_runtime_deps_symlink_repaired
phase validate-post-doctor-config validate_post_doctor_config
phase assert-survival assert_survival
phase gateway-start start_gateway
phase gateway-start ensure_gateway_started
phase gateway-probes check_gateway_probes
phase gateway-status check_gateway_status
echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} startup=${start_seconds}s healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s."
# Append the "s" unit only when a restart duration was measured; in manual mode
# update_restart_seconds is empty and the summary should read "manual", not "manuals".
update_restart_label="${update_restart_seconds:+${update_restart_seconds}s}"
echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s updateRestart=${update_restart_label:-manual} healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s."

View File

@@ -0,0 +1,264 @@
#!/usr/bin/env bash
# Write a fake `systemctl` into "$npm_config_prefix/bin" and put that directory
# first on PATH, so service-manager calls made during the test are answered by
# a small script that tracks one gateway process through a pid file.
#
# The shim takes its log, pid-file and daemon-log locations from the
# OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_{LOG,PID_FILE,DAEMON_LOG} environment
# variables and falls back to paths under /tmp; this function does not set them.
install_update_restart_systemctl_shim() {
  local shim_dir="$npm_config_prefix/bin"
  mkdir -p "$shim_dir"
  cat >"$shim_dir/systemctl" <<'SHIM'
#!/usr/bin/env bash
# systemctl stand-in for the upgrade-survivor Docker lane.
set -euo pipefail

log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}"
pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}"
daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}"

# Record every invocation so the test artifacts show what was asked of systemctl.
printf '%s\n' "$*" >>"$log_file"

# Strip the flags the shim ignores; `--property` also swallows its value.
filtered=()
for ((i = 1; i <= $#; i++)); do
  arg="${!i}"
  case "$arg" in
    --user | --quiet | --no-page | --now)
      ;;
    --property)
      i=$((i + 1))
      ;;
    *)
      filtered+=("$arg")
      ;;
  esac
done
# The first remaining word is the verb; a bare `systemctl` is treated as `status`.
command="${filtered[0]:-status}"

# True when the pid file names a live process.
is_running() {
  [ -s "$pid_file" ] || return 1
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  # Same validation as stop_gateway: a non-numeric value, 0, or 1 must never be
  # probed (`kill -0 0` succeeds for the caller's own process group).
  if ! [[ "$pid" =~ ^[0-9]+$ ]] || [ "$pid" -le 1 ]; then
    return 1
  fi
  kill -0 "$pid" >/dev/null 2>&1
}

# Terminate the tracked process: SIGTERM, wait up to ~10s, then SIGKILL.
stop_gateway() {
  [ -s "$pid_file" ] || return 0
  local pid
  pid="$(cat "$pid_file" 2>/dev/null || true)"
  if [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] && kill -0 "$pid" >/dev/null 2>&1; then
    kill "$pid" >/dev/null 2>&1 || true
    for _ in $(seq 1 100); do
      kill -0 "$pid" >/dev/null 2>&1 || break
      sleep 0.1
    done
    kill -9 "$pid" >/dev/null 2>&1 || true
  fi
  rm -f "$pid_file"
}

# Location of the user-level gateway unit file under $HOME.
unit_path() {
  printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}"
}

# Export the environment described by the unit's EnvironmentFile=/Environment= lines.
load_unit_environment() {
  local unit="$1"
  # `|| [ -n "$line" ]` keeps the final line when the unit file lacks a
  # trailing newline; plain `read` would return non-zero and drop it.
  while IFS= read -r line || [ -n "$line" ]; do
    case "$line" in
      EnvironmentFile=*)
        local spec="${line#EnvironmentFile=}"
        # Unquoted on purpose: the value may list several files.
        for token in $spec; do
          # A leading "-" marks the file as optional; strip it to get the path.
          local file="${token#-}"
          [ -f "$file" ] || continue
          set -a
          # shellcheck disable=SC1090
          . "$file"
          set +a
        done
        ;;
      Environment=*)
        local assignment="${line#Environment=}"
        assignment="${assignment#\"}"
        assignment="${assignment%\"}"
        export "$assignment"
        ;;
    esac
  done <"$unit"
}

# Launch the unit's last ExecStart command in the background and record its pid.
start_gateway() {
  local unit
  local exec_start
  unit="$(unit_path)"
  exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)"
  [ -n "$exec_start" ] || {
    echo "systemctl shim could not find ExecStart in $unit" >&2
    return 1
  }
  # Subshell: the unit environment is exported only for the launched process.
  (
    load_unit_environment "$unit"
    nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 &
    printf '%s\n' "$!" >"$pid_file"
  )
}

case "$command" in
  daemon-reload | enable | disable)
    exit 0
    ;;
  status)
    # Always succeeds, whether or not the gateway is running.
    # NOTE(review): presumably callers use `status` only to detect that the
    # service manager is reachable -- confirm before returning 3 when stopped.
    exit 0
    ;;
  stop)
    stop_gateway
    exit 0
    ;;
  restart | start)
    stop_gateway
    start_gateway
    exit 0
    ;;
  is-enabled)
    exit 0
    ;;
  is-active)
    is_running && exit 0
    exit 3
    ;;
  show)
    if is_running; then
      printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(cat "$pid_file")"
    else
      printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n'
    fi
    exit 0
    ;;
  *)
    echo "systemctl shim unsupported command: $*" >&2
    exit 1
    ;;
esac
SHIM
  chmod +x "$shim_dir/systemctl"
  export PATH="$shim_dir:$PATH"
}
# Seed a device identity and a matching paired-device record under
# $OPENCLAW_STATE_DIR by running an inline Node (ES module) script.
#
# The script generates a fresh ed25519 key pair and a random 32-byte
# base64url token, derives the device id as the SHA-256 hex digest of the raw
# public key, and writes four JSON files:
#   identity/device.json       - key pair (PEM) and device id
#   identity/device-auth.json  - the operator token with scope "operator.read"
#   devices/paired.json        - the paired-device entry (clientId "openclaw-cli")
#   devices/pending.json       - an empty object
# Each file is written with mode 0600; a failing chmod is swallowed by an
# empty catch block. The script throws when OPENCLAW_STATE_DIR is not set.
seed_update_restart_probe_device_auth() {
node --input-type=module <<'NODE'
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
const stateDir = process.env.OPENCLAW_STATE_DIR;
if (!stateDir) {
throw new Error("missing OPENCLAW_STATE_DIR");
}
const base64UrlEncode = (buf) =>
buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, "");
const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex");
const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
const publicKeyPem = publicKey.export({ type: "spki", format: "pem" });
const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" });
const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" });
const rawPublicKey =
spki.length === ed25519SpkiPrefix.length + 32 &&
spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix)
? spki.subarray(ed25519SpkiPrefix.length)
: spki;
const publicKeyRaw = base64UrlEncode(rawPublicKey);
const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex");
const token = base64UrlEncode(crypto.randomBytes(32));
const now = Date.now();
const scopes = ["operator.read"];
function writeJson(filePath, value) {
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 });
try {
fs.chmodSync(filePath, 0o600);
} catch {
}
}
writeJson(path.join(stateDir, "identity", "device.json"), {
version: 1,
deviceId,
publicKeyPem,
privateKeyPem,
createdAtMs: now,
});
writeJson(path.join(stateDir, "identity", "device-auth.json"), {
version: 1,
deviceId,
tokens: {
operator: {
token,
role: "operator",
scopes,
updatedAtMs: now,
},
},
});
writeJson(path.join(stateDir, "devices", "paired.json"), {
[deviceId]: {
deviceId,
publicKey: publicKeyRaw,
displayName: "upgrade survivor restart probe",
platform: process.platform,
clientId: "openclaw-cli",
clientMode: "probe",
role: "operator",
roles: ["operator"],
scopes,
approvedScopes: scopes,
tokens: {
operator: {
token,
role: "operator",
scopes,
createdAtMs: now,
},
},
createdAtMs: now,
approvedAtMs: now,
},
});
writeJson(path.join(stateDir, "devices", "pending.json"), {});
NODE
}
# Persist GATEWAY_AUTH_TOKEN_REF in two files under "$OPENCLAW_STATE_DIR":
#   .env                - existing lines kept, any previous
#                         GATEWAY_AUTH_TOKEN_REF line replaced by the current one
#   gateway.systemd.env - overwritten with only the GATEWAY_AUTH_TOKEN_REF line
write_update_restart_service_auth_env() {
  local state_dir="$OPENCLAW_STATE_DIR"
  local env_file="$state_dir/.env"
  local staging="$env_file.tmp.$$"
  mkdir -p "$state_dir"
  # Build the replacement in one redirected group, then move it into place.
  {
    if [ -f "$env_file" ]; then
      # grep exits non-zero when every line is filtered out; tolerate that.
      grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$env_file" || true
    fi
    printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF"
  } >"$staging"
  mv "$staging" "$env_file"
  printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$state_dir/gateway.systemd.env"
}
# Bring up a gateway from the currently installed package and register it as
# the "service" tracked by the systemctl shim, then install the service unit.
#
# Arguments:
#   $1  port passed to `openclaw gateway --port`
#   $2  file that receives the gateway's stdout and stderr
# Reads from the environment:
#   OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE
#   OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON
#   OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR
# Sets (not declared local, so visible to the caller): gateway_pid, start_seconds
# Returns 1 when `openclaw gateway install` fails, after replaying its captured
# stderr and stdout to stderr.
prepare_update_restart_probe_current_install() {
local port="$1"
local log_file="$2"
local start_epoch
local ready_epoch
echo "Preparing candidate-auth gateway for automatic update restart."
install_update_restart_systemctl_shim
seed_update_restart_probe_device_auth
# Timestamps are milliseconds from Date.now().
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
# The gateway is launched in the background with the caller's token/password variables removed.
env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$log_file" 2>&1 &
gateway_pid="$!"
# Recording the pid in the shim's pid file makes the shim report this process as the running service.
printf '%s\n' "$gateway_pid" >"$OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE"
# NOTE(review): helper is defined elsewhere; 360 is presumably a timeout in seconds -- confirm.
openclaw_e2e_wait_gateway_ready "$gateway_pid" "$log_file" 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
# Millisecond delta rounded up to whole seconds.
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
write_update_restart_service_auth_env
if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway install --force --json >"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" 2>"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR"; then
echo "gateway service install failed" >&2
cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR" >&2 || true
cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" >&2 || true
return 1
fi
}

View File

@@ -13,6 +13,7 @@ SKIP_BUILD="${OPENCLAW_UPGRADE_SURVIVOR_E2E_SKIP_BUILD:-0}"
DOCKER_RUN_TIMEOUT="${OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT:-900s}"
BASELINE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC:-}"
SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
LANE_ARTIFACT_SUFFIX="${OPENCLAW_DOCKER_ALL_LANE_NAME:-default}"
LANE_ARTIFACT_SUFFIX="${LANE_ARTIFACT_SUFFIX//[^A-Za-z0-9_.-]/_}"
ARTIFACT_DIR="${OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_DIR:-$ROOT_DIR/.artifacts/upgrade-survivor/$LANE_ARTIFACT_SUFFIX}"
@@ -86,6 +87,7 @@ if [ "${OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE:-0}" = "1" ]; then
-e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND="$CANDIDATE_KIND" \
-e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC="$CANDIDATE_SPEC" \
-e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \
-e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
-e OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK="${OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK:-}" \
-e OPENCLAW_UPGRADE_SURVIVOR_SUMMARY_JSON=/tmp/openclaw-upgrade-survivor-artifacts/summary.json \
-e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \
@@ -111,6 +113,7 @@ docker_e2e_run_with_harness \
-e OPENCLAW_TEST_STATE_SCRIPT_B64="$OPENCLAW_TEST_STATE_SCRIPT_B64" \
-e OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT=/tmp/openclaw-upgrade-survivor-artifacts \
-e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \
-e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \
-e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \
-e OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}" \
-v "$ARTIFACT_DIR:/tmp/openclaw-upgrade-survivor-artifacts" \
@@ -145,6 +148,22 @@ export TELEGRAM_BOT_TOKEN="123456:upgrade-survivor-telegram-token"
export FEISHU_APP_SECRET="upgrade-survivor-feishu-secret"
export BRAVE_API_KEY="BSA_upgrade_survivor_brave_key"
UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}"
PORT=18789
START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}"
STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}"
GATEWAY_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/gateway.log"
SYSTEMCTL_SHIM_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.log"
SYSTEMCTL_SHIM_PID_FILE="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.pid"
SYSTEMCTL_SHIM_DAEMON_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim-gateway.log"
BASELINE_SERVICE_INSTALL_JSON="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.json"
BASELINE_SERVICE_INSTALL_ERR="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.err"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE"
export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG"
export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON="$BASELINE_SERVICE_INSTALL_JSON"
export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR="$BASELINE_SERVICE_INSTALL_ERR"
gateway_pid=""
plugin_registry_pid=""
cleanup() {
@@ -152,6 +171,9 @@ cleanup() {
kill "$plugin_registry_pid" >/dev/null 2>&1 || true
fi
openclaw_e2e_terminate_gateways "${gateway_pid:-}"
if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then
openclaw_e2e_terminate_gateways "$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)"
fi
}
trap cleanup EXIT
@@ -255,10 +277,19 @@ export OPENCLAW_PACKAGE_ACCEPTANCE_LEGACY_COMPAT
echo "Checking dirty-state config before update..."
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
# shellcheck disable=SC1091
source scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh
prepare_update_restart_probe_current_install "$PORT" "$GATEWAY_LOG"
fi
echo "Running package update against the mounted tarball..."
update_args=(update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json)
if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then
update_args+=(--no-restart)
fi
set +e
openclaw update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json --no-restart >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err
env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err
update_status=$?
set -e
if [ "$update_status" -ne 0 ]; then
@@ -268,38 +299,42 @@ if [ "$update_status" -ne 0 ]; then
exit "$update_status"
fi
echo "Running non-interactive doctor repair..."
configure_configured_plugin_install_fixture_registry
if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "openclaw doctor failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "post-doctor config validation failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
echo "Skipping doctor repair until after restart proof."
else
echo "Running non-interactive doctor repair..."
configure_configured_plugin_install_fixture_registry
if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "openclaw doctor failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then
echo "post-doctor config validation failed" >&2
cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true
exit 1
fi
fi
echo "Verifying config and state survived update/doctor..."
echo "Verifying config and state survived update..."
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state
PORT=18789
START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}"
STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}"
echo "Starting gateway from upgraded state..."
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >/tmp/openclaw-upgrade-survivor-gateway.log 2>&1 &
gateway_pid="$!"
openclaw_e2e_wait_gateway_ready "$gateway_pid" /tmp/openclaw-upgrade-survivor-gateway.log 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
if [ "$start_seconds" -gt "$START_BUDGET" ]; then
echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2
cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true
exit 1
if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then
echo "Gateway restart was handled by openclaw update."
else
echo "Starting gateway from upgraded state..."
start_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 &
gateway_pid="$!"
openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360
ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")"
start_seconds=$(((ready_epoch - start_epoch + 999) / 1000))
if [ "$start_seconds" -gt "$START_BUDGET" ]; then
echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2
cat "$GATEWAY_LOG" >&2 || true
exit 1
fi
fi
echo "Checking gateway HTTP probes..."
@@ -320,7 +355,8 @@ status_start="$(node -e "process.stdout.write(String(Date.now()))")"
if ! openclaw gateway status --url "ws://127.0.0.1:$PORT" --token "$GATEWAY_AUTH_TOKEN_REF" --require-rpc --timeout 30000 --json >/tmp/openclaw-upgrade-survivor-status.json 2>/tmp/openclaw-upgrade-survivor-status.err; then
echo "gateway status failed" >&2
cat /tmp/openclaw-upgrade-survivor-status.err >&2 || true
cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true
cat "$GATEWAY_LOG" >&2 || true
cat "$SYSTEMCTL_SHIM_DAEMON_LOG" >&2 || true
exit 1
fi
status_end="$(node -e "process.stdout.write(String(Date.now()))")"
@@ -332,5 +368,5 @@ if [ "$status_seconds" -gt "$STATUS_BUDGET" ]; then
fi
node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-status-json /tmp/openclaw-upgrade-survivor-status.json
echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} startup=${start_seconds}s status=${status_seconds}s."
echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s status=${status_seconds}s."
'

View File

@@ -9,6 +9,8 @@ const LIVE_PROFILE_TIMEOUT_MS = 20 * 60 * 1000;
const OPENWEBUI_TIMEOUT_MS = 20 * 60 * 1000;
export const BUNDLED_PLUGIN_INSTALL_UNINSTALL_SHARDS = 24;
const upgradeSurvivorCommand = "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:upgrade-survivor";
const updateRestartAuthCommand =
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-restart-auth";
const LIVE_RETRY_PATTERNS = [
/529\b/i,
@@ -238,6 +240,11 @@ export const mainLanes = [
weight: 3,
},
),
npmLane("update-restart-auth", updateRestartAuthCommand, {
stateScenario: "upgrade-survivor",
timeoutMs: 25 * 60 * 1000,
weight: 3,
}),
npmLane("update-migration", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-migration", {
stateScenario: "upgrade-survivor",
timeoutMs: 30 * 60 * 1000,
@@ -536,6 +543,11 @@ const releasePathPackageUpdateCoreLanes = [
weight: 3,
},
),
npmLane("update-restart-auth", updateRestartAuthCommand, {
stateScenario: "upgrade-survivor",
timeoutMs: 25 * 60 * 1000,
weight: 3,
}),
];
const primaryReleasePathChunks = {

View File

@@ -414,6 +414,10 @@ describe("inspectGatewayRestart", () => {
server: { version: "2026.4.24", connId: "new" },
});
const service = makeGatewayService({ status: "running", pid: 8000 });
const serviceEnv = {
...process.env,
OPENCLAW_STATE_DIR: "/tmp/openclaw-restart-service-state",
} as NodeJS.ProcessEnv;
inspectPortUsage.mockResolvedValue({
port: 18789,
status: "busy",
@@ -427,6 +431,7 @@ describe("inspectGatewayRestart", () => {
port: 18789,
expectedVersion: "2026.4.24",
attempts: 1,
env: serviceEnv,
});
expect(snapshot).toMatchObject({
@@ -443,6 +448,7 @@ describe("inspectGatewayRestart", () => {
expect(probeGateway).toHaveBeenCalledWith(
expect.objectContaining({
auth: { token: "probe-token", password: undefined },
env: serviceEnv,
}),
);
});

View File

@@ -237,6 +237,7 @@ async function confirmGatewayReachable(params: {
port: number;
includeHealthDetails?: boolean;
auth?: GatewayRestartProbeAuth;
env?: NodeJS.ProcessEnv;
}): Promise<GatewayReachability> {
const token = normalizeOptionalString(params.auth?.token ?? process.env.OPENCLAW_GATEWAY_TOKEN);
const password = normalizeOptionalString(
@@ -247,6 +248,7 @@ async function confirmGatewayReachable(params: {
auth: token || password ? { token, password } : undefined,
timeoutMs: 3_000,
includeDetails: params.includeHealthDetails === true,
env: params.env,
});
const reachedGateway =
probe.ok ||
@@ -307,6 +309,7 @@ async function inspectGatewayPortHealth(params: {
await confirmGatewayReachable({
port: params.port,
auth: params.auth,
env: process.env,
})
).reachable;
} catch {
@@ -336,6 +339,7 @@ export async function inspectGatewayRestart(params: {
port: params.port,
includeHealthDetails: Boolean(expectedVersion),
auth: params.probeAuth,
env,
});
activatedPluginErrors = reachability.activatedPluginErrors;
channelProbeErrors = reachability.channelProbeErrors;

View File

@@ -822,6 +822,39 @@ describe("GatewayClient connect auth payload", () => {
client.stop();
});
it("loads stored device auth from the provided env", () => {
  // The mocked device-auth store returns this token for any lookup.
  const storedToken = "stored-device-token";
  loadDeviceAuthTokenMock.mockReturnValue({ token: storedToken, scopes: ["operator.read"] });

  // Copy of process.env that overrides only the state directory.
  const serviceEnv = {
    ...process.env,
    OPENCLAW_STATE_DIR: "/tmp/openclaw-client-service-state",
  } as NodeJS.ProcessEnv;
  const gatewayClient = new GatewayClient({ url: "ws://127.0.0.1:18789", env: serviceEnv });

  // Drive the client through open + connect challenge.
  gatewayClient.start();
  const socket = getLatestWs();
  socket.emitOpen();
  emitConnectChallenge(socket);

  // The store lookup must receive the env that was passed to the client.
  expect(loadDeviceAuthTokenMock).toHaveBeenCalledWith(
    expect.objectContaining({
      deviceId: expect.any(String),
      role: "operator",
      env: serviceEnv,
    }),
  );
  // The stored token appears as both `token` and `deviceToken` in the connect frame.
  expect(connectFrameFrom(socket)).toMatchObject({
    token: storedToken,
    deviceToken: storedToken,
  });

  gatewayClient.stop();
});
it("uses bootstrap token when no shared or device token is available", () => {
loadDeviceAuthTokenMock.mockReturnValue(undefined);
const client = new GatewayClient({

View File

@@ -151,6 +151,7 @@ export type GatewayClientOptions = {
commands?: string[];
permissions?: Record<string, boolean>;
pathEnv?: string;
env?: NodeJS.ProcessEnv;
deviceIdentity?: DeviceIdentity | null;
minProtocol?: number;
maxProtocol?: number;
@@ -369,7 +370,7 @@ export class GatewayClient {
const deviceId = this.opts.deviceIdentity.deviceId;
const role = this.opts.role ?? "operator";
try {
clearDeviceAuthToken({ deviceId, role });
clearDeviceAuthToken({ deviceId, role, env: this.opts.env });
logDebug(`cleared stale device-auth token for device ${deviceId}`);
} catch (err) {
logDebug(
@@ -592,6 +593,7 @@ export class GatewayClient {
role: authInfo.role ?? role,
token: authInfo.deviceToken,
scopes: authInfo.scopes ?? [],
env: this.opts.env,
});
}
this.backoffMs = 1000;
@@ -675,6 +677,7 @@ export class GatewayClient {
const storedAuth = loadDeviceAuthToken({
deviceId: this.opts.deviceIdentity.deviceId,
role,
env: this.opts.env,
});
if (!storedAuth) {
return null;

View File

@@ -31,6 +31,8 @@ const deviceIdentityState = vi.hoisted(() => ({
scopes: ["operator.read"],
updatedAtMs: 1,
} as Record<string, unknown> | null,
identityPaths: [] as unknown[],
tokenParams: [] as unknown[],
}));
const eventLoopReadyState = vi.hoisted(() => ({
@@ -135,7 +137,8 @@ vi.mock("../infra/device-identity.js", () => ({
}
return deviceIdentityState.value;
},
loadDeviceIdentityIfPresent: () => {
loadDeviceIdentityIfPresent: (filePath: unknown) => {
deviceIdentityState.identityPaths.push(filePath);
if (deviceIdentityState.throwOnLoad) {
throw new Error("read-only identity dir");
}
@@ -144,7 +147,10 @@ vi.mock("../infra/device-identity.js", () => ({
}));
vi.mock("../infra/device-auth-store.js", () => ({
loadDeviceAuthToken: () => deviceIdentityState.cachedToken,
loadDeviceAuthToken: (params: unknown) => {
deviceIdentityState.tokenParams.push(params);
return deviceIdentityState.cachedToken;
},
}));
vi.mock("./event-loop-ready.js", () => ({
@@ -165,6 +171,8 @@ describe("probeGateway", () => {
scopes: ["operator.read"],
updatedAtMs: 1,
};
deviceIdentityState.identityPaths = [];
deviceIdentityState.tokenParams = [];
gatewayClientState.startMode = "hello";
gatewayClientState.options = null;
gatewayClientState.requests = [];
@@ -266,6 +274,32 @@ describe("probeGateway", () => {
});
});
it("loads probe identity and cached device auth from the provided env", async () => {
  // Copy of process.env that overrides only the state directory.
  const stateDir = "/tmp/openclaw-probe-service-state";
  const probeEnv = { ...process.env, OPENCLAW_STATE_DIR: stateDir } as NodeJS.ProcessEnv;

  await probeGateway({
    url: "ws://127.0.0.1:18789",
    auth: { token: "secret" },
    timeoutMs: 1_000,
    env: probeEnv,
  });

  // Exactly one identity lookup, under the overridden state directory.
  const expectedIdentityPath = `${stateDir}/identity/device.json`;
  expect(deviceIdentityState.identityPaths).toEqual([expectedIdentityPath]);

  // Exactly one cached-token lookup, carrying the provided env.
  const expectedTokenLookup = {
    deviceId: "test-device-identity",
    role: "operator",
    env: probeEnv,
  };
  expect(deviceIdentityState.tokenParams).toEqual([expectedTokenLookup]);

  // The env is also passed through in the gateway client options.
  expect(gatewayClientState.options).toEqual(expect.objectContaining({ env: probeEnv }));
});
it("keeps device identity enabled for remote probes", async () => {
await probeGateway({
url: "wss://gateway.example/ws",

View File

@@ -1,4 +1,6 @@
import { randomUUID } from "node:crypto";
import path from "node:path";
import { resolveStateDir } from "../config/paths.js";
import { loadDeviceAuthToken } from "../infra/device-auth-store.js";
import { formatErrorMessage } from "../infra/errors.js";
import type { SystemPresence } from "../infra/system-presence.js";
@@ -149,6 +151,7 @@ export async function probeGateway(opts: {
includeDetails?: boolean;
detailLevel?: "none" | "presence" | "full";
tlsFingerprint?: string;
env?: NodeJS.ProcessEnv;
}): Promise<GatewayProbeResult> {
const startedAt = Date.now();
const instanceId = randomUUID();
@@ -168,7 +171,8 @@ export async function probeGateway(opts: {
return null;
}
const { loadDeviceIdentityIfPresent } = await import("../infra/device-identity.js");
const identity = loadDeviceIdentityIfPresent();
const stateDir = resolveStateDir(opts.env);
const identity = loadDeviceIdentityIfPresent(path.join(stateDir, "identity", "device.json"));
if (!identity) {
return null;
}
@@ -178,6 +182,7 @@ export async function probeGateway(opts: {
const cachedOperatorToken = loadDeviceAuthToken({
deviceId: identity.deviceId,
role: "operator",
env: opts.env,
});
return cachedOperatorToken ? identity : null;
} catch {
@@ -261,6 +266,7 @@ export async function probeGateway(opts: {
password: opts.auth?.password,
tlsFingerprint: opts.tlsFingerprint,
preauthHandshakeTimeoutMs: opts.preauthHandshakeTimeoutMs,
env: opts.env,
scopes: [READ_SCOPE],
clientName: GATEWAY_CLIENT_NAMES.CLI,
clientVersion: "dev",

View File

@@ -156,6 +156,7 @@ describe("scripts/lib/docker-e2e-plan", () => {
"update-channel-switch",
"upgrade-survivor",
"published-upgrade-survivor",
"update-restart-auth",
]);
expect(packageUpdateCore.lanes).toEqual(
expect.arrayContaining([
@@ -188,6 +189,11 @@ describe("scripts/lib/docker-e2e-plan", () => {
name: "published-upgrade-survivor",
stateScenario: "upgrade-survivor",
}),
expect.objectContaining({
name: "update-restart-auth",
command: "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-restart-auth",
stateScenario: "upgrade-survivor",
}),
]),
);
expect(pluginsRuntimePlugins.lanes.map((lane) => lane.name)).toEqual(["plugins"]);

View File

@@ -105,6 +105,7 @@ describe("package acceptance workflow", () => {
expect(workflow).toContain("npm-onboard-channel-agent doctor-switch");
expect(workflow).toContain("update-channel-switch upgrade-survivor");
expect(workflow).toContain("published-upgrade-survivor");
expect(workflow).toContain("published-upgrade-survivor update-restart-auth");
expect(workflow).toContain("plugins-offline plugin-update");
expect(workflow).toContain("include_release_path_suites=true");
expect(workflow).not.toContain("telegram_mode requires source=npm");
@@ -252,7 +253,19 @@ describe("package artifact reuse", () => {
expect(scheduler).toContain('["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS",');
expect(scheduler).toContain('["OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS",');
expect(packageJson).toContain("OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE=1");
expect(packageJson).toContain("test:docker:update-restart-auth");
expect(packageJson).toContain("OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE=auto-auth");
expect(publishedUpgradeSurvivor).toContain("validate_baseline_package_spec");
expect(publishedUpgradeSurvivor).toContain("OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE");
expect(publishedUpgradeSurvivor).toContain('local shim_dir="$npm_config_prefix/bin"');
expect(publishedUpgradeSurvivor).toContain("seed_update_restart_probe_device_auth");
expect(publishedUpgradeSurvivor).toContain("upgrade survivor restart probe");
expect(publishedUpgradeSurvivor).toContain("write_update_restart_service_secretref_env");
expect(publishedUpgradeSurvivor).toContain("GATEWAY_AUTH_TOKEN_REF=%s");
expect(publishedUpgradeSurvivor).toContain(
"env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw",
);
expect(publishedUpgradeSurvivor).toContain("phase prepare-update-restart-probe");
expect(publishedUpgradeSurvivor).toContain("openclaw@(alpha|beta|latest|");
expect(publishedUpgradeSurvivor).toContain("plugin_deps_cleanup_plugin_dirs");
expect(publishedUpgradeSurvivor).toContain('"$(package_root)/extensions/$plugin"');
@@ -534,7 +547,7 @@ describe("package artifact reuse", () => {
);
expect(workflow).toContain("suite_profile: custom");
expect(workflow).toContain(
"docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update",
"docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update",
);
expect(workflow).toContain(
"published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'last-stable-4 2026.4.23 2026.5.2 2026.4.15' || '' }}",