From 2de0113608cf264decf1427223de0fd5f6d2ddab Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 4 May 2026 21:57:57 -0700 Subject: [PATCH] test(update): cover authenticated restart updates --- .../openclaw-live-and-e2e-checks-reusable.yml | 4 +- .github/workflows/openclaw-release-checks.yml | 2 +- .github/workflows/package-acceptance.yml | 4 +- CHANGELOG.md | 1 + docs/help/testing-updates-plugins.md | 15 +- docs/reference/RELEASING.md | 21 +- package.json | 1 + scripts/e2e/lib/upgrade-survivor/run.sh | 360 +++++++++++++++++- .../upgrade-survivor/update-restart-auth.sh | 264 +++++++++++++ scripts/e2e/upgrade-survivor-docker.sh | 96 +++-- scripts/lib/docker-e2e-scenarios.mjs | 12 + src/cli/daemon-cli/restart-health.test.ts | 6 + src/cli/daemon-cli/restart-health.ts | 4 + src/gateway/client.test.ts | 33 ++ src/gateway/client.ts | 5 +- src/gateway/probe.test.ts | 38 +- src/gateway/probe.ts | 8 +- test/scripts/docker-e2e-plan.test.ts | 6 + .../package-acceptance-workflow.test.ts | 15 +- 19 files changed, 838 insertions(+), 57 deletions(-) create mode 100644 scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index c0043b74a93..32dc4ba38bf 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -34,7 +34,7 @@ on: default: 1 type: number published_upgrade_survivor_baseline: - description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lane + description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lanes required: false default: openclaw@latest type: string @@ -129,7 +129,7 @@ on: default: 1 type: number published_upgrade_survivor_baseline: - description: Published OpenClaw package baseline for the published-upgrade-survivor/update-migration Docker lane + description: Published OpenClaw package baseline for the published-upgrade-survivor/update-restart-auth/update-migration Docker lanes required: false default: openclaw@latest type: string diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 92ca6d09334..7612423e86d 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -558,7 +558,7 @@ jobs: artifact_name: ${{ needs.prepare_release_package.outputs.artifact_name }} package_sha256: ${{ needs.prepare_release_package.outputs.package_sha256 }} suite_profile: custom - docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update + docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'last-stable-4 2026.4.23 2026.5.2 2026.4.15' || '' }} published_upgrade_survivor_scenarios: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'reported-issues' || '' }} telegram_mode: mock-openai diff --git a/.github/workflows/package-acceptance.yml b/.github/workflows/package-acceptance.yml index 215e623aafc..05c376ab219 100644 --- a/.github/workflows/package-acceptance.yml +++ b/.github/workflows/package-acceptance.yml @@ -386,10 +386,10 @@ jobs: docker_lanes="npm-onboard-channel-agent gateway-network config-reload" ;; package) - docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update" + docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update" ;; product) - docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins plugin-update mcp-channels cron-mcp-cleanup openai-web-search-minimal openwebui" + docker_lanes="npm-onboard-channel-agent doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins plugin-update mcp-channels cron-mcp-cleanup openai-web-search-minimal openwebui" include_openwebui=true ;; full) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07776e4ec5e..6ec8aa49998 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Update/restart: probe managed Gateway restarts with the service environment and add a Docker product lane that exercises candidate-owned `openclaw update --yes --json` restarts, so SecretRef-backed local gateway auth cannot regress behind mocked restart checks. Thanks @vincentkoc. - Plugins/install: honor the beta update channel for onboarding and doctor-managed plugin installs by requesting floating npm and ClawHub specs with `@beta` while keeping persistent install records on the catalog default. Thanks @vincentkoc. - WhatsApp/onboarding: canonicalize setup and pairing allowlist entries to WhatsApp's digit-only phone ids while still accepting E.164, JID, and `whatsapp:` inputs, so personal-phone allowlists match WhatsApp Web sender ids after setup. Thanks @vincentkoc. - Gateway/startup: load provider plugins that own explicitly configured image, video, or music generation defaults so generation tools become live after gateway restart instead of remaining catalog-only. Fixes #77244. Thanks @buyuangtampan, @Nikoxx99, and @vincentkoc. diff --git a/docs/help/testing-updates-plugins.md b/docs/help/testing-updates-plugins.md index dfa5b26fbeb..adb20cf83b3 100644 --- a/docs/help/testing-updates-plugins.md +++ b/docs/help/testing-updates-plugins.md @@ -78,6 +78,7 @@ pnpm test:docker:plugin-lifecycle-matrix pnpm test:docker:plugin-update pnpm test:docker:upgrade-survivor pnpm test:docker:published-upgrade-survivor +pnpm test:docker:update-restart-auth pnpm test:docker:update-migration ``` @@ -103,6 +104,10 @@ Important lanes: configures it through a baked `openclaw config set` recipe, updates it to the candidate tarball, runs doctor, checks legacy cleanup, starts the Gateway, and probes `/healthz`, `/readyz`, and RPC status. +- `test:docker:update-restart-auth` installs the candidate package, starts a + managed token-auth Gateway, unsets caller gateway auth env for + `openclaw update --yes --json`, and requires the candidate update command to + restart the Gateway before the normal probes. - `test:docker:update-migration` is the cleanup-heavy published-update lane. It starts from a configured Discord/Telegram-style user state, runs baseline doctor so configured plugin dependencies have a chance to materialize, seeds @@ -164,10 +169,10 @@ resolved release SHA. For post-publish proof, pass `package_acceptance_package_spec=openclaw@YYYY.M.D` so the same upgrade matrix targets the shipped npm package instead. -Release checks call Package Acceptance with the package/update/plugin set: +Release checks call Package Acceptance with the package/update/restart/plugin set: ```text -doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update +doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update ``` When release soak is enabled, they also pass: @@ -224,7 +229,7 @@ For release candidates, the default proof stack is: 1. `pnpm check:changed` and `pnpm test:changed` for source-level regressions. 2. `pnpm release:check` for package artifact integrity. 3. Package Acceptance `package` profile or the release-check custom package - lanes for install/update/plugin contracts. + lanes for install/update/restart/plugin contracts. 4. Cross-OS release checks for OS-specific installer, onboarding, and platform behavior. 5. Live suites only when the changed surface touches provider or hosted-service @@ -245,7 +250,8 @@ Compatibility leniency is narrow and time boxed: warning or skipping. Do not add new startup migrations for these old shapes. Add or extend a doctor -repair, then prove it with `upgrade-survivor` or `published-upgrade-survivor`. +repair, then prove it with `upgrade-survivor`, `published-upgrade-survivor`, or +`update-restart-auth` when the update command owns the restart. ## Adding coverage @@ -257,6 +263,7 @@ can fail for the right reason: checker test. - CLI install/update behavior: Docker lane assertion or fixture. - Published-release migration behavior: `published-upgrade-survivor` scenario. +- Update-owned restart behavior: `update-restart-auth`. - Registry/package source behavior: `test:docker:plugins` fixture or ClawHub fixture server. - Dependency layout or cleanup behavior: assert both runtime execution and the diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index ea5e0ee5852..6d0bc5d6254 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -141,11 +141,13 @@ the maintainer-only release runbook. `telegram_mode=mock-openai` or `telegram_mode=live-frontier`. When the selected Docker lanes include `published-upgrade-survivor`, the package artifact is the candidate and `published_upgrade_survivor_baseline` selects - the published baseline. + the published baseline. `update-restart-auth` uses the candidate package as + both the installed CLI and the package-under-test so it exercises the + candidate update command's managed restart path. Example: `gh workflow run package-acceptance.yml --ref main -f workflow_ref=main -f source=npm -f package_spec=openclaw@beta -f suite_profile=product -f published_upgrade_survivor_baseline=openclaw@2026.4.26 -f telegram_mode=mock-openai` Common profiles: - `smoke`: install/channel/agent, gateway network, and config reload lanes - - `package`: artifact-native package/update/plugin lanes without OpenWebUI or live ClawHub + - `package`: artifact-native package/update/restart/plugin lanes without OpenWebUI or live ClawHub - `product`: package profile plus MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI - `full`: Docker release-path chunks with OpenWebUI @@ -486,11 +488,12 @@ Supported candidate sources: `OpenClaw Release Checks` runs Package Acceptance with `source=artifact`, the prepared release package artifact, `suite_profile=custom`, -`docker_lanes=doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update`, -`telegram_mode=mock-openai`. Package Acceptance keeps migration, update, stale -plugin dependency cleanup, offline plugin fixtures, plugin update, and Telegram -package QA against the same resolved tarball. Blocking release checks use the -default latest published package baseline; `run_release_soak=true` or +`docker_lanes=doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update`, +`telegram_mode=mock-openai`. Package Acceptance keeps migration, update, +configured-auth update restart, stale plugin dependency cleanup, offline plugin +fixtures, plugin update, and Telegram package QA against the same resolved +tarball. Blocking release checks use the default latest published package +baseline; `run_release_soak=true` or `release_profile=full` expands to every stable npm-published baseline from `2026.4.23` through `latest` plus reported-issue fixtures. Use Package Acceptance with `source=npm` for an already shipped candidate, or @@ -536,8 +539,8 @@ Common package profiles: - `smoke`: quick package install/channel/agent, gateway network, and config reload lanes -- `package`: install/update/plugin package contracts without live ClawHub; this is the release-check - default +- `package`: install/update/restart/plugin package contracts without live + ClawHub; this is the release-check default - `product`: `package` plus MCP channels, cron/subagent cleanup, OpenAI web search, and OpenWebUI - `full`: Docker release-path chunks with OpenWebUI diff --git a/package.json b/package.json index 4f48851b7ce..bf356561cab 100644 --- a/package.json +++ b/package.json @@ -1570,6 +1570,7 @@ "test:docker:timings": "node scripts/docker-e2e-timings.mjs", "test:docker:update-channel-switch": "bash scripts/e2e/update-channel-switch-docker.sh", "test:docker:update-migration": "env OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE=1 OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC=${OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC:-openclaw@2026.4.23} OPENCLAW_UPGRADE_SURVIVOR_SCENARIO=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-plugin-deps-cleanup} bash scripts/e2e/upgrade-survivor-docker.sh", + "test:docker:update-restart-auth": "env OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE=auto-auth OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT=${OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT:-1500s} bash scripts/e2e/upgrade-survivor-docker.sh", "test:docker:upgrade-survivor": "bash scripts/e2e/upgrade-survivor-docker.sh", "test:e2e": "node scripts/run-vitest.mjs run --config test/vitest/vitest.e2e.config.ts", "test:e2e:openshell": "OPENCLAW_E2E_OPENSHELL=1 node scripts/run-vitest.mjs run --config test/vitest/vitest.e2e.config.ts extensions/openshell/src/backend.e2e.test.ts", diff --git a/scripts/e2e/lib/upgrade-survivor/run.sh b/scripts/e2e/lib/upgrade-survivor/run.sh index 3f64479651f..af3c05d6305 100644 --- a/scripts/e2e/lib/upgrade-survivor/run.sh +++ b/scripts/e2e/lib/upgrade-survivor/run.sh @@ -37,6 +37,7 @@ BASELINE_RAW="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE:?missing OPENCLAW_UPGRADE_SUR CANDIDATE_KIND="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND:-tarball}" CANDIDATE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC:-${OPENCLAW_CURRENT_PACKAGE_TGZ:-}}" SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}" +UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}" CURRENT_PHASE="setup" FAILURE_PHASE="" FAILURE_MESSAGE="" @@ -51,6 +52,7 @@ start_seconds="" status_seconds="" healthz_seconds="" readyz_seconds="" +update_restart_seconds="" BASELINE_INSTALL_LOG="$ARTIFACT_ROOT/baseline-install.log" UPDATE_JSON="$ARTIFACT_ROOT/update.json" @@ -63,6 +65,11 @@ READYZ_JSON="$ARTIFACT_ROOT/readyz.json" STATUS_JSON="$ARTIFACT_ROOT/status.json" STATUS_ERR="$ARTIFACT_ROOT/status.err" BASELINE_CONFIG_VALIDATE_LOG="$ARTIFACT_ROOT/baseline-config-validate.log" +BASELINE_SERVICE_INSTALL_JSON="$ARTIFACT_ROOT/baseline-service-install.json" +BASELINE_SERVICE_INSTALL_ERR="$ARTIFACT_ROOT/baseline-service-install.err" +SYSTEMCTL_SHIM_LOG="$ARTIFACT_ROOT/systemctl-shim.log" +SYSTEMCTL_SHIM_PID_FILE="$ARTIFACT_ROOT/systemctl-shim.pid" +SYSTEMCTL_SHIM_DAEMON_LOG="$ARTIFACT_ROOT/systemctl-shim-gateway.log" CONFIG_COVERAGE_JSON="$ARTIFACT_ROOT/config-recipe.json" export OPENCLAW_UPGRADE_SURVIVOR_CONFIG_COVERAGE_JSON="$CONFIG_COVERAGE_JSON" rm -f "$SUMMARY_JSON" "$CONFIG_COVERAGE_JSON" @@ -113,6 +120,17 @@ normalize_baseline() { validate_baseline_package_spec "$baseline_spec" } +validate_update_restart_mode() { + case "$UPDATE_RESTART_MODE" in + manual | auto-auth) + ;; + *) + echo "OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE must be manual or auto-auth; got: $UPDATE_RESTART_MODE" >&2 + return 1 + ;; + esac +} + json_event() { local phase="$1" local status="$2" @@ -139,7 +157,9 @@ write_summary() { SUMMARY_CANDIDATE_VERSION="$candidate_version" \ SUMMARY_INSTALLED_VERSION="$installed_version" \ SUMMARY_SCENARIO="$SCENARIO" \ + SUMMARY_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \ SUMMARY_START_SECONDS="$start_seconds" \ + SUMMARY_UPDATE_RESTART_SECONDS="$update_restart_seconds" \ SUMMARY_HEALTHZ_SECONDS="$healthz_seconds" \ SUMMARY_READYZ_SECONDS="$readyz_seconds" \ SUMMARY_STATUS_SECONDS="$status_seconds" \ @@ -173,8 +193,10 @@ const summary = { version: process.env.SUMMARY_CANDIDATE_VERSION || null, }, installedVersion: process.env.SUMMARY_INSTALLED_VERSION || null, + updateRestartMode: process.env.SUMMARY_UPDATE_RESTART_MODE || "manual", timings: { startupSeconds: numberOrNull(process.env.SUMMARY_START_SECONDS), + updateRestartSeconds: numberOrNull(process.env.SUMMARY_UPDATE_RESTART_SECONDS), healthzSeconds: numberOrNull(process.env.SUMMARY_HEALTHZ_SECONDS), readyzSeconds: numberOrNull(process.env.SUMMARY_READYZ_SECONDS), statusSeconds: numberOrNull(process.env.SUMMARY_STATUS_SECONDS), @@ -197,6 +219,13 @@ cleanup() { kill "$plugin_registry_pid" >/dev/null 2>&1 || true fi openclaw_e2e_terminate_gateways "${gateway_pid:-}" + if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then + local shim_pid + shim_pid="$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)" + if [[ "$shim_pid" =~ ^[0-9]+$ ]] && [ "$shim_pid" -gt 1 ]; then + openclaw_e2e_terminate_gateways "$shim_pid" + fi + fi } on_error() { @@ -612,6 +641,7 @@ rm_rf_retry() { reset_run_state() { rm_rf_retry "$npm_config_prefix" "$TMPDIR" "$ARTIFACT_ROOT/state-home" + rm -f "$SYSTEMCTL_SHIM_PID_FILE" "$SYSTEMCTL_SHIM_DAEMON_LOG" mkdir -p "$npm_config_prefix" "$npm_config_cache" "$TMPDIR" } @@ -670,6 +700,296 @@ validate_baseline_config() { fi } +install_update_restart_systemctl_shim() { + local shim_dir="$npm_config_prefix/bin" + mkdir -p "$shim_dir" + cat >"$shim_dir/systemctl" <<'SHIM' +#!/usr/bin/env bash +set -euo pipefail + +log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}" +pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}" +daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}" +printf '%s\n' "$*" >>"$log_file" + +filtered=() +for ((i = 1; i <= $#; i++)); do + arg="${!i}" + case "$arg" in + --user | --quiet | --no-page | --now) + ;; + --property) + i=$((i + 1)) + ;; + *) + filtered+=("$arg") + ;; + esac +done + +command="${filtered[0]:-status}" + +is_running() { + [ -s "$pid_file" ] || return 1 + local pid + pid="$(cat "$pid_file" 2>/dev/null || true)" + [ -n "$pid" ] || return 1 + kill -0 "$pid" >/dev/null 2>&1 +} + +stop_gateway() { + [ -s "$pid_file" ] || return 0 + local pid + pid="$(cat "$pid_file" 2>/dev/null || true)" + if [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] && kill -0 "$pid" >/dev/null 2>&1; then + kill "$pid" >/dev/null 2>&1 || true + for _ in $(seq 1 100); do + kill -0 "$pid" >/dev/null 2>&1 || break + sleep 0.1 + done + kill -9 "$pid" >/dev/null 2>&1 || true + fi + rm -f "$pid_file" +} + +unit_path() { + printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}" +} + +load_unit_environment() { + local unit="$1" + while IFS= read -r line; do + case "$line" in + EnvironmentFile=*) + local spec="${line#EnvironmentFile=}" + for token in $spec; do + local file="${token#-}" + [ -f "$file" ] || continue + set -a + # shellcheck disable=SC1090 + . "$file" + set +a + done + ;; + Environment=*) + local assignment="${line#Environment=}" + assignment="${assignment#\"}" + assignment="${assignment%\"}" + export "$assignment" + ;; + esac + done <"$unit" +} + +start_gateway() { + local unit + local exec_start + unit="$(unit_path)" + exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)" + [ -n "$exec_start" ] || { + echo "systemctl shim could not find ExecStart in $unit" >&2 + return 1 + } + ( + load_unit_environment "$unit" + nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 & + printf '%s\n' "$!" >"$pid_file" + ) +} + +case "$command" in + daemon-reload | enable | disable) + exit 0 + ;; + status) + is_running && exit 0 + exit 0 + ;; + stop) + stop_gateway + exit 0 + ;; + restart | start) + stop_gateway + start_gateway + exit 0 + ;; + is-enabled) + exit 0 + ;; + is-active) + is_running && exit 0 + exit 3 + ;; + show) + if is_running; then + printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(cat "$pid_file")" + else + printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n' + fi + exit 0 + ;; + *) + echo "systemctl shim unsupported command: $*" >&2 + exit 1 + ;; +esac +SHIM + chmod +x "$shim_dir/systemctl" + export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG" + export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE" + export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG" + export PATH="$shim_dir:$PATH" +} + +install_update_restart_service_unit() { + if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway install --force --json >"$BASELINE_SERVICE_INSTALL_JSON" 2>"$BASELINE_SERVICE_INSTALL_ERR"; then + echo "baseline gateway service install failed" >&2 + cat "$BASELINE_SERVICE_INSTALL_ERR" >&2 || true + cat "$BASELINE_SERVICE_INSTALL_JSON" >&2 || true + return 1 + fi +} + +seed_update_restart_probe_device_auth() { + node --input-type=module <<'NODE' +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; + +const stateDir = process.env.OPENCLAW_STATE_DIR; +if (!stateDir) { + throw new Error("missing OPENCLAW_STATE_DIR"); +} + +const base64UrlEncode = (buf) => + buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, ""); +const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex"); +const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519"); +const publicKeyPem = publicKey.export({ type: "spki", format: "pem" }); +const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" }); +const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" }); +const rawPublicKey = + spki.length === ed25519SpkiPrefix.length + 32 && + spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix) + ? spki.subarray(ed25519SpkiPrefix.length) + : spki; +const publicKeyRaw = base64UrlEncode(rawPublicKey); +const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex"); +const token = base64UrlEncode(crypto.randomBytes(32)); +const now = Date.now(); +const scopes = ["operator.read"]; + +function writeJson(filePath, value) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 }); + try { + fs.chmodSync(filePath, 0o600); + } catch { + // best-effort inside Docker + } +} + +writeJson(path.join(stateDir, "identity", "device.json"), { + version: 1, + deviceId, + publicKeyPem, + privateKeyPem, + createdAtMs: now, +}); +writeJson(path.join(stateDir, "identity", "device-auth.json"), { + version: 1, + deviceId, + tokens: { + operator: { + token, + role: "operator", + scopes, + updatedAtMs: now, + }, + }, +}); +writeJson(path.join(stateDir, "devices", "paired.json"), { + [deviceId]: { + deviceId, + publicKey: publicKeyRaw, + displayName: "upgrade survivor restart probe", + platform: process.platform, + clientId: "upgrade-survivor", + clientMode: "probe", + role: "operator", + roles: ["operator"], + scopes, + approvedScopes: scopes, + tokens: { + operator: { + token, + role: "operator", + scopes, + createdAtMs: now, + }, + }, + createdAtMs: now, + approvedAtMs: now, + }, +}); +writeJson(path.join(stateDir, "devices", "pending.json"), {}); +NODE +} + +write_update_restart_service_secretref_env() { + mkdir -p "$OPENCLAW_STATE_DIR" + local dotenv_path="$OPENCLAW_STATE_DIR/.env" + local tmp_path="$dotenv_path.tmp.$$" + if [ -f "$dotenv_path" ]; then + grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$dotenv_path" >"$tmp_path" || true + else + : >"$tmp_path" + fi + # Managed restarts resolve SecretRefs from service-owned durable env, not the update caller. + printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$tmp_path" + mv "$tmp_path" "$dotenv_path" +} + +write_update_restart_service_auth_env() { + mkdir -p "$OPENCLAW_STATE_DIR" + local dotenv_path="$OPENCLAW_STATE_DIR/.env" + local tmp_path="$dotenv_path.tmp.$$" + if [ -f "$dotenv_path" ]; then + grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$dotenv_path" >"$tmp_path" || true + else + : >"$tmp_path" + fi + printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$tmp_path" + mv "$tmp_path" "$dotenv_path" + local systemd_env_path="$OPENCLAW_STATE_DIR/gateway.systemd.env" + printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$systemd_env_path" +} + +prepare_update_restart_probe() { + if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then + return 0 + fi + echo "Preparing configured-auth gateway for automatic update restart." + install_update_restart_systemctl_shim + seed_update_restart_probe_device_auth + start_gateway + write_update_restart_service_secretref_env + install_update_restart_service_unit +} + +prepare_update_restart_probe_current_install() { + if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then + return 0 + fi + echo "Preparing candidate-auth gateway for automatic update restart." + install_update_restart_systemctl_shim + seed_update_restart_probe_device_auth + start_gateway + write_update_restart_service_auth_env + install_update_restart_service_unit +} + assert_baseline_state() { OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline \ node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config @@ -714,12 +1034,32 @@ resolve_candidate_version() { update_candidate() { echo "Updating baseline $baseline_spec to candidate $CANDIDATE_KIND:$CANDIDATE_SPEC ($candidate_version)" - if ! openclaw update --tag "$CANDIDATE_SPEC" --yes --json --no-restart >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then + local update_start="" + local update_end="" + local update_args=(update --tag "$CANDIDATE_SPEC" --yes --json) + if [ "$UPDATE_RESTART_MODE" = "manual" ]; then + update_args+=(--no-restart) + else + update_start="$(node -e "process.stdout.write(String(Date.now()))")" + fi + if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >"$UPDATE_JSON" 2>"$UPDATE_ERR"; then echo "openclaw update failed" >&2 cat "$UPDATE_ERR" >&2 || true cat "$UPDATE_JSON" >&2 || true return 1 fi + if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + update_end="$(node -e "process.stdout.write(String(Date.now()))")" + update_restart_seconds=$(((update_end - update_start + 999) / 1000)) + node -e ' + const fs = require("node:fs"); + const file = process.argv[1]; + const result = JSON.parse(fs.readFileSync(file, "utf8")); + if (!result || result.status !== "ok") { + throw new Error(`update JSON did not report ok status: ${JSON.stringify(result)}`); + } + ' "$UPDATE_JSON" + fi installed_version="$(read_installed_version)" } @@ -776,8 +1116,11 @@ start_gateway() { local start_epoch local ready_epoch start_epoch="$(node -e "process.stdout.write(String(Date.now()))")" - openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 & + env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 & gateway_pid="$!" + if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + printf '%s\n' "$gateway_pid" >"$SYSTEMCTL_SHIM_PID_FILE" + fi openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360 ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")" start_seconds=$(((ready_epoch - start_epoch + 999) / 1000)) @@ -788,6 +1131,13 @@ start_gateway() { fi } +ensure_gateway_started() { + if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + return 0 + fi + start_gateway +} + check_gateway_probes() { healthz_seconds="$(probe_gateway_endpoint /healthz live "$HEALTHZ_JSON")" export OPENCLAW_UPGRADE_SURVIVOR_READYZ_ALLOW_FAILING="discord,telegram,whatsapp,feishu,matrix" @@ -818,6 +1168,7 @@ check_gateway_status() { } phase storage-preflight storage_preflight +phase validate-update-restart-mode validate_update_restart_mode phase reset-run-state reset_run_state phase install-baseline install_baseline phase seed-state seed_state @@ -830,6 +1181,7 @@ phase seed-source-only-plugin-shadow seed_source_only_plugin_shadow phase assert-baseline assert_baseline_state phase seed-legacy-runtime-deps-symlink seed_legacy_runtime_deps_symlink phase resolve-candidate resolve_candidate_version +phase prepare-update-restart-probe prepare_update_restart_probe phase update-candidate update_candidate phase assert-legacy-plugin-dependency-debris-before-doctor assert_legacy_plugin_dependency_debris_before_doctor phase configure-configured-plugin-install-fixture-registry configure_configured_plugin_install_fixture_registry @@ -838,8 +1190,8 @@ phase assert-legacy-plugin-dependency-debris-cleaned assert_legacy_plugin_depend phase assert-legacy-runtime-deps-symlink-repaired assert_legacy_runtime_deps_symlink_repaired phase validate-post-doctor-config validate_post_doctor_config phase assert-survival assert_survival -phase gateway-start start_gateway +phase gateway-start ensure_gateway_started phase gateway-probes check_gateway_probes phase gateway-status check_gateway_status -echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} startup=${start_seconds}s healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s." +echo "Upgrade survivor Docker E2E passed baseline=${baseline_spec} scenario=${SCENARIO} candidate=${candidate_version} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s updateRestart=${update_restart_seconds:-manual}s healthz=${healthz_seconds}s readyz=${readyz_seconds}s status=${status_seconds}s." diff --git a/scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh b/scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh new file mode 100644 index 00000000000..216bf9874f6 --- /dev/null +++ b/scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh @@ -0,0 +1,264 @@ +#!/usr/bin/env bash + +install_update_restart_systemctl_shim() { + local shim_dir="$npm_config_prefix/bin" + mkdir -p "$shim_dir" + cat >"$shim_dir/systemctl" <<'SHIM' +#!/usr/bin/env bash +set -euo pipefail + +log_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG:-/tmp/openclaw-systemctl-shim.log}" +pid_file="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE:-/tmp/openclaw-systemctl-shim.pid}" +daemon_log="${OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG:-/tmp/openclaw-systemctl-shim-gateway.log}" +printf '%s\n' "$*" >>"$log_file" + +filtered=() +for ((i = 1; i <= $#; i++)); do + arg="${!i}" + case "$arg" in + --user | --quiet | --no-page | --now) + ;; + --property) + i=$((i + 1)) + ;; + *) + filtered+=("$arg") + ;; + esac +done + +command="${filtered[0]:-status}" + +is_running() { + [ -s "$pid_file" ] || return 1 + local pid + pid="$(cat "$pid_file" 2>/dev/null || true)" + [ -n "$pid" ] || return 1 + kill -0 "$pid" >/dev/null 2>&1 +} + +stop_gateway() { + [ -s "$pid_file" ] || return 0 + local pid + pid="$(cat "$pid_file" 2>/dev/null || true)" + if [[ "$pid" =~ ^[0-9]+$ ]] && [ "$pid" -gt 1 ] && kill -0 "$pid" >/dev/null 2>&1; then + kill "$pid" >/dev/null 2>&1 || true + for _ in $(seq 1 100); do + kill -0 "$pid" >/dev/null 2>&1 || break + sleep 0.1 + done + kill -9 "$pid" >/dev/null 2>&1 || true + fi + rm -f "$pid_file" +} + +unit_path() { + printf '%s/.config/systemd/user/openclaw-gateway.service\n' "${HOME:?missing HOME}" +} + +load_unit_environment() { + local unit="$1" + while IFS= read -r line; do + case "$line" in + EnvironmentFile=*) + local spec="${line#EnvironmentFile=}" + for token in $spec; do + local file="${token#-}" + [ -f "$file" ] || continue + set -a + # shellcheck disable=SC1090 + . "$file" + set +a + done + ;; + Environment=*) + local assignment="${line#Environment=}" + assignment="${assignment#\"}" + assignment="${assignment%\"}" + export "$assignment" + ;; + esac + done <"$unit" +} + +start_gateway() { + local unit + local exec_start + unit="$(unit_path)" + exec_start="$(sed -n 's/^ExecStart=//p' "$unit" | tail -n 1)" + [ -n "$exec_start" ] || { + echo "systemctl shim could not find ExecStart in $unit" >&2 + return 1 + } + ( + load_unit_environment "$unit" + nohup bash -lc "exec $exec_start" >>"$daemon_log" 2>&1 & + printf '%s\n' "$!" >"$pid_file" + ) +} + +case "$command" in + daemon-reload | enable | disable) + exit 0 + ;; + status) + is_running && exit 0 + exit 0 + ;; + stop) + stop_gateway + exit 0 + ;; + restart | start) + stop_gateway + start_gateway + exit 0 + ;; + is-enabled) + exit 0 + ;; + is-active) + is_running && exit 0 + exit 3 + ;; + show) + if is_running; then + printf 'ActiveState=active\nSubState=running\nMainPID=%s\nExecMainStatus=0\nExecMainCode=0\n' "$(cat "$pid_file")" + else + printf 'ActiveState=inactive\nSubState=dead\nMainPID=0\nExecMainStatus=0\nExecMainCode=0\n' + fi + exit 0 + ;; + *) + echo "systemctl shim unsupported command: $*" >&2 + exit 1 + ;; +esac +SHIM + chmod +x "$shim_dir/systemctl" + export PATH="$shim_dir:$PATH" +} + +seed_update_restart_probe_device_auth() { + node --input-type=module <<'NODE' +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; + +const stateDir = process.env.OPENCLAW_STATE_DIR; +if (!stateDir) { + throw new Error("missing OPENCLAW_STATE_DIR"); +} + +const base64UrlEncode = (buf) => + buf.toString("base64").replaceAll("+", "-").replaceAll("/", "_").replace(/=+$/g, ""); +const ed25519SpkiPrefix = Buffer.from("302a300506032b6570032100", "hex"); +const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519"); +const publicKeyPem = publicKey.export({ type: "spki", format: "pem" }); +const privateKeyPem = privateKey.export({ type: "pkcs8", format: "pem" }); +const spki = crypto.createPublicKey(publicKeyPem).export({ type: "spki", format: "der" }); +const rawPublicKey = + spki.length === ed25519SpkiPrefix.length + 32 && + spki.subarray(0, ed25519SpkiPrefix.length).equals(ed25519SpkiPrefix) + ? spki.subarray(ed25519SpkiPrefix.length) + : spki; +const publicKeyRaw = base64UrlEncode(rawPublicKey); +const deviceId = crypto.createHash("sha256").update(rawPublicKey).digest("hex"); +const token = base64UrlEncode(crypto.randomBytes(32)); +const now = Date.now(); +const scopes = ["operator.read"]; + +function writeJson(filePath, value) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, { mode: 0o600 }); + try { + fs.chmodSync(filePath, 0o600); + } catch { + } +} + +writeJson(path.join(stateDir, "identity", "device.json"), { + version: 1, + deviceId, + publicKeyPem, + privateKeyPem, + createdAtMs: now, +}); +writeJson(path.join(stateDir, "identity", "device-auth.json"), { + version: 1, + deviceId, + tokens: { + operator: { + token, + role: "operator", + scopes, + updatedAtMs: now, + }, + }, +}); +writeJson(path.join(stateDir, "devices", "paired.json"), { + [deviceId]: { + deviceId, + publicKey: publicKeyRaw, + displayName: "upgrade survivor restart probe", + platform: process.platform, + clientId: "openclaw-cli", + clientMode: "probe", + role: "operator", + roles: ["operator"], + scopes, + approvedScopes: scopes, + tokens: { + operator: { + token, + role: "operator", + scopes, + createdAtMs: now, + }, + }, + createdAtMs: now, + approvedAtMs: now, + }, +}); +writeJson(path.join(stateDir, "devices", "pending.json"), {}); +NODE +} + +write_update_restart_service_auth_env() { + mkdir -p "$OPENCLAW_STATE_DIR" + local dotenv_path="$OPENCLAW_STATE_DIR/.env" + local tmp_path="$dotenv_path.tmp.$$" + if [ -f "$dotenv_path" ]; then + grep -v '^GATEWAY_AUTH_TOKEN_REF=' "$dotenv_path" >"$tmp_path" || true + else + : >"$tmp_path" + fi + printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >>"$tmp_path" + mv "$tmp_path" "$dotenv_path" + printf 'GATEWAY_AUTH_TOKEN_REF=%s\n' "$GATEWAY_AUTH_TOKEN_REF" >"$OPENCLAW_STATE_DIR/gateway.systemd.env" +} + +prepare_update_restart_probe_current_install() { + local port="$1" + local log_file="$2" + local start_epoch + local ready_epoch + + echo "Preparing candidate-auth gateway for automatic update restart." + install_update_restart_systemctl_shim + seed_update_restart_probe_device_auth + start_epoch="$(node -e "process.stdout.write(String(Date.now()))")" + env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway --port "$port" --bind loopback --allow-unconfigured >"$log_file" 2>&1 & + gateway_pid="$!" + printf '%s\n' "$gateway_pid" >"$OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE" + openclaw_e2e_wait_gateway_ready "$gateway_pid" "$log_file" 360 + ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")" + start_seconds=$(((ready_epoch - start_epoch + 999) / 1000)) + write_update_restart_service_auth_env + if ! env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw gateway install --force --json >"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" 2>"$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR"; then + echo "gateway service install failed" >&2 + cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR" >&2 || true + cat "$OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON" >&2 || true + return 1 + fi +} diff --git a/scripts/e2e/upgrade-survivor-docker.sh b/scripts/e2e/upgrade-survivor-docker.sh index 23de3a1a1c5..d418c6eaa7b 100755 --- a/scripts/e2e/upgrade-survivor-docker.sh +++ b/scripts/e2e/upgrade-survivor-docker.sh @@ -13,6 +13,7 @@ SKIP_BUILD="${OPENCLAW_UPGRADE_SURVIVOR_E2E_SKIP_BUILD:-0}" DOCKER_RUN_TIMEOUT="${OPENCLAW_UPGRADE_SURVIVOR_DOCKER_RUN_TIMEOUT:-900s}" BASELINE_SPEC="${OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPEC:-}" SCENARIO="${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base}" +UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}" LANE_ARTIFACT_SUFFIX="${OPENCLAW_DOCKER_ALL_LANE_NAME:-default}" LANE_ARTIFACT_SUFFIX="${LANE_ARTIFACT_SUFFIX//[^A-Za-z0-9_.-]/_}" ARTIFACT_DIR="${OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_DIR:-$ROOT_DIR/.artifacts/upgrade-survivor/$LANE_ARTIFACT_SUFFIX}" @@ -86,6 +87,7 @@ if [ "${OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE:-0}" = "1" ]; then -e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_KIND="$CANDIDATE_KIND" \ -e OPENCLAW_UPGRADE_SURVIVOR_CANDIDATE_SPEC="$CANDIDATE_SPEC" \ -e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \ + -e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \ -e OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK="${OPENCLAW_UPGRADE_SURVIVOR_LEGACY_RUNTIME_DEPS_SYMLINK:-}" \ -e OPENCLAW_UPGRADE_SURVIVOR_SUMMARY_JSON=/tmp/openclaw-upgrade-survivor-artifacts/summary.json \ -e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \ @@ -111,6 +113,7 @@ docker_e2e_run_with_harness \ -e OPENCLAW_TEST_STATE_SCRIPT_B64="$OPENCLAW_TEST_STATE_SCRIPT_B64" \ -e OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT=/tmp/openclaw-upgrade-survivor-artifacts \ -e OPENCLAW_UPGRADE_SURVIVOR_SCENARIO="$SCENARIO" \ + -e OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE="$UPDATE_RESTART_MODE" \ -e OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" \ -e OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}" \ -v "$ARTIFACT_DIR:/tmp/openclaw-upgrade-survivor-artifacts" \ @@ -145,6 +148,22 @@ export TELEGRAM_BOT_TOKEN="123456:upgrade-survivor-telegram-token" export FEISHU_APP_SECRET="upgrade-survivor-feishu-secret" export BRAVE_API_KEY="BSA_upgrade_survivor_brave_key" +UPDATE_RESTART_MODE="${OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE:-manual}" +PORT=18789 +START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" +STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}" +GATEWAY_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/gateway.log" +SYSTEMCTL_SHIM_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.log" +SYSTEMCTL_SHIM_PID_FILE="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim.pid" +SYSTEMCTL_SHIM_DAEMON_LOG="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/systemctl-shim-gateway.log" +BASELINE_SERVICE_INSTALL_JSON="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.json" +BASELINE_SERVICE_INSTALL_ERR="$OPENCLAW_UPGRADE_SURVIVOR_ARTIFACT_ROOT/baseline-service-install.err" +export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_LOG="$SYSTEMCTL_SHIM_LOG" +export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_PID_FILE="$SYSTEMCTL_SHIM_PID_FILE" +export OPENCLAW_UPGRADE_SURVIVOR_SYSTEMCTL_SHIM_DAEMON_LOG="$SYSTEMCTL_SHIM_DAEMON_LOG" +export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_JSON="$BASELINE_SERVICE_INSTALL_JSON" +export OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SERVICE_INSTALL_ERR="$BASELINE_SERVICE_INSTALL_ERR" + gateway_pid="" plugin_registry_pid="" cleanup() { @@ -152,6 +171,9 @@ cleanup() { kill "$plugin_registry_pid" >/dev/null 2>&1 || true fi openclaw_e2e_terminate_gateways "${gateway_pid:-}" + if [ -s "$SYSTEMCTL_SHIM_PID_FILE" ]; then + openclaw_e2e_terminate_gateways "$(cat "$SYSTEMCTL_SHIM_PID_FILE" 2>/dev/null || true)" + fi } trap cleanup EXIT @@ -255,10 +277,19 @@ export OPENCLAW_PACKAGE_ACCEPTANCE_LEGACY_COMPAT echo "Checking dirty-state config before update..." OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config OPENCLAW_UPGRADE_SURVIVOR_ASSERT_STAGE=baseline node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state +if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + # shellcheck disable=SC1091 + source scripts/e2e/lib/upgrade-survivor/update-restart-auth.sh + prepare_update_restart_probe_current_install "$PORT" "$GATEWAY_LOG" +fi echo "Running package update against the mounted tarball..." +update_args=(update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json) +if [ "$UPDATE_RESTART_MODE" != "auto-auth" ]; then + update_args+=(--no-restart) +fi set +e -openclaw update --tag "${OPENCLAW_CURRENT_PACKAGE_TGZ:?missing OPENCLAW_CURRENT_PACKAGE_TGZ}" --yes --json --no-restart >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err +env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw "${update_args[@]}" >/tmp/openclaw-upgrade-survivor-update.json 2>/tmp/openclaw-upgrade-survivor-update.err update_status=$? set -e if [ "$update_status" -ne 0 ]; then @@ -268,38 +299,42 @@ if [ "$update_status" -ne 0 ]; then exit "$update_status" fi -echo "Running non-interactive doctor repair..." -configure_configured_plugin_install_fixture_registry -if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then - echo "openclaw doctor failed" >&2 - cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true - exit 1 -fi -if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then - echo "post-doctor config validation failed" >&2 - cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true - exit 1 +if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + echo "Skipping doctor repair until after restart proof." +else + echo "Running non-interactive doctor repair..." + configure_configured_plugin_install_fixture_registry + if ! openclaw doctor --fix --non-interactive >/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then + echo "openclaw doctor failed" >&2 + cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true + exit 1 + fi + if ! openclaw config validate >>/tmp/openclaw-upgrade-survivor-doctor.log 2>&1; then + echo "post-doctor config validation failed" >&2 + cat /tmp/openclaw-upgrade-survivor-doctor.log >&2 || true + exit 1 + fi fi -echo "Verifying config and state survived update/doctor..." +echo "Verifying config and state survived update..." node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-config node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-state -PORT=18789 -START_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_START_BUDGET_SECONDS:-90}" -STATUS_BUDGET="${OPENCLAW_UPGRADE_SURVIVOR_STATUS_BUDGET_SECONDS:-30}" - -echo "Starting gateway from upgraded state..." -start_epoch="$(node -e "process.stdout.write(String(Date.now()))")" -openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >/tmp/openclaw-upgrade-survivor-gateway.log 2>&1 & -gateway_pid="$!" -openclaw_e2e_wait_gateway_ready "$gateway_pid" /tmp/openclaw-upgrade-survivor-gateway.log 360 -ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")" -start_seconds=$(((ready_epoch - start_epoch + 999) / 1000)) -if [ "$start_seconds" -gt "$START_BUDGET" ]; then - echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2 - cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true - exit 1 +if [ "$UPDATE_RESTART_MODE" = "auto-auth" ]; then + echo "Gateway restart was handled by openclaw update." +else + echo "Starting gateway from upgraded state..." + start_epoch="$(node -e "process.stdout.write(String(Date.now()))")" + openclaw gateway --port "$PORT" --bind loopback --allow-unconfigured >"$GATEWAY_LOG" 2>&1 & + gateway_pid="$!" + openclaw_e2e_wait_gateway_ready "$gateway_pid" "$GATEWAY_LOG" 360 + ready_epoch="$(node -e "process.stdout.write(String(Date.now()))")" + start_seconds=$(((ready_epoch - start_epoch + 999) / 1000)) + if [ "$start_seconds" -gt "$START_BUDGET" ]; then + echo "gateway startup exceeded survivor budget: ${start_seconds}s > ${START_BUDGET}s" >&2 + cat "$GATEWAY_LOG" >&2 || true + exit 1 + fi fi echo "Checking gateway HTTP probes..." @@ -320,7 +355,8 @@ status_start="$(node -e "process.stdout.write(String(Date.now()))")" if ! openclaw gateway status --url "ws://127.0.0.1:$PORT" --token "$GATEWAY_AUTH_TOKEN_REF" --require-rpc --timeout 30000 --json >/tmp/openclaw-upgrade-survivor-status.json 2>/tmp/openclaw-upgrade-survivor-status.err; then echo "gateway status failed" >&2 cat /tmp/openclaw-upgrade-survivor-status.err >&2 || true - cat /tmp/openclaw-upgrade-survivor-gateway.log >&2 || true + cat "$GATEWAY_LOG" >&2 || true + cat "$SYSTEMCTL_SHIM_DAEMON_LOG" >&2 || true exit 1 fi status_end="$(node -e "process.stdout.write(String(Date.now()))")" @@ -332,5 +368,5 @@ if [ "$status_seconds" -gt "$STATUS_BUDGET" ]; then fi node scripts/e2e/lib/upgrade-survivor/assertions.mjs assert-status-json /tmp/openclaw-upgrade-survivor-status.json -echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} startup=${start_seconds}s status=${status_seconds}s." +echo "Upgrade survivor Docker E2E passed scenario=${OPENCLAW_UPGRADE_SURVIVOR_SCENARIO:-base} updateRestartMode=${UPDATE_RESTART_MODE} startup=${start_seconds}s status=${status_seconds}s." ' diff --git a/scripts/lib/docker-e2e-scenarios.mjs b/scripts/lib/docker-e2e-scenarios.mjs index 571fbcaa7e2..80ca8937448 100644 --- a/scripts/lib/docker-e2e-scenarios.mjs +++ b/scripts/lib/docker-e2e-scenarios.mjs @@ -9,6 +9,8 @@ const LIVE_PROFILE_TIMEOUT_MS = 20 * 60 * 1000; const OPENWEBUI_TIMEOUT_MS = 20 * 60 * 1000; export const BUNDLED_PLUGIN_INSTALL_UNINSTALL_SHARDS = 24; const upgradeSurvivorCommand = "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:upgrade-survivor"; +const updateRestartAuthCommand = + "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-restart-auth"; const LIVE_RETRY_PATTERNS = [ /529\b/i, @@ -238,6 +240,11 @@ export const mainLanes = [ weight: 3, }, ), + npmLane("update-restart-auth", updateRestartAuthCommand, { + stateScenario: "upgrade-survivor", + timeoutMs: 25 * 60 * 1000, + weight: 3, + }), npmLane("update-migration", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-migration", { stateScenario: "upgrade-survivor", timeoutMs: 30 * 60 * 1000, @@ -536,6 +543,11 @@ const releasePathPackageUpdateCoreLanes = [ weight: 3, }, ), + npmLane("update-restart-auth", updateRestartAuthCommand, { + stateScenario: "upgrade-survivor", + timeoutMs: 25 * 60 * 1000, + weight: 3, + }), ]; const primaryReleasePathChunks = { diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts index d4d00dffff2..da8f5effd86 100644 --- a/src/cli/daemon-cli/restart-health.test.ts +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -414,6 +414,10 @@ describe("inspectGatewayRestart", () => { server: { version: "2026.4.24", connId: "new" }, }); const service = makeGatewayService({ status: "running", pid: 8000 }); + const serviceEnv = { + ...process.env, + OPENCLAW_STATE_DIR: "/tmp/openclaw-restart-service-state", + } as NodeJS.ProcessEnv; inspectPortUsage.mockResolvedValue({ port: 18789, status: "busy", @@ -427,6 +431,7 @@ describe("inspectGatewayRestart", () => { port: 18789, expectedVersion: "2026.4.24", attempts: 1, + env: serviceEnv, }); expect(snapshot).toMatchObject({ @@ -443,6 +448,7 @@ describe("inspectGatewayRestart", () => { expect(probeGateway).toHaveBeenCalledWith( expect.objectContaining({ auth: { token: "probe-token", password: undefined }, + env: serviceEnv, }), ); }); diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 25ce1720ef6..e35236d5092 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -237,6 +237,7 @@ async function confirmGatewayReachable(params: { port: number; includeHealthDetails?: boolean; auth?: GatewayRestartProbeAuth; + env?: NodeJS.ProcessEnv; }): Promise { const token = normalizeOptionalString(params.auth?.token ?? process.env.OPENCLAW_GATEWAY_TOKEN); const password = normalizeOptionalString( @@ -247,6 +248,7 @@ async function confirmGatewayReachable(params: { auth: token || password ? { token, password } : undefined, timeoutMs: 3_000, includeDetails: params.includeHealthDetails === true, + env: params.env, }); const reachedGateway = probe.ok || @@ -307,6 +309,7 @@ async function inspectGatewayPortHealth(params: { await confirmGatewayReachable({ port: params.port, auth: params.auth, + env: process.env, }) ).reachable; } catch { @@ -336,6 +339,7 @@ export async function inspectGatewayRestart(params: { port: params.port, includeHealthDetails: Boolean(expectedVersion), auth: params.probeAuth, + env, }); activatedPluginErrors = reachability.activatedPluginErrors; channelProbeErrors = reachability.channelProbeErrors; diff --git a/src/gateway/client.test.ts b/src/gateway/client.test.ts index 01ddbff5a10..cf1fd9d4b78 100644 --- a/src/gateway/client.test.ts +++ b/src/gateway/client.test.ts @@ -822,6 +822,39 @@ describe("GatewayClient connect auth payload", () => { client.stop(); }); + it("loads stored device auth from the provided env", () => { + loadDeviceAuthTokenMock.mockReturnValue({ + token: "stored-device-token", + scopes: ["operator.read"], + }); + const env = { + ...process.env, + OPENCLAW_STATE_DIR: "/tmp/openclaw-client-service-state", + } as NodeJS.ProcessEnv; + const client = new GatewayClient({ + url: "ws://127.0.0.1:18789", + env, + }); + + client.start(); + const ws = getLatestWs(); + ws.emitOpen(); + emitConnectChallenge(ws); + + expect(loadDeviceAuthTokenMock).toHaveBeenCalledWith( + expect.objectContaining({ + deviceId: expect.any(String), + role: "operator", + env, + }), + ); + expect(connectFrameFrom(ws)).toMatchObject({ + token: "stored-device-token", + deviceToken: "stored-device-token", + }); + client.stop(); + }); + it("uses bootstrap token when no shared or device token is available", () => { loadDeviceAuthTokenMock.mockReturnValue(undefined); const client = new GatewayClient({ diff --git a/src/gateway/client.ts b/src/gateway/client.ts index 9f959f8b3c4..daa7b6421ac 100644 --- a/src/gateway/client.ts +++ b/src/gateway/client.ts @@ -151,6 +151,7 @@ export type GatewayClientOptions = { commands?: string[]; permissions?: Record; pathEnv?: string; + env?: NodeJS.ProcessEnv; deviceIdentity?: DeviceIdentity | null; minProtocol?: number; maxProtocol?: number; @@ -369,7 +370,7 @@ export class GatewayClient { const deviceId = this.opts.deviceIdentity.deviceId; const role = this.opts.role ?? "operator"; try { - clearDeviceAuthToken({ deviceId, role }); + clearDeviceAuthToken({ deviceId, role, env: this.opts.env }); logDebug(`cleared stale device-auth token for device ${deviceId}`); } catch (err) { logDebug( @@ -592,6 +593,7 @@ export class GatewayClient { role: authInfo.role ?? role, token: authInfo.deviceToken, scopes: authInfo.scopes ?? [], + env: this.opts.env, }); } this.backoffMs = 1000; @@ -675,6 +677,7 @@ export class GatewayClient { const storedAuth = loadDeviceAuthToken({ deviceId: this.opts.deviceIdentity.deviceId, role, + env: this.opts.env, }); if (!storedAuth) { return null; diff --git a/src/gateway/probe.test.ts b/src/gateway/probe.test.ts index fbd57b6785c..a451358562e 100644 --- a/src/gateway/probe.test.ts +++ b/src/gateway/probe.test.ts @@ -31,6 +31,8 @@ const deviceIdentityState = vi.hoisted(() => ({ scopes: ["operator.read"], updatedAtMs: 1, } as Record | null, + identityPaths: [] as unknown[], + tokenParams: [] as unknown[], })); const eventLoopReadyState = vi.hoisted(() => ({ @@ -135,7 +137,8 @@ vi.mock("../infra/device-identity.js", () => ({ } return deviceIdentityState.value; }, - loadDeviceIdentityIfPresent: () => { + loadDeviceIdentityIfPresent: (filePath: unknown) => { + deviceIdentityState.identityPaths.push(filePath); if (deviceIdentityState.throwOnLoad) { throw new Error("read-only identity dir"); } @@ -144,7 +147,10 @@ vi.mock("../infra/device-identity.js", () => ({ })); vi.mock("../infra/device-auth-store.js", () => ({ - loadDeviceAuthToken: () => deviceIdentityState.cachedToken, + loadDeviceAuthToken: (params: unknown) => { + deviceIdentityState.tokenParams.push(params); + return deviceIdentityState.cachedToken; + }, })); vi.mock("./event-loop-ready.js", () => ({ @@ -165,6 +171,8 @@ describe("probeGateway", () => { scopes: ["operator.read"], updatedAtMs: 1, }; + deviceIdentityState.identityPaths = []; + deviceIdentityState.tokenParams = []; gatewayClientState.startMode = "hello"; gatewayClientState.options = null; gatewayClientState.requests = []; @@ -266,6 +274,32 @@ describe("probeGateway", () => { }); }); + it("loads probe identity and cached device auth from the provided env", async () => { + const env = { + ...process.env, + OPENCLAW_STATE_DIR: "/tmp/openclaw-probe-service-state", + } as NodeJS.ProcessEnv; + + await probeGateway({ + url: "ws://127.0.0.1:18789", + auth: { token: "secret" }, + timeoutMs: 1_000, + env, + }); + + expect(deviceIdentityState.identityPaths).toEqual([ + "/tmp/openclaw-probe-service-state/identity/device.json", + ]); + expect(deviceIdentityState.tokenParams).toEqual([ + { + deviceId: "test-device-identity", + role: "operator", + env, + }, + ]); + expect(gatewayClientState.options).toEqual(expect.objectContaining({ env })); + }); + it("keeps device identity enabled for remote probes", async () => { await probeGateway({ url: "wss://gateway.example/ws", diff --git a/src/gateway/probe.ts b/src/gateway/probe.ts index 6ca77a2ade3..c39234568c1 100644 --- a/src/gateway/probe.ts +++ b/src/gateway/probe.ts @@ -1,4 +1,6 @@ import { randomUUID } from "node:crypto"; +import path from "node:path"; +import { resolveStateDir } from "../config/paths.js"; import { loadDeviceAuthToken } from "../infra/device-auth-store.js"; import { formatErrorMessage } from "../infra/errors.js"; import type { SystemPresence } from "../infra/system-presence.js"; @@ -149,6 +151,7 @@ export async function probeGateway(opts: { includeDetails?: boolean; detailLevel?: "none" | "presence" | "full"; tlsFingerprint?: string; + env?: NodeJS.ProcessEnv; }): Promise { const startedAt = Date.now(); const instanceId = randomUUID(); @@ -168,7 +171,8 @@ export async function probeGateway(opts: { return null; } const { loadDeviceIdentityIfPresent } = await import("../infra/device-identity.js"); - const identity = loadDeviceIdentityIfPresent(); + const stateDir = resolveStateDir(opts.env); + const identity = loadDeviceIdentityIfPresent(path.join(stateDir, "identity", "device.json")); if (!identity) { return null; } @@ -178,6 +182,7 @@ export async function probeGateway(opts: { const cachedOperatorToken = loadDeviceAuthToken({ deviceId: identity.deviceId, role: "operator", + env: opts.env, }); return cachedOperatorToken ? identity : null; } catch { @@ -261,6 +266,7 @@ export async function probeGateway(opts: { password: opts.auth?.password, tlsFingerprint: opts.tlsFingerprint, preauthHandshakeTimeoutMs: opts.preauthHandshakeTimeoutMs, + env: opts.env, scopes: [READ_SCOPE], clientName: GATEWAY_CLIENT_NAMES.CLI, clientVersion: "dev", diff --git a/test/scripts/docker-e2e-plan.test.ts b/test/scripts/docker-e2e-plan.test.ts index 344ba23d73a..6b44bbc3802 100644 --- a/test/scripts/docker-e2e-plan.test.ts +++ b/test/scripts/docker-e2e-plan.test.ts @@ -156,6 +156,7 @@ describe("scripts/lib/docker-e2e-plan", () => { "update-channel-switch", "upgrade-survivor", "published-upgrade-survivor", + "update-restart-auth", ]); expect(packageUpdateCore.lanes).toEqual( expect.arrayContaining([ @@ -188,6 +189,11 @@ describe("scripts/lib/docker-e2e-plan", () => { name: "published-upgrade-survivor", stateScenario: "upgrade-survivor", }), + expect.objectContaining({ + name: "update-restart-auth", + command: "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:update-restart-auth", + stateScenario: "upgrade-survivor", + }), ]), ); expect(pluginsRuntimePlugins.lanes.map((lane) => lane.name)).toEqual(["plugins"]); diff --git a/test/scripts/package-acceptance-workflow.test.ts b/test/scripts/package-acceptance-workflow.test.ts index 1d4126f3864..95559c30241 100644 --- a/test/scripts/package-acceptance-workflow.test.ts +++ b/test/scripts/package-acceptance-workflow.test.ts @@ -105,6 +105,7 @@ describe("package acceptance workflow", () => { expect(workflow).toContain("npm-onboard-channel-agent doctor-switch"); expect(workflow).toContain("update-channel-switch upgrade-survivor"); expect(workflow).toContain("published-upgrade-survivor"); + expect(workflow).toContain("published-upgrade-survivor update-restart-auth"); expect(workflow).toContain("plugins-offline plugin-update"); expect(workflow).toContain("include_release_path_suites=true"); expect(workflow).not.toContain("telegram_mode requires source=npm"); @@ -252,7 +253,19 @@ describe("package artifact reuse", () => { expect(scheduler).toContain('["OPENCLAW_UPGRADE_SURVIVOR_BASELINE_SPECS",'); expect(scheduler).toContain('["OPENCLAW_UPGRADE_SURVIVOR_SCENARIOS",'); expect(packageJson).toContain("OPENCLAW_UPGRADE_SURVIVOR_PUBLISHED_BASELINE=1"); + expect(packageJson).toContain("test:docker:update-restart-auth"); + expect(packageJson).toContain("OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE=auto-auth"); expect(publishedUpgradeSurvivor).toContain("validate_baseline_package_spec"); + expect(publishedUpgradeSurvivor).toContain("OPENCLAW_UPGRADE_SURVIVOR_UPDATE_RESTART_MODE"); + expect(publishedUpgradeSurvivor).toContain('local shim_dir="$npm_config_prefix/bin"'); + expect(publishedUpgradeSurvivor).toContain("seed_update_restart_probe_device_auth"); + expect(publishedUpgradeSurvivor).toContain("upgrade survivor restart probe"); + expect(publishedUpgradeSurvivor).toContain("write_update_restart_service_secretref_env"); + expect(publishedUpgradeSurvivor).toContain("GATEWAY_AUTH_TOKEN_REF=%s"); + expect(publishedUpgradeSurvivor).toContain( + "env -u OPENCLAW_GATEWAY_TOKEN -u OPENCLAW_GATEWAY_PASSWORD openclaw", + ); + expect(publishedUpgradeSurvivor).toContain("phase prepare-update-restart-probe"); expect(publishedUpgradeSurvivor).toContain("openclaw@(alpha|beta|latest|"); expect(publishedUpgradeSurvivor).toContain("plugin_deps_cleanup_plugin_dirs"); expect(publishedUpgradeSurvivor).toContain('"$(package_root)/extensions/$plugin"'); @@ -534,7 +547,7 @@ describe("package artifact reuse", () => { ); expect(workflow).toContain("suite_profile: custom"); expect(workflow).toContain( - "docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor plugins-offline plugin-update", + "docker_lanes: doctor-switch update-channel-switch upgrade-survivor published-upgrade-survivor update-restart-auth plugins-offline plugin-update", ); expect(workflow).toContain( "published_upgrade_survivor_baselines: ${{ needs.resolve_target.outputs.run_release_soak == 'true' && 'last-stable-4 2026.4.23 2026.5.2 2026.4.15' || '' }}",