ci: shard release validation hotspots

This commit is contained in:
Peter Steinberger
2026-04-29 20:40:32 +01:00
parent fdba408bce
commit dac7237494
7 changed files with 270 additions and 93 deletions

View File

@@ -24,6 +24,40 @@ OPENAI_API_KEY="${OPENAI_API_KEY:-}"
ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}"
ANTHROPIC_API_TOKEN="${ANTHROPIC_API_TOKEN:-}"
AGENT_TURN_TIMEOUT_SECONDS="${OPENCLAW_INSTALL_E2E_AGENT_TURN_TIMEOUT_SECONDS:-600}"
AGENT_TURNS_PARALLEL="${OPENCLAW_INSTALL_E2E_AGENT_TURNS_PARALLEL:-1}"
time_phase() {
local name="$1"
shift
local started_at
local finished_at
local status
started_at="$(date +%s)"
echo "==> Phase start: $name"
set +e
"$@"
status="$?"
set -e
finished_at="$(date +%s)"
if [[ "$status" -eq 0 ]]; then
echo "==> Phase passed: $name ($((finished_at - started_at))s)"
else
echo "==> Phase failed: $name ($((finished_at - started_at))s, status=$status)" >&2
fi
return "$status"
}
PHASE_MARK_STARTED_AT=0
phase_mark_start() {
PHASE_MARK_STARTED_AT="$(date +%s)"
echo "==> Phase start: $1"
}
phase_mark_passed() {
local name="$1"
echo "==> Phase passed: $name ($(($(date +%s) - PHASE_MARK_STARTED_AT))s)"
}
# This image runs as a non-root user, so seed a user-local npm prefix before we
# preinstall an older global version to exercise the upgrade path.
@@ -53,48 +87,58 @@ elif [[ "$MODELS_MODE" == "anthropic" && -z "$ANTHROPIC_API_TOKEN" && -z "$ANTHR
exit 2
fi
echo "==> Resolve npm versions"
EXPECTED_VERSION="$(quiet_npm view "openclaw@${INSTALL_TAG}" version)"
if [[ -z "$EXPECTED_VERSION" || "$EXPECTED_VERSION" == "undefined" || "$EXPECTED_VERSION" == "null" ]]; then
echo "ERROR: unable to resolve openclaw@${INSTALL_TAG} version" >&2
exit 2
fi
if [[ -n "$E2E_PREVIOUS_VERSION" ]]; then
PREVIOUS_VERSION="$E2E_PREVIOUS_VERSION"
else
PREVIOUS_VERSION="$(VERSIONS_JSON="$(quiet_npm view openclaw versions --json)" node - <<'NODE'
resolve_npm_versions() {
EXPECTED_VERSION="$(quiet_npm view "openclaw@${INSTALL_TAG}" version)"
if [[ -z "$EXPECTED_VERSION" || "$EXPECTED_VERSION" == "undefined" || "$EXPECTED_VERSION" == "null" ]]; then
echo "ERROR: unable to resolve openclaw@${INSTALL_TAG} version" >&2
return 2
fi
if [[ -n "$E2E_PREVIOUS_VERSION" ]]; then
PREVIOUS_VERSION="$E2E_PREVIOUS_VERSION"
else
PREVIOUS_VERSION="$(VERSIONS_JSON="$(quiet_npm view openclaw versions --json)" node - <<'NODE'
const versions = JSON.parse(process.env.VERSIONS_JSON || "[]");
if (!Array.isArray(versions) || versions.length === 0) process.exit(1);
process.stdout.write(versions.length >= 2 ? versions[versions.length - 2] : versions[0]);
NODE
)"
fi
echo "expected=$EXPECTED_VERSION previous=$PREVIOUS_VERSION"
)"
fi
echo "expected=$EXPECTED_VERSION previous=$PREVIOUS_VERSION"
}
if [[ "$SKIP_PREVIOUS" == "1" ]]; then
echo "==> Skip preinstall previous (OPENCLAW_INSTALL_E2E_SKIP_PREVIOUS=1)"
else
echo "==> Preinstall previous (forces installer upgrade path; avoids read() prompt)"
quiet_npm install -g "openclaw@${PREVIOUS_VERSION}"
fi
preinstall_previous_version() {
if [[ "$SKIP_PREVIOUS" == "1" ]]; then
echo "Skip preinstall previous (OPENCLAW_INSTALL_E2E_SKIP_PREVIOUS=1)"
else
echo "Preinstall previous (forces installer upgrade path; avoids read() prompt)"
quiet_npm install -g "openclaw@${PREVIOUS_VERSION}"
fi
}
echo "==> Run official installer one-liner"
if [[ "$INSTALL_TAG" == "beta" ]]; then
curl -fsSL "$INSTALL_URL" | OPENCLAW_BETA=1 bash
elif [[ "$INSTALL_TAG" != "latest" ]]; then
curl -fsSL "$INSTALL_URL" | OPENCLAW_VERSION="$INSTALL_TAG" bash
else
curl -fsSL "$INSTALL_URL" | bash
fi
run_official_installer() {
if [[ "$INSTALL_TAG" == "beta" ]]; then
curl -fsSL "$INSTALL_URL" | OPENCLAW_BETA=1 bash
elif [[ "$INSTALL_TAG" != "latest" ]]; then
curl -fsSL "$INSTALL_URL" | OPENCLAW_VERSION="$INSTALL_TAG" bash
else
curl -fsSL "$INSTALL_URL" | bash
fi
}
echo "==> Verify installed version"
INSTALLED_VERSION="$(openclaw --version 2>/dev/null | head -n 1 | tr -d '\r')"
INSTALLED_VERSION="$(extract_openclaw_semver "$INSTALLED_VERSION")"
echo "installed=$INSTALLED_VERSION expected=$EXPECTED_VERSION"
if [[ "$INSTALLED_VERSION" != "$EXPECTED_VERSION" ]]; then
echo "ERROR: expected openclaw@$EXPECTED_VERSION, got openclaw@$INSTALLED_VERSION" >&2
exit 1
fi
verify_installed_version() {
INSTALLED_VERSION="$(openclaw --version 2>/dev/null | head -n 1 | tr -d '\r')"
INSTALLED_VERSION="$(extract_openclaw_semver "$INSTALLED_VERSION")"
echo "installed=$INSTALLED_VERSION expected=$EXPECTED_VERSION"
if [[ "$INSTALLED_VERSION" != "$EXPECTED_VERSION" ]]; then
echo "ERROR: expected openclaw@$EXPECTED_VERSION, got openclaw@$INSTALLED_VERSION" >&2
return 1
fi
}
time_phase "Resolve npm versions" resolve_npm_versions
time_phase "Preinstall previous" preinstall_previous_version
time_phase "Run official installer one-liner" run_official_installer
time_phase "Verify installed version" verify_installed_version
set_image_model() {
local profile="$1"
@@ -249,6 +293,46 @@ fs.writeFileSync(path, `${JSON.stringify(parsed, null, 2)}\n`, "utf8");
NODE
}
RUN_AGENT_TURN_BG_PID=""
run_agent_turn_logged() {
local label="$1"
local profile="$2"
local session_id="$3"
local prompt="$4"
local out_json="$5"
local started_at
SESSION_JSONL="$(session_jsonl_path "$profile" "$session_id")"
started_at="$(date +%s)"
echo "==> Agent turn start: $label ($profile)"
run_agent_turn "$profile" "$session_id" "$prompt" "$out_json"
echo "==> Agent turn passed: $label ($profile, $(($(date +%s) - started_at))s)"
}
run_agent_turn_bg() {
local label="$1"
local profile="$2"
local session_id="$3"
local prompt="$4"
local out_json="$5"
(
set -euo pipefail
run_agent_turn_logged "$label" "$profile" "$session_id" "$prompt" "$out_json"
) &
RUN_AGENT_TURN_BG_PID="$!"
}
wait_agent_turn_batch() {
local failed=0
local pid
for pid in "$@"; do
if ! wait "$pid"; then
failed=1
fi
done
return "$failed"
}
dump_profile_debug() {
local profile="$1"
local turn_output="$2"
@@ -450,7 +534,7 @@ run_profile() {
local workspace="$3"
local agent_model_provider="$4" # "openai"|"anthropic"
echo "==> Onboard ($profile)"
phase_mark_start "Onboard ($profile)"
if [[ "$agent_model_provider" == "openai" ]]; then
openclaw --profile "$profile" onboard \
--non-interactive \
@@ -501,8 +585,9 @@ run_profile() {
--workspace "$workspace" \
--skip-health
fi
phase_mark_passed "Onboard ($profile)"
echo "==> Verify workspace identity files ($profile)"
phase_mark_start "Verify workspace identity files ($profile)"
test -f "$workspace/AGENTS.md"
test -f "$workspace/IDENTITY.md"
test -f "$workspace/USER.md"
@@ -512,8 +597,9 @@ run_profile() {
# first-run identity ritual. Drop BOOTSTRAP.md so provider prompts stay focused
# on the fixture task and do not spend turns following onboarding copy.
rm -f "$workspace/BOOTSTRAP.md"
phase_mark_passed "Verify workspace identity files ($profile)"
echo "==> Configure models ($profile)"
phase_mark_start "Configure models ($profile)"
local agent_model
local image_model
if [[ "$agent_model_provider" == "openai" ]]; then
@@ -534,8 +620,9 @@ run_profile() {
fi
echo "model=$agent_model"
echo "imageModel=$image_model"
phase_mark_passed "Configure models ($profile)"
echo "==> Prepare tool fixtures ($profile)"
phase_mark_start "Prepare tool fixtures ($profile)"
PROOF_TXT="$workspace/proof.txt"
PROOF_COPY="$workspace/copy.txt"
HOSTNAME_TXT="$workspace/hostname.txt"
@@ -548,8 +635,9 @@ run_profile() {
echo -n "$PROOF_VALUE" >"$PROOF_TXT"
write_png_lr_rg "$IMAGE_PNG"
EXPECTED_HOSTNAME="$(hostname | tr -d '\r\n')"
phase_mark_passed "Prepare tool fixtures ($profile)"
echo "==> Start gateway ($profile)"
phase_mark_start "Start gateway ($profile)"
GATEWAY_LOG="$workspace/gateway.log"
openclaw --profile "$profile" gateway --port "$port" --bind loopback >"$GATEWAY_LOG" 2>&1 &
GATEWAY_PID="$!"
@@ -560,6 +648,7 @@ run_profile() {
fi
}
trap cleanup_profile EXIT
phase_mark_passed "Start gateway ($profile)"
TURN1_JSON="/tmp/agent-${profile}-1.json"
TURN2_JSON="/tmp/agent-${profile}-2.json"
@@ -569,7 +658,7 @@ run_profile() {
TURN4_JSON="/tmp/agent-${profile}-4.json"
HEALTH_JSON="/tmp/health-${profile}.json"
echo "==> Wait for health ($profile)"
phase_mark_start "Wait for health ($profile)"
for _ in $(seq 1 240); do
if openclaw --profile "$profile" health --timeout 5000 --json >/dev/null 2>&1; then
break
@@ -581,14 +670,46 @@ run_profile() {
dump_profile_debug "$profile" "$HEALTH_JSON" >&2 || true
return 1
fi
phase_mark_passed "Wait for health ($profile)"
echo "==> Agent turns ($profile)"
phase_mark_start "Agent turns ($profile)"
TURN1_SESSION_ID="${SESSION_ID_PREFIX}-read-proof"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN1_SESSION_ID")"
run_agent_turn "$profile" "$TURN1_SESSION_ID" \
"Use the read tool (not exec) to read ${PROOF_TXT}. Reply with the exact contents only (no extra whitespace)." \
"$TURN1_JSON"
local prompt1
prompt1="Use the read tool (not exec) to read ${PROOF_TXT}. Reply with the exact contents only (no extra whitespace)."
local prompt2
prompt2=$'Use the write tool (not exec) to write exactly this string into '"${PROOF_COPY}"$':\n'"${PROOF_VALUE}"$'\nReply with exactly: WROTE'
local prompt3
prompt3="Use the exec tool to run this command: hostname. Reply with the exact stdout only (trim trailing newline)."
local prompt3b
prompt3b=$'Use the write tool to write exactly this string into '"${HOSTNAME_TXT}"$':\n'"${EXPECTED_HOSTNAME}"$'\nReply with exactly: WROTE'
local prompt4
prompt4="Use the image tool on ${IMAGE_PNG}. Determine which color is on the left half and which is on the right half. Then use the write tool to write exactly: LEFT=RED RIGHT=GREEN into ${IMAGE_TXT}. Reply with exactly: LEFT=RED RIGHT=GREEN"
TURN2_SESSION_ID="${SESSION_ID_PREFIX}-write-copy"
TURN3_SESSION_ID="${SESSION_ID_PREFIX}-exec-hostname"
TURN3B_SESSION_ID="${SESSION_ID_PREFIX}-write-hostname"
TURN4_SESSION_ID="${SESSION_ID_PREFIX}-image-write"
if [[ "$AGENT_TURNS_PARALLEL" == "1" ]]; then
local turn_pids=()
run_agent_turn_bg "read proof" "$profile" "$TURN1_SESSION_ID" "$prompt1" "$TURN1_JSON"
turn_pids+=("$RUN_AGENT_TURN_BG_PID")
run_agent_turn_bg "write proof copy" "$profile" "$TURN2_SESSION_ID" "$prompt2" "$TURN2_JSON"
turn_pids+=("$RUN_AGENT_TURN_BG_PID")
run_agent_turn_bg "exec hostname" "$profile" "$TURN3_SESSION_ID" "$prompt3" "$TURN3_JSON"
turn_pids+=("$RUN_AGENT_TURN_BG_PID")
run_agent_turn_bg "write hostname" "$profile" "$TURN3B_SESSION_ID" "$prompt3b" "$TURN3B_JSON"
turn_pids+=("$RUN_AGENT_TURN_BG_PID")
run_agent_turn_bg "image write" "$profile" "$TURN4_SESSION_ID" "$prompt4" "$TURN4_JSON"
turn_pids+=("$RUN_AGENT_TURN_BG_PID")
wait_agent_turn_batch "${turn_pids[@]}"
else
run_agent_turn_logged "read proof" "$profile" "$TURN1_SESSION_ID" "$prompt1" "$TURN1_JSON"
run_agent_turn_logged "write proof copy" "$profile" "$TURN2_SESSION_ID" "$prompt2" "$TURN2_JSON"
run_agent_turn_logged "exec hostname" "$profile" "$TURN3_SESSION_ID" "$prompt3" "$TURN3_JSON"
run_agent_turn_logged "write hostname" "$profile" "$TURN3B_SESSION_ID" "$prompt3b" "$TURN3B_JSON"
run_agent_turn_logged "image write" "$profile" "$TURN4_SESSION_ID" "$prompt4" "$TURN4_JSON"
fi
assert_agent_json_has_text "$TURN1_JSON"
assert_agent_json_ok "$TURN1_JSON" "$agent_model_provider"
local reply1
@@ -598,11 +719,6 @@ run_profile() {
exit 1
fi
local prompt2
prompt2=$'Use the write tool (not exec) to write exactly this string into '"${PROOF_COPY}"$':\n'"${reply1}"$'\nReply with exactly: WROTE'
TURN2_SESSION_ID="${SESSION_ID_PREFIX}-write-copy"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN2_SESSION_ID")"
run_agent_turn "$profile" "$TURN2_SESSION_ID" "$prompt2" "$TURN2_JSON"
assert_agent_json_has_text "$TURN2_JSON"
assert_agent_json_ok "$TURN2_JSON" "$agent_model_provider"
local copy_value
@@ -612,8 +728,7 @@ run_profile() {
exit 1
fi
TURN2B_SESSION_ID="${SESSION_ID_PREFIX}-read-copy"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN2B_SESSION_ID")"
run_agent_turn "$profile" "$TURN2B_SESSION_ID" \
run_agent_turn_logged "read proof copy" "$profile" "$TURN2B_SESSION_ID" \
"Use the read tool (not exec) to read ${PROOF_COPY}. Reply with the exact contents only (no extra whitespace)." \
"$TURN2B_JSON"
assert_agent_json_has_text "$TURN2B_JSON"
@@ -625,11 +740,6 @@ run_profile() {
exit 1
fi
TURN3_SESSION_ID="${SESSION_ID_PREFIX}-exec-hostname"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN3_SESSION_ID")"
run_agent_turn "$profile" "$TURN3_SESSION_ID" \
"Use the exec tool to run this command: hostname. Reply with the exact stdout only (trim trailing newline)." \
"$TURN3_JSON"
assert_agent_json_has_text "$TURN3_JSON"
assert_agent_json_ok "$TURN3_JSON" "$agent_model_provider"
local reply3
@@ -638,11 +748,6 @@ run_profile() {
echo "ERROR: agent did not run hostname correctly ($profile): $reply3" >&2
exit 1
fi
local prompt3b
prompt3b=$'Use the write tool to write exactly this string into '"${HOSTNAME_TXT}"$':\n'"${reply3}"$'\nReply with exactly: WROTE'
TURN3B_SESSION_ID="${SESSION_ID_PREFIX}-write-hostname"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN3B_SESSION_ID")"
run_agent_turn "$profile" "$TURN3B_SESSION_ID" "$prompt3b" "$TURN3B_JSON"
assert_agent_json_has_text "$TURN3B_JSON"
assert_agent_json_ok "$TURN3B_JSON" "$agent_model_provider"
if [[ "$(cat "$HOSTNAME_TXT" 2>/dev/null | tr -d '\r\n' || true)" != "$EXPECTED_HOSTNAME" ]]; then
@@ -650,11 +755,6 @@ run_profile() {
exit 1
fi
TURN4_SESSION_ID="${SESSION_ID_PREFIX}-image-write"
SESSION_JSONL="$(session_jsonl_path "$profile" "$TURN4_SESSION_ID")"
run_agent_turn "$profile" "$TURN4_SESSION_ID" \
"Use the image tool on ${IMAGE_PNG}. Determine which color is on the left half and which is on the right half. Then use the write tool to write exactly: LEFT=RED RIGHT=GREEN into ${IMAGE_TXT}. Reply with exactly: LEFT=RED RIGHT=GREEN" \
"$TURN4_JSON"
assert_agent_json_has_text "$TURN4_JSON"
assert_agent_json_ok "$TURN4_JSON" "$agent_model_provider"
if [[ "$(cat "$IMAGE_TXT" 2>/dev/null | tr -d '\r\n' || true)" != "LEFT=RED RIGHT=GREEN" ]]; then
@@ -667,8 +767,9 @@ run_profile() {
echo "ERROR: agent reply did not contain expected marker ($profile): $reply4" >&2
exit 1
fi
phase_mark_passed "Agent turns ($profile)"
echo "==> Verify tool usage via session transcript ($profile)"
phase_mark_start "Verify tool usage via session transcript ($profile)"
# Give the gateway a moment to flush transcripts.
sleep 1
assert_session_used_tools "$(session_jsonl_path "$profile" "$TURN1_SESSION_ID")" read
@@ -677,6 +778,7 @@ run_profile() {
assert_session_used_tools "$(session_jsonl_path "$profile" "$TURN3_SESSION_ID")" exec
assert_session_used_tools "$(session_jsonl_path "$profile" "$TURN3B_SESSION_ID")" write
assert_session_used_tools "$(session_jsonl_path "$profile" "$TURN4_SESSION_ID")" image write
phase_mark_passed "Verify tool usage via session transcript ($profile)"
cleanup_profile
trap - EXIT