From f3d531439bc2d4bb9a48e0db1a6fa04ba177b798 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 5 May 2026 09:01:43 +0100 Subject: [PATCH] feat: add reusable Mantis evidence publishing --- .../mantis-discord-status-reactions.yml | 188 ++----- .github/workflows/mantis-scenario.yml | 83 +++ .../workflows/mantis-slack-desktop-smoke.yml | 328 ++++++++++++ docs/concepts/mantis.md | 66 +++ extensions/qa-lab/src/mantis/run.runtime.ts | 117 +++++ scripts/mantis/publish-pr-evidence.mjs | 475 ++++++++++++++++++ scripts/test-projects.test-support.mjs | 5 + .../mantis-publish-pr-evidence.test.ts | 117 +++++ 8 files changed, 1233 insertions(+), 146 deletions(-) create mode 100644 .github/workflows/mantis-scenario.yml create mode 100644 .github/workflows/mantis-slack-desktop-smoke.yml create mode 100644 scripts/mantis/publish-pr-evidence.mjs create mode 100644 test/scripts/mantis-publish-pr-evidence.test.ts diff --git a/.github/workflows/mantis-discord-status-reactions.yml b/.github/workflows/mantis-discord-status-reactions.yml index 4602348570f..17485bd6488 100644 --- a/.github/workflows/mantis-discord-status-reactions.yml +++ b/.github/workflows/mantis-discord-status-reactions.yml @@ -474,6 +474,40 @@ jobs: echo "- Candidate desktop video: \`candidate/discord-status-reactions-tool-only-desktop.mp4\`" } > "$root/mantis-report.md" + jq -n \ + --arg baseline_status "$baseline_status" \ + --arg candidate_status "$candidate_status" \ + --arg baseline_sha "${{ needs.validate_refs.outputs.baseline_revision }}" \ + --arg candidate_sha "${{ needs.validate_refs.outputs.candidate_revision }}" \ + '{ + schemaVersion: 1, + id: "discord-status-reactions", + title: "Mantis Discord Status Reactions QA", + summary: "Mantis reran Discord status reactions against the known queued-only baseline and the candidate ref. 
The baseline reproduced the bug, while the candidate showed the expected queued -> thinking -> done reaction sequence.", + scenario: "discord-status-reactions-tool-only", + comparison: { + baseline: { sha: $baseline_sha, expected: "queued-only", status: $baseline_status, reproduced: ($baseline_status == "fail") }, + candidate: { sha: $candidate_sha, expected: "queued -> thinking -> done", status: $candidate_status, fixed: ($candidate_status == "pass") }, + pass: (($baseline_status == "fail") and ($candidate_status == "pass")) + }, + artifacts: [ + { kind: "timeline", lane: "baseline", label: "Baseline queued-only", path: "baseline/discord-status-reactions-tool-only-timeline.png", targetPath: "baseline.png", alt: "Baseline Discord status reaction timeline", width: 420 }, + { kind: "timeline", lane: "candidate", label: "Candidate queued -> thinking -> done", path: "candidate/discord-status-reactions-tool-only-timeline.png", targetPath: "candidate.png", alt: "Candidate Discord status reaction timeline", width: 420 }, + { kind: "desktopScreenshot", lane: "baseline", label: "Baseline desktop/VNC browser", path: "baseline/discord-status-reactions-tool-only-desktop.png", targetPath: "baseline-desktop.png", alt: "Baseline Mantis desktop browser screenshot", width: 420 }, + { kind: "desktopScreenshot", lane: "candidate", label: "Candidate desktop/VNC browser", path: "candidate/discord-status-reactions-tool-only-desktop.png", targetPath: "candidate-desktop.png", alt: "Candidate Mantis desktop browser screenshot", width: 420 }, + { kind: "motionPreview", lane: "baseline", label: "Baseline motion preview", path: "baseline/discord-status-reactions-tool-only-desktop-preview.gif", targetPath: "baseline-desktop-preview.gif", alt: "Animated baseline desktop preview", width: 420, required: false }, + { kind: "motionPreview", lane: "candidate", label: "Candidate motion preview", path: "candidate/discord-status-reactions-tool-only-desktop-preview.gif", targetPath: 
"candidate-desktop-preview.gif", alt: "Animated candidate desktop preview", width: 420, required: false }, + { kind: "motionClip", lane: "baseline", label: "Baseline change MP4", path: "baseline/discord-status-reactions-tool-only-desktop-change.mp4", targetPath: "baseline-desktop-change.mp4", required: false }, + { kind: "motionClip", lane: "candidate", label: "Candidate change MP4", path: "candidate/discord-status-reactions-tool-only-desktop-change.mp4", targetPath: "candidate-desktop-change.mp4", required: false }, + { kind: "fullVideo", lane: "baseline", label: "Baseline desktop MP4", path: "baseline/discord-status-reactions-tool-only-desktop.mp4", targetPath: "baseline-desktop.mp4" }, + { kind: "fullVideo", lane: "candidate", label: "Candidate desktop MP4", path: "candidate/discord-status-reactions-tool-only-desktop.mp4", targetPath: "candidate-desktop.mp4" }, + { kind: "metadata", lane: "baseline", label: "Baseline preview metadata", path: "baseline/discord-status-reactions-tool-only-desktop-preview.json", targetPath: "baseline-desktop-preview.json", required: false }, + { kind: "metadata", lane: "candidate", label: "Candidate preview metadata", path: "candidate/discord-status-reactions-tool-only-desktop-preview.json", targetPath: "candidate-desktop-preview.json", required: false }, + { kind: "metadata", lane: "run", label: "Comparison JSON", path: "comparison.json", targetPath: "comparison.json" }, + { kind: "report", lane: "run", label: "Mantis report", path: "mantis-report.md", targetPath: "mantis-report.md" } + ] + }' > "$root/mantis-evidence.json" + cat "$root/mantis-report.md" >> "$GITHUB_STEP_SUMMARY" if [[ "$baseline_status" != "fail" ]]; then @@ -514,155 +548,17 @@ jobs: GH_TOKEN: ${{ steps.mantis_app_token.outputs.token }} TARGET_PR: ${{ needs.resolve_request.outputs.pr_number }} ARTIFACT_URL: ${{ steps.upload_artifact.outputs.artifact-url }} - BASELINE_SHA: ${{ needs.validate_refs.outputs.baseline_revision }} - CANDIDATE_SHA: ${{ 
needs.validate_refs.outputs.candidate_revision }} REQUEST_SOURCE: ${{ needs.resolve_request.outputs.request_source }} shell: bash run: | set -euo pipefail - if [[ ! "$TARGET_PR" =~ ^[0-9]+$ ]]; then - echo "pr_number must be numeric, got '${TARGET_PR}'." >&2 - exit 1 - fi - root=".artifacts/qa-e2e/mantis/discord-status-reactions" - for required in \ - "$root/comparison.json" \ - "$root/baseline/discord-status-reactions-tool-only-timeline.png" \ - "$root/candidate/discord-status-reactions-tool-only-timeline.png" \ - "$root/baseline/discord-status-reactions-tool-only-desktop.png" \ - "$root/candidate/discord-status-reactions-tool-only-desktop.png" \ - "$root/baseline/discord-status-reactions-tool-only-desktop.mp4" \ - "$root/candidate/discord-status-reactions-tool-only-desktop.mp4" - do - if [[ ! -f "$required" ]]; then - echo "Missing required QA evidence file: $required" >&2 - exit 1 - fi - done - - gh api "repos/${GITHUB_REPOSITORY}/pulls/${TARGET_PR}" --jq '.number' >/dev/null - - artifact_root="mantis/discord-status-reactions/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" - artifacts_worktree="$(mktemp -d)" - git init --quiet "$artifacts_worktree" - git -C "$artifacts_worktree" config user.name "github-actions[bot]" - git -C "$artifacts_worktree" config user.email "41898282+github-actions[bot]@users.noreply.github.com" - git -C "$artifacts_worktree" remote add origin "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" - - if git -C "$artifacts_worktree" fetch --quiet origin qa-artifacts; then - git -C "$artifacts_worktree" checkout --quiet -B qa-artifacts FETCH_HEAD - else - git -C "$artifacts_worktree" checkout --quiet --orphan qa-artifacts - fi - - mkdir -p "$artifacts_worktree/$artifact_root" - cp "$root/baseline/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/baseline.png" - cp "$root/candidate/discord-status-reactions-tool-only-timeline.png" 
"$artifacts_worktree/$artifact_root/candidate.png" - cp "$root/baseline/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/baseline-desktop.png" - cp "$root/candidate/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/candidate-desktop.png" - has_desktop_previews="false" - if [[ -f "$root/baseline/discord-status-reactions-tool-only-desktop-preview.gif" && -f "$root/candidate/discord-status-reactions-tool-only-desktop-preview.gif" ]]; then - cp "$root/baseline/discord-status-reactions-tool-only-desktop-preview.gif" "$artifacts_worktree/$artifact_root/baseline-desktop-preview.gif" - cp "$root/candidate/discord-status-reactions-tool-only-desktop-preview.gif" "$artifacts_worktree/$artifact_root/candidate-desktop-preview.gif" - cp "$root/baseline/discord-status-reactions-tool-only-desktop-preview.json" "$artifacts_worktree/$artifact_root/baseline-desktop-preview.json" - cp "$root/candidate/discord-status-reactions-tool-only-desktop-preview.json" "$artifacts_worktree/$artifact_root/candidate-desktop-preview.json" - has_desktop_previews="true" - fi - has_change_clips="false" - if [[ -f "$root/baseline/discord-status-reactions-tool-only-desktop-change.mp4" && -f "$root/candidate/discord-status-reactions-tool-only-desktop-change.mp4" ]]; then - cp "$root/baseline/discord-status-reactions-tool-only-desktop-change.mp4" "$artifacts_worktree/$artifact_root/baseline-desktop-change.mp4" - cp "$root/candidate/discord-status-reactions-tool-only-desktop-change.mp4" "$artifacts_worktree/$artifact_root/candidate-desktop-change.mp4" - has_change_clips="true" - fi - cp "$root/baseline/discord-status-reactions-tool-only-desktop.mp4" "$artifacts_worktree/$artifact_root/baseline-desktop.mp4" - cp "$root/candidate/discord-status-reactions-tool-only-desktop.mp4" "$artifacts_worktree/$artifact_root/candidate-desktop.mp4" - cp "$root/comparison.json" "$artifacts_worktree/$artifact_root/comparison.json" - cp 
"$root/mantis-report.md" "$artifacts_worktree/$artifact_root/mantis-report.md" - - git -C "$artifacts_worktree" add "$artifact_root" - if git -C "$artifacts_worktree" diff --cached --quiet; then - echo "No QA screenshot/video artifact changes to publish." - else - git -C "$artifacts_worktree" commit --quiet -m "qa: publish Mantis Discord evidence for PR ${TARGET_PR}" - git -C "$artifacts_worktree" push --quiet origin HEAD:qa-artifacts - fi - - encoded_artifact_root="${artifact_root// /%20}" - raw_base="https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/qa-artifacts/${encoded_artifact_root}" - baseline_status="$(jq -r '.baseline.status' "$root/comparison.json")" - candidate_status="$(jq -r '.candidate.status' "$root/comparison.json")" - pass="$(jq -r '.pass' "$root/comparison.json")" - preview_section="" - if [[ "$has_desktop_previews" == "true" ]]; then - preview_section="$(cat < | Animated candidate desktop preview | - EOF - )" - fi - change_clip_section="" - if [[ "$has_change_clips" == "true" ]]; then - change_clip_section="$(cat < "$comment_file" < - ## Mantis Discord Status Reactions QA - - Summary: Mantis reran Discord status reactions against the known queued-only baseline and the candidate ref. The baseline reproduced the bug, while the candidate showed the expected queued -> thinking -> done reaction sequence. 
- - - Scenario: \`discord-status-reactions-tool-only\` - - Trigger: \`${REQUEST_SOURCE}\` - - Run: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID} - - Artifact: ${ARTIFACT_URL} - - Baseline: \`${baseline_status}\` at \`${BASELINE_SHA}\` - - Candidate: \`${candidate_status}\` at \`${CANDIDATE_SHA}\` - - Overall: \`${pass}\` - - | Baseline queued-only | Candidate queued -> thinking -> done | - | --- | --- | - | Baseline Discord status reaction timeline | Candidate Discord status reaction timeline | - - | Baseline desktop/VNC browser | Candidate desktop/VNC browser | - | --- | --- | - | Baseline Mantis desktop browser screenshot | Candidate Mantis desktop browser screenshot | - ${preview_section} - ${change_clip_section} - - Full videos: - - [Baseline desktop MP4](${raw_base}/baseline-desktop.mp4) - - [Candidate desktop MP4](${raw_base}/candidate-desktop.mp4) - - Raw QA files: https://github.com/${GITHUB_REPOSITORY}/tree/qa-artifacts/${artifact_root} - EOF - - comment_id="$( - gh api --paginate "repos/${GITHUB_REPOSITORY}/issues/${TARGET_PR}/comments" \ - --jq '.[] | select(.body | contains("")) | .id' \ - | tail -n 1 - )" - - if [[ -n "$comment_id" ]]; then - comment_payload="$(mktemp)" - jq -n --rawfile body "$comment_file" '{ body: $body }' > "$comment_payload" - if gh api --method PATCH "repos/${GITHUB_REPOSITORY}/issues/comments/${comment_id}" --input "$comment_payload" >/dev/null; then - echo "Updated Mantis QA evidence comment on PR #${TARGET_PR}." - else - echo "::warning::Could not update existing Mantis QA evidence comment ${comment_id}; creating a new one." - gh pr comment "$TARGET_PR" --body-file "$comment_file" - echo "Created Mantis QA evidence comment on PR #${TARGET_PR}." - fi - else - gh pr comment "$TARGET_PR" --body-file "$comment_file" - echo "Created Mantis QA evidence comment on PR #${TARGET_PR}." 
- fi + node scripts/mantis/publish-pr-evidence.mjs \ + --manifest "$root/mantis-evidence.json" \ + --target-pr "$TARGET_PR" \ + --artifact-root "mantis/discord-status-reactions/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" \ + --marker "" \ + --artifact-url "$ARTIFACT_URL" \ + --run-url "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \ + --request-source "$REQUEST_SOURCE" diff --git a/.github/workflows/mantis-scenario.yml b/.github/workflows/mantis-scenario.yml new file mode 100644 index 00000000000..624914c7a80 --- /dev/null +++ b/.github/workflows/mantis-scenario.yml @@ -0,0 +1,83 @@ +name: Mantis Scenario + +on: + workflow_dispatch: + inputs: + scenario_id: + description: Mantis scenario id to run + required: true + default: discord-status-reactions-tool-only + type: choice + options: + - discord-status-reactions-tool-only + - slack-desktop-smoke + baseline_ref: + description: Optional baseline ref for before/after scenarios + required: false + default: 0bf06e953fdda290799fc9fb9244a8f67fdae593 + type: string + candidate_ref: + description: Candidate ref, tag, or SHA + required: true + default: main + type: string + pr_number: + description: Optional PR number to receive QA evidence + required: false + type: string + +permissions: + actions: write + contents: read + +concurrency: + group: mantis-scenario-${{ inputs.scenario_id }}-${{ inputs.pr_number || inputs.candidate_ref || github.run_id }} + cancel-in-progress: false + +jobs: + dispatch: + name: Dispatch selected Mantis workflow + runs-on: blacksmith-8vcpu-ubuntu-2404 + steps: + - name: Dispatch scenario + env: + GH_TOKEN: ${{ github.token }} + BASELINE_REF: ${{ inputs.baseline_ref }} + CANDIDATE_REF: ${{ inputs.candidate_ref }} + PR_NUMBER: ${{ inputs.pr_number }} + SCENARIO_ID: ${{ inputs.scenario_id }} + shell: bash + run: | + set -euo pipefail + + case "$SCENARIO_ID" in + discord-status-reactions-tool-only) + args=( + workflow run mantis-discord-status-reactions.yml + 
--repo "$GITHUB_REPOSITORY" + --ref main + -f "baseline_ref=${BASELINE_REF}" + -f "candidate_ref=${CANDIDATE_REF}" + ) + if [[ -n "${PR_NUMBER:-}" ]]; then + args+=(-f "pr_number=${PR_NUMBER}") + fi + gh "${args[@]}" + ;; + slack-desktop-smoke) + args=( + workflow run mantis-slack-desktop-smoke.yml + --repo "$GITHUB_REPOSITORY" + --ref main + -f "candidate_ref=${CANDIDATE_REF}" + ) + if [[ -n "${PR_NUMBER:-}" ]]; then + args+=(-f "pr_number=${PR_NUMBER}") + fi + gh "${args[@]}" + ;; + *) + echo "Unsupported Mantis scenario: ${SCENARIO_ID}" >&2 + exit 1 + ;; + esac diff --git a/.github/workflows/mantis-slack-desktop-smoke.yml b/.github/workflows/mantis-slack-desktop-smoke.yml new file mode 100644 index 00000000000..76dac4038c0 --- /dev/null +++ b/.github/workflows/mantis-slack-desktop-smoke.yml @@ -0,0 +1,328 @@ +name: Mantis Slack Desktop Smoke + +on: + workflow_dispatch: + inputs: + candidate_ref: + description: Ref, tag, or SHA to run inside the VNC desktop + required: true + default: main + type: string + pr_number: + description: Optional PR number to receive the QA evidence comment + required: false + type: string + scenario_id: + description: Slack QA scenario id + required: true + default: slack-canary + type: string + keep_vm: + description: Keep the desktop lease open after a passing run + required: false + default: false + type: boolean + +permissions: + contents: write + issues: write + pull-requests: write + +concurrency: + group: mantis-slack-desktop-smoke-${{ inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }} + cancel-in-progress: false + +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + NODE_VERSION: "24.x" + PNPM_VERSION: "10.33.0" + OPENCLAW_BUILD_PRIVATE_QA: "1" + OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1" + +jobs: + authorize_actor: + name: Authorize workflow actor + runs-on: blacksmith-8vcpu-ubuntu-2404 + steps: + - name: Require maintainer-level repository access + uses: actions/github-script@v8 + with: + script: 
| + const allowed = new Set(["admin", "maintain", "write"]); + const { owner, repo } = context.repo; + const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ + owner, + repo, + username: context.actor, + }); + const permission = data.permission; + core.info(`Actor ${context.actor} permission: ${permission}`); + if (!allowed.has(permission)) { + core.setFailed( + `Workflow requires write/maintain/admin access. Actor "${context.actor}" has "${permission}".`, + ); + } + + validate_ref: + name: Validate candidate ref + needs: authorize_actor + runs-on: blacksmith-8vcpu-ubuntu-2404 + outputs: + candidate_revision: ${{ steps.validate.outputs.candidate_revision }} + steps: + - name: Checkout harness ref + uses: actions/checkout@v6 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Validate ref is trusted + id: validate + env: + GH_TOKEN: ${{ github.token }} + CANDIDATE_REF: ${{ inputs.candidate_ref }} + shell: bash + run: | + set -euo pipefail + + git fetch --no-tags origin +refs/heads/main:refs/remotes/origin/main + + revision="$(git rev-parse "${CANDIDATE_REF}^{commit}")" + reason="" + if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then + reason="main-ancestor" + elif git tag --points-at "$revision" | grep -Eq '^v'; then + reason="release-tag" + else + pr_head_count="$( + gh api \ + -H "Accept: application/vnd.github+json" \ + "repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \ + --jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'")] | length' + )" + if [[ "$pr_head_count" != "0" ]]; then + reason="open-pr-head" + fi + fi + + if [[ -z "$reason" ]]; then + echo "Candidate ref '${CANDIDATE_REF}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." 
>&2 + exit 1 + fi + + echo "candidate_revision=${revision}" >> "$GITHUB_OUTPUT" + { + echo "candidate: \`${CANDIDATE_REF}\`" + echo "candidate SHA: \`${revision}\`" + echo "candidate trust reason: \`${reason}\`" + } >> "$GITHUB_STEP_SUMMARY" + + run_slack_desktop: + name: Run Slack desktop smoke + needs: validate_ref + runs-on: blacksmith-8vcpu-ubuntu-2404 + timeout-minutes: 180 + environment: qa-live-shared + steps: + - name: Checkout harness ref + uses: actions/checkout@v6 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Build Mantis harness + run: pnpm build + + - name: Setup Go for Crabbox CLI + uses: actions/setup-go@v6 + with: + go-version: "1.26.x" + cache: false + + - name: Install Crabbox CLI + shell: bash + run: | + set -euo pipefail + install_dir="${RUNNER_TEMP}/crabbox" + mkdir -p "$install_dir" "$HOME/.local/bin" + git clone --depth 1 https://github.com/openclaw/crabbox.git "$install_dir/src" + go build -C "$install_dir/src" -o "$HOME/.local/bin/crabbox" ./cmd/crabbox + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + "$HOME/.local/bin/crabbox" --version + "$HOME/.local/bin/crabbox" warmup --help 2>&1 | grep -q -- "-desktop" + "$HOME/.local/bin/crabbox" media preview --help >/dev/null + + - name: Prepare candidate worktree + env: + CANDIDATE_SHA: ${{ needs.validate_ref.outputs.candidate_revision }} + shell: bash + run: | + set -euo pipefail + worktree_root=".artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees" + mkdir -p "$worktree_root" + git worktree add --detach "$worktree_root/candidate" "$CANDIDATE_SHA" + pnpm --dir "$worktree_root/candidate" install --frozen-lockfile + pnpm --dir "$worktree_root/candidate" build + + - name: Run Slack desktop scenario + id: run_mantis + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_LIVE_OPENAI_KEY: 
${{ secrets.OPENAI_API_KEY }} + OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }} + OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }} + OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }} + CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }} + OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }} + OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }} + CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }} + CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }} + KEEP_VM: ${{ inputs.keep_vm }} + SCENARIO_ID: ${{ inputs.scenario_id }} + shell: bash + run: | + set -euo pipefail + + require_var() { + local key="$1" + if [[ -z "${!key:-}" ]]; then + echo "Missing required ${key}." >&2 + exit 1 + fi + } + + CRABBOX_COORDINATOR="${CRABBOX_COORDINATOR:-${OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR:-}}" + CRABBOX_COORDINATOR_TOKEN="${CRABBOX_COORDINATOR_TOKEN:-${OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN:-}}" + export CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN + + require_var OPENCLAW_LIVE_OPENAI_KEY + require_var OPENCLAW_QA_CONVEX_SITE_URL + require_var OPENCLAW_QA_CONVEX_SECRET_CI + require_var CRABBOX_COORDINATOR_TOKEN + + candidate_repo="$(pwd)/.artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees/candidate" + root="$candidate_repo/.artifacts/qa-e2e/mantis/slack-desktop-smoke" + echo "output_dir=${root}" >> "$GITHUB_OUTPUT" + keep_args=() + if [[ "$KEEP_VM" == "true" ]]; then + keep_args=(--keep-lease) + fi + + pnpm openclaw qa mantis slack-desktop-smoke \ + --repo-root "$candidate_repo" \ + --output-dir "$root" \ + --provider hetzner \ + --class standard \ + --idle-timeout 45m \ + --ttl 120m \ + --gateway-setup \ + --credential-source convex \ + --credential-role ci \ + --provider-mode live-frontier \ + --model openai/gpt-5.4 \ + 
--alt-model openai/gpt-5.4 \ + --fast \ + --scenario "$SCENARIO_ID" \ + "${keep_args[@]}" + + if [[ -f "$root/slack-desktop-smoke.mp4" ]]; then + if ! command -v ffmpeg >/dev/null 2>&1 || ! command -v ffprobe >/dev/null 2>&1; then + sudo apt-get update && sudo apt-get install -y ffmpeg || true + fi + if ! crabbox media preview \ + --input "$root/slack-desktop-smoke.mp4" \ + --output "$root/slack-desktop-smoke-preview.gif" \ + --trimmed-video-output "$root/slack-desktop-smoke-change.mp4" \ + --json > "$root/slack-desktop-smoke-preview.json"; then + rm -f "$root/slack-desktop-smoke-preview.gif" + rm -f "$root/slack-desktop-smoke-change.mp4" + rm -f "$root/slack-desktop-smoke-preview.json" + echo "::warning::Could not generate Slack motion-trimmed desktop preview." + fi + fi + + status="$(jq -r '.status' "$root/mantis-slack-desktop-smoke-summary.json")" + jq -n \ + --arg status "$status" \ + --arg candidate_sha "${{ needs.validate_ref.outputs.candidate_revision }}" \ + --arg scenario "$SCENARIO_ID" \ + '{ + schemaVersion: 1, + id: "slack-desktop-smoke", + title: "Mantis Slack Desktop Smoke QA", + summary: "Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence.", + scenario: $scenario, + comparison: { + candidate: { sha: $candidate_sha, expected: "Slack QA and VM gateway setup pass", status: $status, fixed: ($status == "pass") }, + pass: ($status == "pass") + }, + artifacts: [ + { kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: true }, + { kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: true, required: false 
}, + { kind: "motionClip", lane: "candidate", label: "Slack change MP4", path: "slack-desktop-smoke-change.mp4", targetPath: "slack-desktop-change.mp4", required: false }, + { kind: "fullVideo", lane: "candidate", label: "Slack desktop MP4", path: "slack-desktop-smoke.mp4", targetPath: "slack-desktop.mp4", required: false }, + { kind: "metadata", lane: "run", label: "Slack desktop summary", path: "mantis-slack-desktop-smoke-summary.json", targetPath: "summary.json" }, + { kind: "report", lane: "run", label: "Slack desktop report", path: "mantis-slack-desktop-smoke-report.md", targetPath: "report.md" }, + { kind: "metadata", lane: "run", label: "Slack command log", path: "slack-desktop-command.log", targetPath: "slack-desktop-command.log", required: false }, + { kind: "metadata", lane: "run", label: "Slack preview metadata", path: "slack-desktop-smoke-preview.json", targetPath: "slack-desktop-preview.json", required: false }, + { kind: "metadata", lane: "run", label: "Slack error", path: "error.txt", targetPath: "error.txt", required: false } + ] + }' > "$root/mantis-evidence.json" + + cat "$root/mantis-slack-desktop-smoke-report.md" >> "$GITHUB_STEP_SUMMARY" + + if [[ "$status" != "pass" ]]; then + echo "Slack desktop smoke failed." 
>&2 + exit 1 + fi + + - name: Upload Mantis Slack desktop artifacts + id: upload_artifact + if: ${{ always() && steps.run_mantis.outputs.output_dir != '' }} + uses: actions/upload-artifact@v4 + with: + name: mantis-slack-desktop-smoke-${{ github.run_id }}-${{ github.run_attempt }} + path: ${{ steps.run_mantis.outputs.output_dir }} + retention-days: 14 + if-no-files-found: warn + + - name: Create Mantis GitHub App token + id: mantis_app_token + if: ${{ always() && inputs.pr_number != '' }} + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.MANTIS_GITHUB_APP_ID }} + private-key: ${{ secrets.MANTIS_GITHUB_APP_PRIVATE_KEY }} + owner: ${{ github.repository_owner }} + repositories: ${{ github.event.repository.name }} + permission-contents: write + permission-issues: write + permission-pull-requests: write + + - name: Comment PR with inline QA evidence + if: ${{ always() && inputs.pr_number != '' && steps.run_mantis.outputs.output_dir != '' }} + env: + GH_TOKEN: ${{ steps.mantis_app_token.outputs.token }} + TARGET_PR: ${{ inputs.pr_number }} + ARTIFACT_URL: ${{ steps.upload_artifact.outputs.artifact-url }} + REQUEST_SOURCE: workflow_dispatch + shell: bash + run: | + set -euo pipefail + root="${{ steps.run_mantis.outputs.output_dir }}" + node scripts/mantis/publish-pr-evidence.mjs \ + --manifest "$root/mantis-evidence.json" \ + --target-pr "$TARGET_PR" \ + --artifact-root "mantis/slack-desktop-smoke/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" \ + --marker "" \ + --artifact-url "$ARTIFACT_URL" \ + --run-url "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \ + --request-source "$REQUEST_SOURCE" diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md index af162041564..9ac8e88695d 100644 --- a/docs/concepts/mantis.md +++ b/docs/concepts/mantis.md @@ -176,6 +176,72 @@ Crabbox CLI from `openclaw/crabbox` main so it can use the current desktop/browser lease flags before the next Crabbox binary release is cut. 
+`Mantis Scenario` is the generic manual entrypoint. It takes a `scenario_id`, +`candidate_ref`, optional `baseline_ref`, and optional `pr_number`, then +dispatches the scenario-owned workflow. The wrapper is intentionally thin: +scenario workflows still own their transport setup, credentials, VM class, +expected oracle, and artifact manifest. + +`Mantis Slack Desktop Smoke` is the first Slack VM workflow. It checks out the +trusted candidate ref in a separate worktree, leases a Crabbox Linux desktop, +runs `pnpm openclaw qa mantis slack-desktop-smoke --gateway-setup` against that +candidate, opens Slack Web in the VNC browser, records the desktop, generates a +motion-trimmed preview with `crabbox media preview`, uploads the full artifact +directory, and optionally posts the inline evidence comment on the target PR. +Use this lane when you want "a Linux desktop with Slack and a claw running" +instead of only a bot-to-bot Slack transcript. + +Every PR-publishing scenario writes `mantis-evidence.json` next to its report. +This schema is the handoff between scenario code and GitHub comments: + +```json +{ + "schemaVersion": 1, + "id": "discord-status-reactions", + "title": "Mantis Discord Status Reactions QA", + "summary": "Human-readable top summary for the PR comment.", + "scenario": "discord-status-reactions-tool-only", + "comparison": { + "baseline": { "sha": "...", "status": "fail", "expected": "queued-only" }, + "candidate": { "sha": "...", "status": "pass", "expected": "queued -> thinking -> done" }, + "pass": true + }, + "artifacts": [ + { + "kind": "timeline", + "lane": "baseline", + "label": "Baseline queued-only", + "path": "baseline/timeline.png", + "targetPath": "baseline.png", + "alt": "Baseline Discord timeline", + "width": 420 + } + ] +} +``` + +Artifact `path` values are relative to the manifest directory. `targetPath` +values are relative paths under the `qa-artifacts` branch publish directory. 
/**
 * Express an artifact path relative to the run's output directory.
 *
 * Returns `undefined` when no path was recorded. Absolute paths are rewritten
 * relative to `outputDir`; paths that are already relative are returned
 * unchanged.
 */
function relativeArtifactPath(outputDir: string, artifactPath: string | undefined) {
  if (!artifactPath) {
    return undefined;
  }
  if (!path.isAbsolute(artifactPath)) {
    return artifactPath;
  }
  return path.relative(outputDir, artifactPath);
}

/**
 * Build the evidence manifest object that is serialized to
 * `mantis-evidence.json` next to the comparison and report files.
 *
 * The comparison JSON and the Markdown report are always listed. A timeline
 * screenshot and a full video are listed per lane only when that lane recorded
 * the corresponding path; videos are marked `required: false`.
 */
function buildEvidenceManifest(params: {
  baseline: LaneResult;
  candidate: LaneResult;
  comparison: Comparison;
  outputDir: string;
}) {
  type EvidenceArtifact = {
    alt?: string;
    kind: string;
    label: string;
    lane: "baseline" | "candidate" | "run";
    path: string;
    required?: boolean;
    targetPath: string;
    width?: number;
  };
  const { baseline, candidate, comparison, outputDir } = params;

  // Zero-or-one timeline entry for a lane, depending on whether a screenshot exists.
  const timelineEntries = (
    lane: "baseline" | "candidate",
    result: LaneResult,
    alt: string,
    label: string,
  ): EvidenceArtifact[] => {
    const screenshot = relativeArtifactPath(outputDir, result.screenshotPath);
    if (!screenshot) {
      return [];
    }
    return [
      {
        alt,
        kind: "timeline",
        label,
        lane,
        path: screenshot,
        targetPath: `${lane}.png`,
        width: 420,
      },
    ];
  };

  // Zero-or-one optional full-video entry for a lane.
  const videoEntries = (
    lane: "baseline" | "candidate",
    result: LaneResult,
    label: string,
  ): EvidenceArtifact[] => {
    const video = relativeArtifactPath(outputDir, result.videoPath);
    if (!video) {
      return [];
    }
    return [
      {
        kind: "fullVideo",
        label,
        lane,
        path: video,
        targetPath: `${lane}.mp4`,
        required: false,
      },
    ];
  };

  const artifacts: EvidenceArtifact[] = [
    {
      kind: "metadata",
      label: "Comparison JSON",
      lane: "run",
      path: "comparison.json",
      targetPath: "comparison.json",
    },
    {
      kind: "report",
      label: "Mantis report",
      lane: "run",
      path: "mantis-report.md",
      targetPath: "mantis-report.md",
    },
    ...timelineEntries(
      "baseline",
      baseline,
      "Baseline Discord status reaction timeline",
      "Baseline queued-only",
    ),
    ...timelineEntries(
      "candidate",
      candidate,
      "Candidate Discord status reaction timeline",
      "Candidate queued -> thinking -> done",
    ),
    ...videoEntries("baseline", baseline, "Baseline MP4"),
    ...videoEntries("candidate", candidate, "Candidate MP4"),
  ];

  return {
    artifacts,
    comparison,
    id: comparison.scenario,
    scenario: comparison.scenario,
    schemaVersion: 1,
    summary:
      "Mantis ran the before/after scenario, captured baseline and candidate evidence, and compared the expected bug reproduction against the candidate fix.",
    title: "Mantis Before/After QA",
  };
}
/**
 * Parse a flat list of `--flag value` pairs into an object.
 *
 * Dashes in flag names become underscores (`--target-pr` -> `target_pr`).
 *
 * @param {string[]} argv - Arguments without the node/script prefix.
 * @returns {Record<string, string>} Parsed flag values.
 * @throws {Error} On a positional argument or a flag without a value.
 */
function parseArgs(argv) {
  const args = {};
  for (let index = 0; index < argv.length; index += 2) {
    const key = argv[index];
    if (!key.startsWith("--")) {
      throw new Error(`Unexpected argument: ${key}`);
    }
    const value = argv[index + 1];
    // A following flag (or nothing at all) means this flag has no value.
    if (!value || value.startsWith("--")) {
      throw new Error(`Missing value for ${key}`);
    }
    args[key.slice(2).replaceAll("-", "_")] = value;
  }
  return args;
}

/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} filePath
 * @returns {unknown} The parsed JSON value.
 */
function readJson(filePath) {
  return JSON.parse(readFileSync(filePath, "utf8"));
}

/**
 * Ensure `candidatePath` is `parentDir` itself or located beneath it.
 *
 * Only a real parent-directory segment counts as an escape, so a file whose
 * name merely begins with two dots (for example `..hidden.png`) is accepted.
 *
 * @param {string} parentDir - Directory that must contain the path.
 * @param {string} candidatePath - Resolved path to check.
 * @param {string} label - Human-readable prefix for the error message.
 * @returns {string} `candidatePath`, unchanged.
 * @throws {Error} When the path lies outside `parentDir`.
 */
function assertInside(parentDir, candidatePath, label) {
  const relative = path.relative(parentDir, candidatePath);
  const escapes =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapes) {
    throw new Error(`${label} escapes manifest directory: ${candidatePath}`);
  }
  return candidatePath;
}

/**
 * Normalize a publish target path to a relative POSIX path.
 *
 * Backslashes are treated as separators. Empty paths, absolute paths,
 * drive-letter paths and anything that still contains a `..` segment after
 * normalization (including a bare `..`) are rejected.
 *
 * @param {unknown} targetPath
 * @returns {string} The normalized relative path.
 * @throws {Error} When the path is empty, absolute, or climbs out of its root.
 */
function normalizeTargetPath(targetPath) {
  const normalized = path.posix.normalize(String(targetPath).replaceAll("\\", "/"));
  const isEmpty = normalized === "." || normalized === "";
  const isAbsolute = normalized.startsWith("/") || /^[A-Za-z]:/u.test(normalized);
  // After normalization any remaining ".." segment means the path climbs upward.
  const climbs = normalized.split("/").includes("..");
  if (isEmpty || isAbsolute || climbs) {
    throw new Error(`Invalid artifact target path: ${targetPath}`);
  }
  return normalized;
}

/**
 * Validate one manifest artifact entry and resolve its source file.
 *
 * @param {string} manifestDir - Directory containing the manifest.
 * @param {object} artifact - Raw manifest entry.
 * @returns {object | null} The entry with defaults applied plus `source`, or
 *   `null` when the entry has `required: false` and its file is missing.
 * @throws {Error} On a malformed entry, a path outside `manifestDir`, a
 *   missing required file, or a source that is not a regular file.
 */
function resolveArtifact(manifestDir, artifact) {
  if (!artifact || typeof artifact !== "object") {
    throw new Error("Manifest artifact entries must be objects.");
  }
  if (!artifact.path) {
    throw new Error("Manifest artifact entry is missing path.");
  }

  const source = assertInside(
    manifestDir,
    path.resolve(manifestDir, artifact.path),
    `Artifact ${artifact.label ?? artifact.path}`,
  );
  // Entries are required unless they explicitly opt out.
  const required = artifact.required !== false;
  if (!existsSync(source)) {
    if (required) {
      throw new Error(`Missing required artifact: ${artifact.path}`);
    }
    return null;
  }
  if (!statSync(source).isFile()) {
    throw new Error(`Artifact is not a file: ${artifact.path}`);
  }

  return {
    ...artifact,
    kind: artifact.kind ?? "attachment",
    lane: artifact.lane ?? "run",
    label: artifact.label ?? artifact.path,
    required,
    source,
    targetPath: normalizeTargetPath(artifact.targetPath ?? path.basename(artifact.path)),
  };
}

/**
 * Load a schema-version-1 evidence manifest and resolve its artifacts.
 *
 * The manifest file itself is appended as a `metadata` artifact published as
 * `mantis-evidence.json`.
 *
 * @param {string} manifestPath - Path to the manifest JSON file.
 * @returns {object} The manifest fields plus resolved `artifacts` and `manifestDir`.
 * @throws {Error} On an unsupported schema, missing `id`/`title`/`scenario`,
 *   a non-array `artifacts` value, an invalid artifact entry, or two different
 *   source files that map to the same target path.
 */
export function loadEvidenceManifest(manifestPath) {
  const resolvedManifest = path.resolve(manifestPath);
  const manifestDir = path.dirname(resolvedManifest);
  const manifest = readJson(resolvedManifest);
  if (manifest?.schemaVersion !== 1) {
    throw new Error(`Unsupported Mantis evidence manifest schema: ${manifest?.schemaVersion}`);
  }
  if (!manifest.id || !manifest.title || !manifest.scenario) {
    throw new Error("Mantis evidence manifest requires id, title, and scenario.");
  }
  const declared = manifest.artifacts ?? [];
  if (!Array.isArray(declared)) {
    throw new Error("Mantis evidence manifest artifacts must be an array.");
  }
  const artifacts = declared
    .map((artifact) => resolveArtifact(manifestDir, artifact))
    .filter(Boolean);
  artifacts.push({
    kind: "metadata",
    lane: "run",
    label: "Mantis evidence manifest",
    source: resolvedManifest,
    targetPath: "mantis-evidence.json",
  });

  // Two different files copied to one destination would silently overwrite each other.
  const sourceByTarget = new Map();
  for (const artifact of artifacts) {
    const previousSource = sourceByTarget.get(artifact.targetPath);
    if (previousSource !== undefined && previousSource !== artifact.source) {
      throw new Error(`Duplicate artifact target path: ${artifact.targetPath}`);
    }
    sourceByTarget.set(artifact.targetPath, artifact.source);
  }

  return {
    ...manifest,
    artifacts,
    manifestDir,
  };
}
/**
 * Percent-encode each segment of a slash-separated path, dropping empty segments.
 *
 * @param {string} input
 * @returns {string}
 */
function encodePathForUrl(input) {
  return input
    .split("/")
    .filter(Boolean)
    .map((part) => encodeURIComponent(part))
    .join("/");
}

/** Build the URL of a published artifact below `rawBase`. */
function artifactUrl(rawBase, artifact) {
  return `${rawBase}/${encodePathForUrl(artifact.targetPath)}`;
}

/** Escape text for use inside a double-quoted HTML attribute. */
function escapeHtmlAttribute(value) {
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll('"', "&quot;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;");
}

/** Keep a label on one line and stop it from splitting a Markdown table cell. */
function escapeTableCell(value) {
  return String(value)
    .replace(/\r?\n/gu, " ")
    .replaceAll("|", "\\|");
}

/** Render an inline `<img>` tag pointing at the published artifact. */
function renderImageTag({ artifact, rawBase, width }) {
  const alt = escapeHtmlAttribute(artifact.alt ?? artifact.label);
  return `<img src="${artifactUrl(rawBase, artifact)}" width="${width}" alt="${alt}">`;
}

/**
 * Index artifacts of one kind by lane. When several artifacts share a lane,
 * the last one listed wins.
 */
function byLane(artifacts, kind) {
  const lanes = new Map();
  for (const artifact of artifacts) {
    if (artifact.kind === kind) {
      lanes.set(artifact.lane, artifact);
    }
  }
  return lanes;
}

/**
 * Find the two artifacts of `kind` for the given lanes.
 *
 * @returns {{ left: object, right: object } | null} `null` unless both lanes have one.
 */
function findPair(artifacts, kind, leftLane, rightLane) {
  const lanes = byLane(artifacts, kind);
  const left = lanes.get(leftLane);
  const right = lanes.get(rightLane);
  return left && right ? { left, right } : null;
}

/**
 * Render a two-column Markdown table showing a pair of images side by side.
 * The image width comes from the pair (default 420) and is capped at 720.
 */
function renderPairTable({ pair, rawBase }) {
  const { left, right } = pair;
  if (!left || !right) {
    return "";
  }
  const width = Math.min(Number(left.width ?? right.width ?? 420) || 420, 720);
  return [
    `| ${escapeTableCell(left.label)} | ${escapeTableCell(right.label)} |`,
    "| --- | --- |",
    `| ${renderImageTag({ artifact: left, rawBase, width })} | ${renderImageTag({ artifact: right, rawBase, width })} |`,
    "",
  ].join("\n");
}

/**
 * Render every artifact flagged `inline` that was not already shown in a pair
 * table, as a bold label followed by its image (default width 720, capped at 900).
 */
function renderSingleImageTables({ artifacts, rawBase, pairedKeys }) {
  const renderedPairs = new Set(pairedKeys);
  return artifacts
    .filter(
      (artifact) => artifact.inline && !renderedPairs.has(`${artifact.kind}:${artifact.lane}`),
    )
    .map((artifact) => {
      const width = Math.min(Number(artifact.width ?? 720) || 720, 900);
      return [
        `**${artifact.label}**`,
        "",
        renderImageTag({ artifact, rawBase, width }),
        "",
      ].join("\n");
    })
    .join("\n");
}

/** Render a titled bullet list linking every artifact of `kind`; empty string if none. */
function renderLinkList({ artifacts, kind, rawBase, title }) {
  const links = artifacts
    .filter((artifact) => artifact.kind === kind)
    .map((artifact) => `- [${artifact.label}](${artifactUrl(rawBase, artifact)})`);
  if (links.length === 0) {
    return "";
  }
  return [`${title}:`, ...links, ""].join("\n");
}

/**
 * Render the status bullet for one comparison lane, e.g.
 * "- Baseline: `fail` at `abc`, expected queued-only". Returns "" without a lane.
 */
function laneLine(label, lane) {
  if (!lane) {
    return "";
  }
  const pieces = [`- ${label}: \`${lane.status ?? "unknown"}\``];
  // Prefer the resolved commit over the symbolic ref when both are present.
  if (lane.sha) {
    pieces.push(` at \`${lane.sha}\``);
  } else if (lane.ref) {
    pieces.push(` at \`${lane.ref}\``);
  }
  if (lane.expected) {
    pieces.push(`, expected ${lane.expected}`);
  }
  return pieces.join("");
}

/**
 * Build the Markdown body of the PR evidence comment.
 *
 * Layout: marker, title, summary, run metadata bullets, side-by-side image
 * tables for baseline/candidate pairs, remaining inline images, link lists for
 * motion clips and full videos, and a link to the raw published files.
 *
 * @param {object} options
 * @param {string} options.artifactRoot - Publish root; used only for the fallback tree URL.
 * @param {string} [options.artifactUrl] - Actions artifact URL.
 * @param {object} options.manifest - Manifest as returned by `loadEvidenceManifest`.
 * @param {string} options.marker - Marker text placed on the first line.
 * @param {string} options.rawBase - Base URL under which artifacts are served.
 * @param {string} [options.requestSource] - What triggered the run.
 * @param {string} [options.runUrl] - Workflow run URL.
 * @param {string} [options.treeUrl] - Browsable URL of the published files.
 * @returns {string} Comment body ending in a single newline.
 */
export function renderEvidenceComment({
  artifactRoot,
  artifactUrl: actionsArtifactUrl,
  manifest,
  marker,
  rawBase,
  requestSource,
  runUrl,
  treeUrl,
}) {
  const comparison = manifest.comparison ?? {};
  const pairs = [
    findPair(manifest.artifacts, "timeline", "baseline", "candidate"),
    findPair(manifest.artifacts, "desktopScreenshot", "baseline", "candidate"),
    findPair(manifest.artifacts, "motionPreview", "baseline", "candidate"),
  ].filter(Boolean);
  const pairedKeys = pairs.flatMap((pair) => [
    `${pair.left.kind}:${pair.left.lane}`,
    `${pair.right.kind}:${pair.right.lane}`,
  ]);

  const lines = [
    marker,
    `## ${manifest.title}`,
    "",
    `Summary: ${manifest.summary ?? "Mantis captured QA evidence for this scenario."}`,
    "",
    `- Scenario: \`${manifest.scenario}\``,
  ];
  if (requestSource) {
    lines.push(`- Trigger: \`${requestSource}\``);
  }
  if (runUrl) {
    lines.push(`- Run: ${runUrl}`);
  }
  if (actionsArtifactUrl) {
    lines.push(`- Artifact: ${actionsArtifactUrl}`);
  }
  const statusLines = [
    laneLine("Baseline", comparison.baseline),
    laneLine("Candidate", comparison.candidate),
  ].filter(Boolean);
  lines.push(...statusLines);
  if (typeof comparison.pass === "boolean") {
    lines.push(`- Overall: \`${comparison.pass}\``);
  }
  lines.push("");

  lines.push(...pairs.map((pair) => renderPairTable({ pair, rawBase })));
  const optionalSections = [
    renderSingleImageTables({ artifacts: manifest.artifacts, pairedKeys, rawBase }),
    renderLinkList({
      artifacts: manifest.artifacts,
      kind: "motionClip",
      rawBase,
      title: "Motion-trimmed clips",
    }),
    renderLinkList({
      artifacts: manifest.artifacts,
      kind: "fullVideo",
      rawBase,
      title: "Full videos",
    }),
  ].filter(Boolean);
  lines.push(...optionalSections);
  lines.push(
    `Raw QA files: ${treeUrl ?? `https://github.com/${process.env.GITHUB_REPOSITORY}/tree/qa-artifacts/${artifactRoot}`}`,
  );
  // Sections carry their own trailing blank line; collapse any doubled gaps.
  return `${lines.join("\n").replace(/\n{3,}/gu, "\n\n")}\n`;
}
/**
 * Run a command synchronously and return its stdout as a UTF-8 string.
 * By default stdin is ignored, stdout is captured and stderr is inherited.
 *
 * @throws {Error} When the command cannot be spawned or exits non-zero.
 */
function run(command, args, options = {}) {
  return execFileSync(command, args, {
    encoding: "utf8",
    stdio: options.stdio ?? ["ignore", "pipe", "inherit"],
    ...options,
  });
}

/**
 * Run a command with all stdio ignored and return its exit status
 * (1 when no status is reported).
 *
 * @throws {Error} When the command cannot be spawned.
 */
function runStatus(command, args, options = {}) {
  const result = spawnSync(command, args, {
    stdio: "ignore",
    ...options,
  });
  if (result.error) {
    throw result.error;
  }
  return result.status ?? 1;
}

/** Replace every occurrence of `secret` in `text`; an empty secret leaves the text untouched. */
function redactSecret(text, secret) {
  return secret ? String(text).replaceAll(secret, "***") : String(text);
}

/**
 * Build the jq filter selecting ids of comments whose body contains `marker`.
 * The marker is JSON-encoded so quotes and backslashes cannot break out of the
 * jq string literal.
 */
function markerCommentFilter(marker) {
  return `.[] | select(.body | contains(${JSON.stringify(marker)})) | .id`;
}

/**
 * Copy the manifest's artifacts under `artifactRoot` on the `qa-artifacts`
 * branch of `repo` and push them, creating the branch when it cannot be fetched.
 *
 * @param {object} options
 * @param {string} options.artifactRoot - Relative destination directory on the branch.
 * @param {string} options.ghToken - Token used for git authentication.
 * @param {object} options.manifest - Manifest with resolved `artifacts`.
 * @param {string} options.repo - `owner/name` of the target repository.
 * @returns {string} The normalized artifact root.
 * @throws {Error} On an invalid root or target path, or when a git command
 *   fails; the token is redacted from the error message.
 */
function publishArtifactFiles({ artifactRoot, ghToken, manifest, repo }) {
  // Validate before creating the temp worktree so a bad root leaves nothing behind.
  const safeArtifactRoot = normalizeTargetPath(artifactRoot);
  const worktree = mkdtempSync(path.join(tmpdir(), "mantis-qa-artifacts-"));
  try {
    run("git", ["init", "--quiet", worktree]);
    run("git", ["-C", worktree, "config", "user.name", "github-actions[bot]"]);
    run("git", [
      "-C",
      worktree,
      "config",
      "user.email",
      "41898282+github-actions[bot]@users.noreply.github.com",
    ]);
    run("git", [
      "-C",
      worktree,
      "remote",
      "add",
      "origin",
      `https://x-access-token:${ghToken}@github.com/${repo}.git`,
    ]);
    try {
      run("git", ["-C", worktree, "fetch", "--quiet", "origin", "qa-artifacts"]);
      run("git", ["-C", worktree, "checkout", "--quiet", "-B", "qa-artifacts", "FETCH_HEAD"]);
    } catch {
      // Best effort: any fetch/checkout failure is treated as "branch does not exist yet".
      run("git", ["-C", worktree, "checkout", "--quiet", "--orphan", "qa-artifacts"]);
    }

    const destinationRoot = path.join(worktree, safeArtifactRoot);
    for (const artifact of manifest.artifacts) {
      const destination = assertInside(
        destinationRoot,
        path.resolve(destinationRoot, artifact.targetPath),
        `Artifact target ${artifact.targetPath}`,
      );
      mkdirSync(path.dirname(destination), { recursive: true });
      copyFileSync(artifact.source, destination);
    }

    run("git", ["-C", worktree, "add", safeArtifactRoot]);
    // `git diff --cached --quiet` exits non-zero when something is staged.
    const hasChanges = runStatus("git", ["-C", worktree, "diff", "--cached", "--quiet"]) !== 0;
    if (hasChanges) {
      run("git", [
        "-C",
        worktree,
        "commit",
        "--quiet",
        "-m",
        `qa: publish Mantis evidence for ${manifest.id}`,
      ]);
      run("git", ["-C", worktree, "push", "--quiet", "origin", "HEAD:qa-artifacts"]);
    } else {
      console.log("No QA evidence artifact changes to publish.");
    }
  } catch (error) {
    // Failed-command messages echo argv, which includes the token-bearing remote URL.
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(redactSecret(message, ghToken));
  } finally {
    rmSync(worktree, { force: true, recursive: true });
  }
  return safeArtifactRoot;
}

/**
 * Update the newest PR comment containing `marker`, or create a new comment
 * when none exists or the update fails.
 *
 * @param {object} options
 * @param {string} options.body - Comment body.
 * @param {string} options.marker - Text identifying the comment to update.
 * @param {string} options.prNumber - Pull request number.
 * @param {string} options.repo - `owner/name` of the target repository.
 * @throws {Error} When the PR lookup, comment listing or comment creation fails.
 */
function upsertPrComment({ body, marker, prNumber, repo }) {
  // Fails early when the number does not refer to a pull request in `repo`.
  run("gh", ["api", `repos/${repo}/pulls/${prNumber}`, "--jq", ".number"]);
  const commentId = run("gh", [
    "api",
    "--paginate",
    `repos/${repo}/issues/${prNumber}/comments`,
    "--jq",
    markerCommentFilter(marker),
  ])
    .trim()
    .split("\n")
    .findLast((line) => line.length > 0);
  const scratchDir = mkdtempSync(path.join(tmpdir(), "mantis-comment-"));
  try {
    const bodyFile = path.join(scratchDir, "body.md");
    writeFileSync(bodyFile, body);
    if (commentId) {
      const payloadFile = `${bodyFile}.json`;
      writeFileSync(payloadFile, JSON.stringify({ body }));
      try {
        run("gh", [
          "api",
          "--method",
          "PATCH",
          `repos/${repo}/issues/comments/${commentId}`,
          "--input",
          payloadFile,
        ]);
        console.log(`Updated Mantis QA evidence comment on PR #${prNumber}.`);
        return;
      } catch {
        console.warn(
          `Could not update existing Mantis QA evidence comment ${commentId}; creating a new one.`,
        );
      }
    }
    // Pass the repository explicitly so the comment lands in the same repo as the API calls.
    run("gh", ["pr", "comment", prNumber, "--repo", repo, "--body-file", bodyFile], {
      stdio: "inherit",
    });
    console.log(`Created Mantis QA evidence comment on PR #${prNumber}.`);
  } finally {
    rmSync(scratchDir, { force: true, recursive: true });
  }
}
/**
 * CLI entry point: validate arguments, publish the manifest's artifacts to the
 * `qa-artifacts` branch, then create or update the PR evidence comment.
 *
 * Requires `--manifest`, `--target-pr`, `--artifact-root` and `--marker`.
 * The repository comes from `--repo` or `GITHUB_REPOSITORY`; the token from
 * `GH_TOKEN` or `GITHUB_TOKEN`.
 *
 * @param {string[]} [rawArgs] - Command-line arguments; defaults to the process arguments.
 * @throws {Error} On missing or invalid arguments, or when publishing fails.
 */
export function publishEvidence(rawArgs = process.argv.slice(2)) {
  const args = parseArgs(rawArgs);

  const missingKey = ["manifest", "target_pr", "artifact_root", "marker"].find(
    (key) => !args[key],
  );
  if (missingKey !== undefined) {
    throw new Error(`Missing --${missingKey.replaceAll("_", "-")}.`);
  }
  const prNumber = args.target_pr;
  if (!/^[0-9]+$/u.test(prNumber)) {
    throw new Error(`--target-pr must be numeric, got ${prNumber}.`);
  }

  const repo = args.repo ?? process.env.GITHUB_REPOSITORY;
  const ghToken = process.env.GH_TOKEN ?? process.env.GITHUB_TOKEN;
  if (!repo) {
    throw new Error("Missing --repo or GITHUB_REPOSITORY.");
  }
  if (!ghToken) {
    throw new Error("Missing GH_TOKEN or GITHUB_TOKEN.");
  }

  const manifest = loadEvidenceManifest(args.manifest);
  const artifactRoot = publishArtifactFiles({
    artifactRoot: args.artifact_root,
    ghToken,
    manifest,
    repo,
  });

  // Both links point at the same published directory: raw files vs. browsable tree.
  const encodedRoot = encodePathForUrl(artifactRoot);
  const body = renderEvidenceComment({
    artifactRoot,
    artifactUrl: args.artifact_url,
    manifest,
    marker: args.marker,
    rawBase: `https://raw.githubusercontent.com/${repo}/qa-artifacts/${encodedRoot}`,
    requestSource: args.request_source,
    runUrl: args.run_url,
    treeUrl: `https://github.com/${repo}/tree/qa-artifacts/${encodedRoot}`,
  });
  upsertPrComment({ body, marker: args.marker, prNumber, repo });
}

// Run the publisher only when this file is executed directly, not when imported.
const executedPath = process.argv[1] ? path.resolve(process.argv[1]) : "";
if (executedPath === fileURLToPath(import.meta.url)) {
  try {
    publishEvidence();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(message);
    process.exit(1);
  }
}
error.message : String(error)); + process.exit(1); + } +} diff --git a/scripts/test-projects.test-support.mjs b/scripts/test-projects.test-support.mjs index 8a061835919..8854cf3b908 100644 --- a/scripts/test-projects.test-support.mjs +++ b/scripts/test-projects.test-support.mjs @@ -238,6 +238,7 @@ const TOOLING_SOURCE_TEST_TARGETS = new Map([ ["scripts/lib/live-docker-stage.sh", ["test/scripts/live-docker-stage.test.ts"]], ["scripts/lib/openclaw-test-state.mjs", ["test/scripts/openclaw-test-state.test.ts"]], ["scripts/lib/vitest-local-scheduling.mjs", ["test/scripts/vitest-local-scheduling.test.ts"]], + ["scripts/mantis/publish-pr-evidence.mjs", ["test/scripts/mantis-publish-pr-evidence.test.ts"]], [ "scripts/run-vitest.mjs", [ @@ -286,6 +287,10 @@ const TOOLING_TEST_TARGETS = new Map([ ], ["test/scripts/live-docker-stage.test.ts", ["test/scripts/live-docker-stage.test.ts"]], ["test/scripts/openclaw-test-state.test.ts", ["test/scripts/openclaw-test-state.test.ts"]], + [ + "test/scripts/mantis-publish-pr-evidence.test.ts", + ["test/scripts/mantis-publish-pr-evidence.test.ts"], + ], [ "test/scripts/plugin-prerelease-test-plan.test.ts", ["test/scripts/plugin-prerelease-test-plan.test.ts"], diff --git a/test/scripts/mantis-publish-pr-evidence.test.ts b/test/scripts/mantis-publish-pr-evidence.test.ts new file mode 100644 index 00000000000..a641d83e474 --- /dev/null +++ b/test/scripts/mantis-publish-pr-evidence.test.ts @@ -0,0 +1,117 @@ +import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { + loadEvidenceManifest, + renderEvidenceComment, +} from "../../scripts/mantis/publish-pr-evidence.mjs"; + +function writeFixtureManifest() { + const dir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-")); + mkdirSync(path.join(dir, "baseline"), { recursive: true }); + mkdirSync(path.join(dir, "candidate"), { recursive: true }); + 
writeFileSync(path.join(dir, "baseline", "timeline.png"), "baseline timeline"); + writeFileSync(path.join(dir, "candidate", "timeline.png"), "candidate timeline"); + writeFileSync(path.join(dir, "baseline", "change.mp4"), "baseline clip"); + const manifestPath = path.join(dir, "mantis-evidence.json"); + writeFileSync( + manifestPath, + JSON.stringify({ + schemaVersion: 1, + id: "discord-status-reactions", + title: "Mantis Discord Status Reactions QA", + summary: "Mantis reran the scenario.", + scenario: "discord-status-reactions-tool-only", + comparison: { + baseline: { + expected: "queued-only", + sha: "aaa", + status: "fail", + }, + candidate: { + expected: "queued -> thinking -> done", + sha: "bbb", + status: "pass", + }, + pass: true, + }, + artifacts: [ + { + alt: "Baseline timeline", + kind: "timeline", + label: "Baseline queued-only", + lane: "baseline", + path: "baseline/timeline.png", + targetPath: "baseline.png", + }, + { + alt: "Candidate timeline", + kind: "timeline", + label: "Candidate queued -> thinking -> done", + lane: "candidate", + path: "candidate/timeline.png", + targetPath: "candidate.png", + }, + { + kind: "motionClip", + label: "Baseline change MP4", + lane: "baseline", + path: "baseline/change.mp4", + targetPath: "baseline-change.mp4", + }, + ], + }), + ); + return manifestPath; +} + +describe("scripts/mantis/publish-pr-evidence", () => { + it("renders a manifest-driven PR comment with inline screenshots and video links", () => { + const manifest = loadEvidenceManifest(writeFixtureManifest()); + const body = renderEvidenceComment({ + artifactRoot: "mantis/discord/pr-1/run-1", + artifactUrl: "https://github.com/openclaw/openclaw/actions/runs/1/artifacts/2", + manifest, + marker: "", + rawBase: + "https://raw.githubusercontent.com/openclaw/openclaw/qa-artifacts/mantis/discord/pr-1/run-1", + requestSource: "workflow_dispatch", + runUrl: "https://github.com/openclaw/openclaw/actions/runs/1", + treeUrl: 
"https://github.com/openclaw/openclaw/tree/qa-artifacts/mantis/discord/pr-1/run-1", + }); + + expect(body).toContain(""); + expect(body).toContain("Summary: Mantis reran the scenario."); + expect(body).toContain("| Baseline queued-only | Candidate queued -> thinking -> done |"); + expect(body).toContain( + ' { + const dir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-")); + const manifestPath = path.join(dir, "mantis-evidence.json"); + writeFileSync( + manifestPath, + JSON.stringify({ + artifacts: [ + { + kind: "metadata", + path: "../outside.json", + }, + ], + id: "bad", + scenario: "bad", + schemaVersion: 1, + title: "Bad", + }), + ); + + expect(() => loadEvidenceManifest(manifestPath)).toThrow(/escapes manifest directory/u); + }); +});