feat: add reusable Mantis evidence publishing

This commit is contained in:
Peter Steinberger
2026-05-05 09:01:43 +01:00
parent 5a0d6c7ad8
commit f3d531439b
8 changed files with 1233 additions and 146 deletions

View File

@@ -474,6 +474,40 @@ jobs:
echo "- Candidate desktop video: \`candidate/discord-status-reactions-tool-only-desktop.mp4\`"
} > "$root/mantis-report.md"
jq -n \
--arg baseline_status "$baseline_status" \
--arg candidate_status "$candidate_status" \
--arg baseline_sha "${{ needs.validate_refs.outputs.baseline_revision }}" \
--arg candidate_sha "${{ needs.validate_refs.outputs.candidate_revision }}" \
'{
schemaVersion: 1,
id: "discord-status-reactions",
title: "Mantis Discord Status Reactions QA",
summary: "Mantis reran Discord status reactions against the known queued-only baseline and the candidate ref. The baseline reproduced the bug, while the candidate showed the expected queued -> thinking -> done reaction sequence.",
scenario: "discord-status-reactions-tool-only",
comparison: {
baseline: { sha: $baseline_sha, expected: "queued-only", status: $baseline_status, reproduced: ($baseline_status == "fail") },
candidate: { sha: $candidate_sha, expected: "queued -> thinking -> done", status: $candidate_status, fixed: ($candidate_status == "pass") },
pass: (($baseline_status == "fail") and ($candidate_status == "pass"))
},
artifacts: [
{ kind: "timeline", lane: "baseline", label: "Baseline queued-only", path: "baseline/discord-status-reactions-tool-only-timeline.png", targetPath: "baseline.png", alt: "Baseline Discord status reaction timeline", width: 420 },
{ kind: "timeline", lane: "candidate", label: "Candidate queued -> thinking -> done", path: "candidate/discord-status-reactions-tool-only-timeline.png", targetPath: "candidate.png", alt: "Candidate Discord status reaction timeline", width: 420 },
{ kind: "desktopScreenshot", lane: "baseline", label: "Baseline desktop/VNC browser", path: "baseline/discord-status-reactions-tool-only-desktop.png", targetPath: "baseline-desktop.png", alt: "Baseline Mantis desktop browser screenshot", width: 420 },
{ kind: "desktopScreenshot", lane: "candidate", label: "Candidate desktop/VNC browser", path: "candidate/discord-status-reactions-tool-only-desktop.png", targetPath: "candidate-desktop.png", alt: "Candidate Mantis desktop browser screenshot", width: 420 },
{ kind: "motionPreview", lane: "baseline", label: "Baseline motion preview", path: "baseline/discord-status-reactions-tool-only-desktop-preview.gif", targetPath: "baseline-desktop-preview.gif", alt: "Animated baseline desktop preview", width: 420, required: false },
{ kind: "motionPreview", lane: "candidate", label: "Candidate motion preview", path: "candidate/discord-status-reactions-tool-only-desktop-preview.gif", targetPath: "candidate-desktop-preview.gif", alt: "Animated candidate desktop preview", width: 420, required: false },
{ kind: "motionClip", lane: "baseline", label: "Baseline change MP4", path: "baseline/discord-status-reactions-tool-only-desktop-change.mp4", targetPath: "baseline-desktop-change.mp4", required: false },
{ kind: "motionClip", lane: "candidate", label: "Candidate change MP4", path: "candidate/discord-status-reactions-tool-only-desktop-change.mp4", targetPath: "candidate-desktop-change.mp4", required: false },
{ kind: "fullVideo", lane: "baseline", label: "Baseline desktop MP4", path: "baseline/discord-status-reactions-tool-only-desktop.mp4", targetPath: "baseline-desktop.mp4" },
{ kind: "fullVideo", lane: "candidate", label: "Candidate desktop MP4", path: "candidate/discord-status-reactions-tool-only-desktop.mp4", targetPath: "candidate-desktop.mp4" },
{ kind: "metadata", lane: "baseline", label: "Baseline preview metadata", path: "baseline/discord-status-reactions-tool-only-desktop-preview.json", targetPath: "baseline-desktop-preview.json", required: false },
{ kind: "metadata", lane: "candidate", label: "Candidate preview metadata", path: "candidate/discord-status-reactions-tool-only-desktop-preview.json", targetPath: "candidate-desktop-preview.json", required: false },
{ kind: "metadata", lane: "run", label: "Comparison JSON", path: "comparison.json", targetPath: "comparison.json" },
{ kind: "report", lane: "run", label: "Mantis report", path: "mantis-report.md", targetPath: "mantis-report.md" }
]
}' > "$root/mantis-evidence.json"
cat "$root/mantis-report.md" >> "$GITHUB_STEP_SUMMARY"
if [[ "$baseline_status" != "fail" ]]; then
@@ -514,155 +548,17 @@ jobs:
GH_TOKEN: ${{ steps.mantis_app_token.outputs.token }}
TARGET_PR: ${{ needs.resolve_request.outputs.pr_number }}
ARTIFACT_URL: ${{ steps.upload_artifact.outputs.artifact-url }}
BASELINE_SHA: ${{ needs.validate_refs.outputs.baseline_revision }}
CANDIDATE_SHA: ${{ needs.validate_refs.outputs.candidate_revision }}
REQUEST_SOURCE: ${{ needs.resolve_request.outputs.request_source }}
shell: bash
run: |
set -euo pipefail
if [[ ! "$TARGET_PR" =~ ^[0-9]+$ ]]; then
echo "pr_number must be numeric, got '${TARGET_PR}'." >&2
exit 1
fi
root=".artifacts/qa-e2e/mantis/discord-status-reactions"
for required in \
"$root/comparison.json" \
"$root/baseline/discord-status-reactions-tool-only-timeline.png" \
"$root/candidate/discord-status-reactions-tool-only-timeline.png" \
"$root/baseline/discord-status-reactions-tool-only-desktop.png" \
"$root/candidate/discord-status-reactions-tool-only-desktop.png" \
"$root/baseline/discord-status-reactions-tool-only-desktop.mp4" \
"$root/candidate/discord-status-reactions-tool-only-desktop.mp4"
do
if [[ ! -f "$required" ]]; then
echo "Missing required QA evidence file: $required" >&2
exit 1
fi
done
gh api "repos/${GITHUB_REPOSITORY}/pulls/${TARGET_PR}" --jq '.number' >/dev/null
artifact_root="mantis/discord-status-reactions/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
artifacts_worktree="$(mktemp -d)"
git init --quiet "$artifacts_worktree"
git -C "$artifacts_worktree" config user.name "github-actions[bot]"
git -C "$artifacts_worktree" config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git -C "$artifacts_worktree" remote add origin "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
if git -C "$artifacts_worktree" fetch --quiet origin qa-artifacts; then
git -C "$artifacts_worktree" checkout --quiet -B qa-artifacts FETCH_HEAD
else
git -C "$artifacts_worktree" checkout --quiet --orphan qa-artifacts
fi
mkdir -p "$artifacts_worktree/$artifact_root"
cp "$root/baseline/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/baseline.png"
cp "$root/candidate/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/candidate.png"
cp "$root/baseline/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/baseline-desktop.png"
cp "$root/candidate/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/candidate-desktop.png"
has_desktop_previews="false"
if [[ -f "$root/baseline/discord-status-reactions-tool-only-desktop-preview.gif" && -f "$root/candidate/discord-status-reactions-tool-only-desktop-preview.gif" ]]; then
cp "$root/baseline/discord-status-reactions-tool-only-desktop-preview.gif" "$artifacts_worktree/$artifact_root/baseline-desktop-preview.gif"
cp "$root/candidate/discord-status-reactions-tool-only-desktop-preview.gif" "$artifacts_worktree/$artifact_root/candidate-desktop-preview.gif"
cp "$root/baseline/discord-status-reactions-tool-only-desktop-preview.json" "$artifacts_worktree/$artifact_root/baseline-desktop-preview.json"
cp "$root/candidate/discord-status-reactions-tool-only-desktop-preview.json" "$artifacts_worktree/$artifact_root/candidate-desktop-preview.json"
has_desktop_previews="true"
fi
has_change_clips="false"
if [[ -f "$root/baseline/discord-status-reactions-tool-only-desktop-change.mp4" && -f "$root/candidate/discord-status-reactions-tool-only-desktop-change.mp4" ]]; then
cp "$root/baseline/discord-status-reactions-tool-only-desktop-change.mp4" "$artifacts_worktree/$artifact_root/baseline-desktop-change.mp4"
cp "$root/candidate/discord-status-reactions-tool-only-desktop-change.mp4" "$artifacts_worktree/$artifact_root/candidate-desktop-change.mp4"
has_change_clips="true"
fi
cp "$root/baseline/discord-status-reactions-tool-only-desktop.mp4" "$artifacts_worktree/$artifact_root/baseline-desktop.mp4"
cp "$root/candidate/discord-status-reactions-tool-only-desktop.mp4" "$artifacts_worktree/$artifact_root/candidate-desktop.mp4"
cp "$root/comparison.json" "$artifacts_worktree/$artifact_root/comparison.json"
cp "$root/mantis-report.md" "$artifacts_worktree/$artifact_root/mantis-report.md"
git -C "$artifacts_worktree" add "$artifact_root"
if git -C "$artifacts_worktree" diff --cached --quiet; then
echo "No QA screenshot/video artifact changes to publish."
else
git -C "$artifacts_worktree" commit --quiet -m "qa: publish Mantis Discord evidence for PR ${TARGET_PR}"
git -C "$artifacts_worktree" push --quiet origin HEAD:qa-artifacts
fi
encoded_artifact_root="${artifact_root// /%20}"
raw_base="https://raw.githubusercontent.com/${GITHUB_REPOSITORY}/qa-artifacts/${encoded_artifact_root}"
baseline_status="$(jq -r '.baseline.status' "$root/comparison.json")"
candidate_status="$(jq -r '.candidate.status' "$root/comparison.json")"
pass="$(jq -r '.pass' "$root/comparison.json")"
preview_section=""
if [[ "$has_desktop_previews" == "true" ]]; then
preview_section="$(cat <<EOF
| Baseline motion preview | Candidate motion preview |
| --- | --- |
| <img src="${raw_base}/baseline-desktop-preview.gif" width="420" alt="Animated baseline desktop preview"> | <img src="${raw_base}/candidate-desktop-preview.gif" width="420" alt="Animated candidate desktop preview"> |
EOF
)"
fi
change_clip_section=""
if [[ "$has_change_clips" == "true" ]]; then
change_clip_section="$(cat <<EOF
Motion-trimmed clips:
- [Baseline change MP4](${raw_base}/baseline-desktop-change.mp4)
- [Candidate change MP4](${raw_base}/candidate-desktop-change.mp4)
EOF
)"
fi
comment_file="$(mktemp)"
cat > "$comment_file" <<EOF
<!-- mantis-discord-status-reactions -->
## Mantis Discord Status Reactions QA
Summary: Mantis reran Discord status reactions against the known queued-only baseline and the candidate ref. The baseline reproduced the bug, while the candidate showed the expected queued -> thinking -> done reaction sequence.
- Scenario: \`discord-status-reactions-tool-only\`
- Trigger: \`${REQUEST_SOURCE}\`
- Run: https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}
- Artifact: ${ARTIFACT_URL}
- Baseline: \`${baseline_status}\` at \`${BASELINE_SHA}\`
- Candidate: \`${candidate_status}\` at \`${CANDIDATE_SHA}\`
- Overall: \`${pass}\`
| Baseline queued-only | Candidate queued -> thinking -> done |
| --- | --- |
| <img src="${raw_base}/baseline.png" width="420" alt="Baseline Discord status reaction timeline"> | <img src="${raw_base}/candidate.png" width="420" alt="Candidate Discord status reaction timeline"> |
| Baseline desktop/VNC browser | Candidate desktop/VNC browser |
| --- | --- |
| <img src="${raw_base}/baseline-desktop.png" width="420" alt="Baseline Mantis desktop browser screenshot"> | <img src="${raw_base}/candidate-desktop.png" width="420" alt="Candidate Mantis desktop browser screenshot"> |
${preview_section}
${change_clip_section}
Full videos:
- [Baseline desktop MP4](${raw_base}/baseline-desktop.mp4)
- [Candidate desktop MP4](${raw_base}/candidate-desktop.mp4)
Raw QA files: https://github.com/${GITHUB_REPOSITORY}/tree/qa-artifacts/${artifact_root}
EOF
comment_id="$(
gh api --paginate "repos/${GITHUB_REPOSITORY}/issues/${TARGET_PR}/comments" \
--jq '.[] | select(.body | contains("<!-- mantis-discord-status-reactions -->")) | .id' \
| tail -n 1
)"
if [[ -n "$comment_id" ]]; then
comment_payload="$(mktemp)"
jq -n --rawfile body "$comment_file" '{ body: $body }' > "$comment_payload"
if gh api --method PATCH "repos/${GITHUB_REPOSITORY}/issues/comments/${comment_id}" --input "$comment_payload" >/dev/null; then
echo "Updated Mantis QA evidence comment on PR #${TARGET_PR}."
else
echo "::warning::Could not update existing Mantis QA evidence comment ${comment_id}; creating a new one."
gh pr comment "$TARGET_PR" --body-file "$comment_file"
echo "Created Mantis QA evidence comment on PR #${TARGET_PR}."
fi
else
gh pr comment "$TARGET_PR" --body-file "$comment_file"
echo "Created Mantis QA evidence comment on PR #${TARGET_PR}."
fi
node scripts/mantis/publish-pr-evidence.mjs \
--manifest "$root/mantis-evidence.json" \
--target-pr "$TARGET_PR" \
--artifact-root "mantis/discord-status-reactions/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" \
--marker "<!-- mantis-discord-status-reactions -->" \
--artifact-url "$ARTIFACT_URL" \
--run-url "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \
--request-source "$REQUEST_SOURCE"

83
.github/workflows/mantis-scenario.yml vendored Normal file
View File

@@ -0,0 +1,83 @@
name: Mantis Scenario
on:
workflow_dispatch:
inputs:
scenario_id:
description: Mantis scenario id to run
required: true
default: discord-status-reactions-tool-only
type: choice
options:
- discord-status-reactions-tool-only
- slack-desktop-smoke
baseline_ref:
description: Optional baseline ref for before/after scenarios
required: false
default: 0bf06e953fdda290799fc9fb9244a8f67fdae593
type: string
candidate_ref:
description: Candidate ref, tag, or SHA
required: true
default: main
type: string
pr_number:
description: Optional PR number to receive QA evidence
required: false
type: string
permissions:
actions: write
contents: read
concurrency:
group: mantis-scenario-${{ inputs.scenario_id }}-${{ inputs.pr_number || inputs.candidate_ref || github.run_id }}
cancel-in-progress: false
jobs:
dispatch:
name: Dispatch selected Mantis workflow
runs-on: blacksmith-8vcpu-ubuntu-2404
steps:
- name: Dispatch scenario
env:
GH_TOKEN: ${{ github.token }}
BASELINE_REF: ${{ inputs.baseline_ref }}
CANDIDATE_REF: ${{ inputs.candidate_ref }}
PR_NUMBER: ${{ inputs.pr_number }}
SCENARIO_ID: ${{ inputs.scenario_id }}
shell: bash
run: |
set -euo pipefail
# Map the selected scenario id to the workflow that owns it, collect the
# `-f` inputs that workflow accepts, then dispatch it once at the bottom.
# The target workflow definition is always taken from `main` (--ref main);
# the candidate/baseline refs are forwarded only as workflow inputs.
dispatch_fields=()
case "$SCENARIO_ID" in
  discord-status-reactions-tool-only)
    target_workflow="mantis-discord-status-reactions.yml"
    # Only the before/after Discord scenario receives a baseline ref.
    dispatch_fields+=(-f "baseline_ref=${BASELINE_REF}")
    ;;
  slack-desktop-smoke)
    target_workflow="mantis-slack-desktop-smoke.yml"
    ;;
  *)
    echo "Unsupported Mantis scenario: ${SCENARIO_ID}" >&2
    exit 1
    ;;
esac
dispatch_fields+=(-f "candidate_ref=${CANDIDATE_REF}")
# pr_number is optional; omit the field entirely when it was not provided.
if [[ -n "${PR_NUMBER:-}" ]]; then
  dispatch_fields+=(-f "pr_number=${PR_NUMBER}")
fi
gh workflow run "$target_workflow" \
  --repo "$GITHUB_REPOSITORY" \
  --ref main \
  "${dispatch_fields[@]}"

View File

@@ -0,0 +1,328 @@
name: Mantis Slack Desktop Smoke
on:
workflow_dispatch:
inputs:
candidate_ref:
description: Ref, tag, or SHA to run inside the VNC desktop
required: true
default: main
type: string
pr_number:
description: Optional PR number to receive the QA evidence comment
required: false
type: string
scenario_id:
description: Slack QA scenario id
required: true
default: slack-canary
type: string
keep_vm:
description: Keep the desktop lease open after a passing run
required: false
default: false
type: boolean
permissions:
contents: write
issues: write
pull-requests: write
concurrency:
group: mantis-slack-desktop-smoke-${{ inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }}
cancel-in-progress: false
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
NODE_VERSION: "24.x"
PNPM_VERSION: "10.33.0"
OPENCLAW_BUILD_PRIVATE_QA: "1"
OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
jobs:
authorize_actor:
name: Authorize workflow actor
runs-on: blacksmith-8vcpu-ubuntu-2404
steps:
- name: Require maintainer-level repository access
uses: actions/github-script@v8
with:
script: |
// Gate the workflow on the triggering actor's repository permission.
// `github`, `context`, and `core` are provided by actions/github-script.
const { owner, repo } = context.repo;
const actor = context.actor;
const response = await github.rest.repos.getCollaboratorPermissionLevel({
  owner,
  repo,
  username: actor,
});
const { permission } = response.data;
core.info(`Actor ${actor} permission: ${permission}`);
// Only these permission levels may start the run; anything else fails the job.
const hasRequiredAccess = ["admin", "maintain", "write"].includes(permission);
if (!hasRequiredAccess) {
  core.setFailed(
    `Workflow requires write/maintain/admin access. Actor "${actor}" has "${permission}".`,
  );
}
validate_ref:
name: Validate candidate ref
needs: authorize_actor
runs-on: blacksmith-8vcpu-ubuntu-2404
outputs:
candidate_revision: ${{ steps.validate.outputs.candidate_revision }}
steps:
- name: Checkout harness ref
uses: actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Validate ref is trusted
id: validate
env:
GH_TOKEN: ${{ github.token }}
CANDIDATE_REF: ${{ inputs.candidate_ref }}
shell: bash
run: |
# Decide whether the requested candidate ref may run in this workflow.
# A commit is accepted when it is (1) contained in origin/main, (2) pointed at
# by a tag whose name starts with "v", or (3) the head SHA of an open PR whose
# head branch lives in this repository. Anything else fails the step.
set -euo pipefail
# Refresh refs/remotes/origin/main so the ancestry check uses the fetched tip.
git fetch --no-tags origin +refs/heads/main:refs/remotes/origin/main
# Resolve the ref to a commit SHA; an unresolvable ref aborts here under `set -e`.
revision="$(git rev-parse "${CANDIDATE_REF}^{commit}")"
# Stays empty unless one of the trust checks below matches.
reason=""
if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then
reason="main-ancestor"
elif git tag --points-at "$revision" | grep -Eq '^v'; then
reason="release-tag"
else
# Count open PRs associated with the commit where the PR head is exactly this
# SHA and the head repository is this repository (PRs from forks do not match).
pr_head_count="$(
gh api \
-H "Accept: application/vnd.github+json" \
"repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \
--jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'")] | length'
)"
if [[ "$pr_head_count" != "0" ]]; then
reason="open-pr-head"
fi
fi
# No check matched: refuse to continue.
if [[ -z "$reason" ]]; then
echo "Candidate ref '${CANDIDATE_REF}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2
exit 1
fi
# Publish the resolved SHA as a step output and record the decision in the run summary.
echo "candidate_revision=${revision}" >> "$GITHUB_OUTPUT"
{
echo "candidate: \`${CANDIDATE_REF}\`"
echo "candidate SHA: \`${revision}\`"
echo "candidate trust reason: \`${reason}\`"
} >> "$GITHUB_STEP_SUMMARY"
run_slack_desktop:
name: Run Slack desktop smoke
needs: validate_ref
runs-on: blacksmith-8vcpu-ubuntu-2404
timeout-minutes: 180
environment: qa-live-shared
steps:
- name: Checkout harness ref
uses: actions/checkout@v6
with:
persist-credentials: false
fetch-depth: 0
- name: Setup Node environment
uses: ./.github/actions/setup-node-env
with:
node-version: ${{ env.NODE_VERSION }}
pnpm-version: ${{ env.PNPM_VERSION }}
install-bun: "true"
- name: Build Mantis harness
run: pnpm build
- name: Setup Go for Crabbox CLI
uses: actions/setup-go@v6
with:
go-version: "1.26.x"
cache: false
- name: Install Crabbox CLI
shell: bash
run: |
# Build the Crabbox CLI from source and put it on PATH for later steps.
set -euo pipefail
install_dir="${RUNNER_TEMP}/crabbox"
mkdir -p "$install_dir" "$HOME/.local/bin"
# Shallow clone of the repository's default branch; no tag or commit is pinned.
git clone --depth 1 https://github.com/openclaw/crabbox.git "$install_dir/src"
go build -C "$install_dir/src" -o "$HOME/.local/bin/crabbox" ./cmd/crabbox
# Make the binary visible to subsequent steps in this job.
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
# Smoke checks: the binary runs, `warmup --help` mentions "-desktop", and the
# `media preview` subcommand exists. Any failure aborts the step.
"$HOME/.local/bin/crabbox" --version
"$HOME/.local/bin/crabbox" warmup --help 2>&1 | grep -q -- "-desktop"
"$HOME/.local/bin/crabbox" media preview --help >/dev/null
- name: Prepare candidate worktree
env:
CANDIDATE_SHA: ${{ needs.validate_ref.outputs.candidate_revision }}
shell: bash
run: |
# Check out the validated candidate commit into its own detached worktree and
# install/build it there, separately from the harness checkout.
set -euo pipefail
worktree_root=".artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees"
mkdir -p "$worktree_root"
# CANDIDATE_SHA is the revision emitted by the validate_ref job.
git worktree add --detach "$worktree_root/candidate" "$CANDIDATE_SHA"
# --frozen-lockfile: fail instead of updating the candidate's lockfile.
pnpm --dir "$worktree_root/candidate" install --frozen-lockfile
pnpm --dir "$worktree_root/candidate" build
- name: Run Slack desktop scenario
id: run_mantis
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENCLAW_LIVE_OPENAI_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }}
OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}
OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }}
CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }}
OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }}
OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }}
CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }}
CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
KEEP_VM: ${{ inputs.keep_vm }}
SCENARIO_ID: ${{ inputs.scenario_id }}
shell: bash
run: |
# Run the Slack desktop smoke scenario against the candidate worktree, try to
# derive a motion preview from the recording, write mantis-evidence.json for the
# PR publisher, and fail the step when the scenario status is not "pass".
set -euo pipefail
# Abort with a message when the named environment variable is unset or empty.
# Uses indirect expansion (${!key}) so the variable name is passed as a string.
require_var() {
local key="$1"
if [[ -z "${!key:-}" ]]; then
echo "Missing required ${key}." >&2
exit 1
fi
}
# Prefer the generic CRABBOX_* values; fall back to the OPENCLAW_QA_MANTIS_* ones.
CRABBOX_COORDINATOR="${CRABBOX_COORDINATOR:-${OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR:-}}"
CRABBOX_COORDINATOR_TOKEN="${CRABBOX_COORDINATOR_TOKEN:-${OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN:-}}"
export CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN
# Only these four are enforced here; CRABBOX_COORDINATOR itself is not checked.
require_var OPENCLAW_LIVE_OPENAI_KEY
require_var OPENCLAW_QA_CONVEX_SITE_URL
require_var OPENCLAW_QA_CONVEX_SECRET_CI
require_var CRABBOX_COORDINATOR_TOKEN
candidate_repo="$(pwd)/.artifacts/qa-e2e/mantis/slack-desktop-smoke-worktrees/candidate"
root="$candidate_repo/.artifacts/qa-e2e/mantis/slack-desktop-smoke"
# Emit output_dir before running anything so the later always() upload/comment
# steps, which are gated on this output, can still locate the directory.
echo "output_dir=${root}" >> "$GITHUB_OUTPUT"
# Optional extra CLI flag, added only when the keep_vm input is true.
keep_args=()
if [[ "$KEEP_VM" == "true" ]]; then
keep_args=(--keep-lease)
fi
# NOTE(review): under `set -e` a non-zero exit from this command stops the
# script before mantis-evidence.json is written, leaving the publish step
# without a manifest — confirm the CLI reports scenario failure via the summary
# JSON status rather than its exit code.
pnpm openclaw qa mantis slack-desktop-smoke \
--repo-root "$candidate_repo" \
--output-dir "$root" \
--provider hetzner \
--class standard \
--idle-timeout 45m \
--ttl 120m \
--gateway-setup \
--credential-source convex \
--credential-role ci \
--provider-mode live-frontier \
--model openai/gpt-5.4 \
--alt-model openai/gpt-5.4 \
--fast \
--scenario "$SCENARIO_ID" \
"${keep_args[@]}"
# Preview generation is best-effort and only attempted when a recording exists.
if [[ -f "$root/slack-desktop-smoke.mp4" ]]; then
# Install ffmpeg when ffmpeg or ffprobe is missing; `|| true` keeps an install
# failure from aborting the step.
if ! command -v ffmpeg >/dev/null 2>&1 || ! command -v ffprobe >/dev/null 2>&1; then
sudo apt-get update && sudo apt-get install -y ffmpeg || true
fi
# On failure, delete any partial preview outputs and warn instead of failing.
# The manifest below marks these three files as required: false.
if ! crabbox media preview \
--input "$root/slack-desktop-smoke.mp4" \
--output "$root/slack-desktop-smoke-preview.gif" \
--trimmed-video-output "$root/slack-desktop-smoke-change.mp4" \
--json > "$root/slack-desktop-smoke-preview.json"; then
rm -f "$root/slack-desktop-smoke-preview.gif"
rm -f "$root/slack-desktop-smoke-change.mp4"
rm -f "$root/slack-desktop-smoke-preview.json"
echo "::warning::Could not generate Slack motion-trimmed desktop preview."
fi
fi
# Scenario verdict as recorded in the summary JSON.
status="$(jq -r '.status' "$root/mantis-slack-desktop-smoke-summary.json")"
# Build the evidence manifest. `path` values name files in $root; entries
# without `required: false` (screenshot, summary, report) carry no optional flag.
jq -n \
--arg status "$status" \
--arg candidate_sha "${{ needs.validate_ref.outputs.candidate_revision }}" \
--arg scenario "$SCENARIO_ID" \
'{
schemaVersion: 1,
id: "slack-desktop-smoke",
title: "Mantis Slack Desktop Smoke QA",
summary: "Mantis ran Slack QA inside a Crabbox Linux VNC desktop, started an OpenClaw Slack gateway in that VM, opened Slack Web in the visible browser, and captured screenshot/video evidence.",
scenario: $scenario,
comparison: {
candidate: { sha: $candidate_sha, expected: "Slack QA and VM gateway setup pass", status: $status, fixed: ($status == "pass") },
pass: ($status == "pass")
},
artifacts: [
{ kind: "desktopScreenshot", lane: "candidate", label: "Slack desktop/VNC browser", path: "slack-desktop-smoke.png", targetPath: "slack-desktop.png", alt: "Slack Web desktop screenshot from the Mantis VM", width: 720, inline: true },
{ kind: "motionPreview", lane: "candidate", label: "Slack motion preview", path: "slack-desktop-smoke-preview.gif", targetPath: "slack-desktop-preview.gif", alt: "Animated Slack desktop preview", width: 720, inline: true, required: false },
{ kind: "motionClip", lane: "candidate", label: "Slack change MP4", path: "slack-desktop-smoke-change.mp4", targetPath: "slack-desktop-change.mp4", required: false },
{ kind: "fullVideo", lane: "candidate", label: "Slack desktop MP4", path: "slack-desktop-smoke.mp4", targetPath: "slack-desktop.mp4", required: false },
{ kind: "metadata", lane: "run", label: "Slack desktop summary", path: "mantis-slack-desktop-smoke-summary.json", targetPath: "summary.json" },
{ kind: "report", lane: "run", label: "Slack desktop report", path: "mantis-slack-desktop-smoke-report.md", targetPath: "report.md" },
{ kind: "metadata", lane: "run", label: "Slack command log", path: "slack-desktop-command.log", targetPath: "slack-desktop-command.log", required: false },
{ kind: "metadata", lane: "run", label: "Slack preview metadata", path: "slack-desktop-smoke-preview.json", targetPath: "slack-desktop-preview.json", required: false },
{ kind: "metadata", lane: "run", label: "Slack error", path: "error.txt", targetPath: "error.txt", required: false }
]
}' > "$root/mantis-evidence.json"
# Surface the scenario report on the run summary page.
cat "$root/mantis-slack-desktop-smoke-report.md" >> "$GITHUB_STEP_SUMMARY"
# Fail the step only after the manifest and summary have been written.
if [[ "$status" != "pass" ]]; then
echo "Slack desktop smoke failed." >&2
exit 1
fi
- name: Upload Mantis Slack desktop artifacts
id: upload_artifact
if: ${{ always() && steps.run_mantis.outputs.output_dir != '' }}
uses: actions/upload-artifact@v4
with:
name: mantis-slack-desktop-smoke-${{ github.run_id }}-${{ github.run_attempt }}
path: ${{ steps.run_mantis.outputs.output_dir }}
retention-days: 14
if-no-files-found: warn
- name: Create Mantis GitHub App token
id: mantis_app_token
if: ${{ always() && inputs.pr_number != '' }}
uses: actions/create-github-app-token@v3
with:
app-id: ${{ secrets.MANTIS_GITHUB_APP_ID }}
private-key: ${{ secrets.MANTIS_GITHUB_APP_PRIVATE_KEY }}
owner: ${{ github.repository_owner }}
repositories: ${{ github.event.repository.name }}
permission-contents: write
permission-issues: write
permission-pull-requests: write
- name: Comment PR with inline QA evidence
if: ${{ always() && inputs.pr_number != '' && steps.run_mantis.outputs.output_dir != '' }}
env:
GH_TOKEN: ${{ steps.mantis_app_token.outputs.token }}
TARGET_PR: ${{ inputs.pr_number }}
ARTIFACT_URL: ${{ steps.upload_artifact.outputs.artifact-url }}
REQUEST_SOURCE: workflow_dispatch
shell: bash
run: |
# Hand the evidence manifest to the reusable publisher script, which is passed
# the target PR, the publish directory, the comment marker, and the run links.
set -euo pipefail
# Output directory recorded by the run_mantis step.
root="${{ steps.run_mantis.outputs.output_dir }}"
# The artifact root is unique per PR, run id, and run attempt.
node scripts/mantis/publish-pr-evidence.mjs \
--manifest "$root/mantis-evidence.json" \
--target-pr "$TARGET_PR" \
--artifact-root "mantis/slack-desktop-smoke/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" \
--marker "<!-- mantis-slack-desktop-smoke -->" \
--artifact-url "$ARTIFACT_URL" \
--run-url "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \
--request-source "$REQUEST_SOURCE"

View File

@@ -176,6 +176,72 @@ Crabbox CLI from
`openclaw/crabbox` main so it can use the current desktop/browser lease flags
before the next Crabbox binary release is cut.
`Mantis Scenario` is the generic manual entrypoint. It takes a `scenario_id`,
`candidate_ref`, optional `baseline_ref`, and optional `pr_number`, then
dispatches the scenario-owned workflow. The wrapper is intentionally thin:
scenario workflows still own their transport setup, credentials, VM class,
expected oracle, and artifact manifest.
`Mantis Slack Desktop Smoke` is the first Slack VM workflow. It checks out the
trusted candidate ref in a separate worktree, leases a Crabbox Linux desktop,
runs `pnpm openclaw qa mantis slack-desktop-smoke --gateway-setup` against that
candidate, opens Slack Web in the VNC browser, records the desktop, generates a
motion-trimmed preview with `crabbox media preview`, uploads the full artifact
directory, and optionally posts the inline evidence comment on the target PR.
Use this lane when you want "a Linux desktop with Slack and a claw running"
instead of only a bot-to-bot Slack transcript.
Every PR-publishing scenario writes `mantis-evidence.json` next to its report.
This schema is the handoff between scenario code and GitHub comments:
```json
{
"schemaVersion": 1,
"id": "discord-status-reactions",
"title": "Mantis Discord Status Reactions QA",
"summary": "Human-readable top summary for the PR comment.",
"scenario": "discord-status-reactions-tool-only",
"comparison": {
"baseline": { "sha": "...", "status": "fail", "expected": "queued-only" },
"candidate": { "sha": "...", "status": "pass", "expected": "queued -> thinking -> done" },
"pass": true
},
"artifacts": [
{
"kind": "timeline",
"lane": "baseline",
"label": "Baseline queued-only",
"path": "baseline/timeline.png",
"targetPath": "baseline.png",
"alt": "Baseline Discord timeline",
"width": 420
}
]
}
```
Artifact `path` values are relative to the manifest directory. `targetPath`
values are relative paths under the `qa-artifacts` branch publish directory.
The publisher rejects path traversal and skips entries marked
`"required": false` when optional previews or videos are unavailable.
Supported artifact kinds:
- `timeline`: deterministic scenario screenshot, usually before/after.
- `desktopScreenshot`: VNC/browser desktop screenshot.
- `motionPreview`: inline animated GIF generated from the desktop recording.
- `motionClip`: motion-trimmed MP4 that removes static lead-in and tail.
- `fullVideo`: full MP4 recording for deep inspection.
- `metadata`: JSON/log sidecar.
- `report`: Markdown report.
The reusable publisher is `scripts/mantis/publish-pr-evidence.mjs`. Workflows
call it with the manifest, target PR, `qa-artifacts` target root, comment marker,
Actions artifact URL, run URL, and request source. It copies declared artifacts
to the `qa-artifacts` branch, builds a summary-first PR comment with inline
images/previews and linked videos, then updates the existing marker comment or
creates one.
You can also trigger the status-reactions run directly from a PR comment:
```text

View File

@@ -24,6 +24,7 @@ export type MantisBeforeAfterOptions = {
export type MantisBeforeAfterResult = {
comparisonPath: string;
manifestPath: string;
outputDir: string;
reportPath: string;
status: "pass" | "fail";
@@ -217,6 +218,106 @@ function renderReport(params: {
return `${lines.join("\n")}\n`;
}
/**
 * Express an artifact path relative to the run's output directory.
 *
 * Absolute paths are rewritten relative to `outputDir`; paths that are already
 * relative are returned untouched. A missing or empty path yields `undefined`.
 */
function relativeArtifactPath(outputDir: string, artifactPath: string | undefined) {
  if (artifactPath && path.isAbsolute(artifactPath)) {
    return path.relative(outputDir, artifactPath);
  }
  return artifactPath ? artifactPath : undefined;
}
/**
 * Assemble the `mantis-evidence.json` payload for a before/after run.
 *
 * The artifact list always starts with the comparison JSON and the Markdown
 * report. Timeline screenshots and full videos are appended per lane only when
 * the lane result carries the corresponding path; videos are additionally
 * flagged `required: false`. Artifact paths are made relative to `outputDir`.
 */
function buildEvidenceManifest(params: {
  baseline: LaneResult;
  candidate: LaneResult;
  comparison: Comparison;
  outputDir: string;
}) {
  type EvidenceArtifact = {
    alt?: string;
    kind: string;
    label: string;
    lane: "baseline" | "candidate" | "run";
    path: string;
    required?: boolean;
    targetPath: string;
    width?: number;
  };
  const { baseline, candidate, comparison, outputDir } = params;

  // Yields zero or one timeline entry, depending on whether a screenshot exists.
  const timelineEntries = (
    lane: "baseline" | "candidate",
    label: string,
    alt: string,
    screenshotPath: string | undefined,
  ): EvidenceArtifact[] => {
    const relativePath = relativeArtifactPath(outputDir, screenshotPath);
    if (!relativePath) {
      return [];
    }
    return [
      {
        alt,
        kind: "timeline",
        label,
        lane,
        path: relativePath,
        targetPath: `${lane}.png`,
        width: 420,
      },
    ];
  };

  // Yields zero or one optional full-video entry for the lane.
  const videoEntries = (
    lane: "baseline" | "candidate",
    label: string,
    videoPath: string | undefined,
  ): EvidenceArtifact[] => {
    const relativePath = relativeArtifactPath(outputDir, videoPath);
    if (!relativePath) {
      return [];
    }
    return [
      {
        kind: "fullVideo",
        label,
        lane,
        path: relativePath,
        targetPath: `${lane}.mp4`,
        required: false,
      },
    ];
  };

  // Entry order is part of the serialized manifest, so keep it fixed:
  // run files, both timelines, then both videos.
  const artifacts: EvidenceArtifact[] = [
    {
      kind: "metadata",
      label: "Comparison JSON",
      lane: "run",
      path: "comparison.json",
      targetPath: "comparison.json",
    },
    {
      kind: "report",
      label: "Mantis report",
      lane: "run",
      path: "mantis-report.md",
      targetPath: "mantis-report.md",
    },
    ...timelineEntries(
      "baseline",
      "Baseline queued-only",
      "Baseline Discord status reaction timeline",
      baseline.screenshotPath,
    ),
    ...timelineEntries(
      "candidate",
      "Candidate queued -> thinking -> done",
      "Candidate Discord status reaction timeline",
      candidate.screenshotPath,
    ),
    ...videoEntries("baseline", "Baseline MP4", baseline.videoPath),
    ...videoEntries("candidate", "Candidate MP4", candidate.videoPath),
  ];

  return {
    artifacts,
    comparison,
    id: comparison.scenario,
    scenario: comparison.scenario,
    schemaVersion: 1,
    summary:
      "Mantis ran the before/after scenario, captured baseline and candidate evidence, and compared the expected bug reproduction against the candidate fix.",
    title: "Mantis Before/After QA",
  };
}
async function copyScreenshot(params: { lane: "baseline" | "candidate"; result: LaneResult }) {
if (!params.result.screenshotPath) {
return undefined;
@@ -359,6 +460,7 @@ export async function runMantisBeforeAfter(
const runner = opts.commandRunner ?? defaultCommandRunner;
const worktreeRoot = path.join(outputDir, "worktrees");
const comparisonPath = path.join(outputDir, "comparison.json");
const manifestPath = path.join(outputDir, "mantis-evidence.json");
const reportPath = path.join(outputDir, "mantis-report.md");
await fs.mkdir(worktreeRoot, { recursive: true });
@@ -423,8 +525,23 @@ export async function runMantisBeforeAfter(
}),
"utf8",
);
await fs.writeFile(
manifestPath,
`${JSON.stringify(
buildEvidenceManifest({
baseline: baselineResult,
candidate: candidateResult,
comparison,
outputDir,
}),
null,
2,
)}\n`,
"utf8",
);
return {
comparisonPath,
manifestPath,
outputDir,
reportPath,
status: comparison.pass ? "pass" : "fail",

View File

@@ -0,0 +1,475 @@
#!/usr/bin/env node
import { execFileSync, spawnSync } from "node:child_process";
import {
copyFileSync,
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
statSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
/**
 * Parses a flat list of `--flag value` pairs into an options object.
 *
 * Dashes inside a flag name become underscores, so `--target-pr 12` is stored
 * as `{ target_pr: "12" }`. Every flag must be followed by a non-empty value
 * that does not itself start with `--`.
 *
 * @param {string[]} argv - Command-line tokens (without node/script entries).
 * @returns {Record<string, string>} Parsed options keyed by normalized flag name.
 * @throws {Error} On a token that is not a `--flag`, or a flag without a value.
 */
function parseArgs(argv) {
  const parsed = {};
  let cursor = 0;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    if (!flag.startsWith("--")) {
      throw new Error(`Unexpected argument: ${flag}`);
    }
    const value = argv[cursor + 1];
    const hasValue = Boolean(value) && !value.startsWith("--");
    if (!hasValue) {
      throw new Error(`Missing value for ${flag}`);
    }
    parsed[flag.slice(2).replaceAll("-", "_")] = value;
    // Each option consumes two tokens: the flag and its value.
    cursor += 2;
  }
  return parsed;
}
/**
 * Reads a UTF-8 file and parses its contents as JSON.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {unknown} The parsed JSON value.
 */
function readJson(filePath) {
  const text = readFileSync(filePath, "utf8");
  return JSON.parse(text);
}
/**
 * Ensures `candidatePath` is `parentDir` itself or a path beneath it.
 *
 * The check is lexical (based on `path.relative`); symlinks are not resolved.
 *
 * @param {string} parentDir - Directory the candidate must stay inside.
 * @param {string} candidatePath - Path to validate.
 * @param {string} label - Human-readable name used in the error message.
 * @returns {string} `candidatePath`, unchanged, when it is inside `parentDir`.
 * @throws {Error} When the candidate lies outside `parentDir`.
 */
function assertInside(parentDir, candidatePath, label) {
  const relative = path.relative(parentDir, candidatePath);
  // Only a leading ".." *segment* climbs out of the parent. A child whose
  // name merely begins with two dots (e.g. "..hidden.png") is still inside.
  const climbsOut = relative === ".." || relative.startsWith(`..${path.sep}`);
  // path.relative can return an absolute path when no relative route exists
  // (for example across Windows drives); treat that as outside as well.
  if (!climbsOut && !path.isAbsolute(relative)) {
    return candidatePath;
  }
  throw new Error(`${label} escapes manifest directory: ${candidatePath}`);
}
/**
 * Normalizes a relative, POSIX-style publish path and rejects unsafe values.
 *
 * Backslashes are treated as separators, then the path is normalized with
 * `path.posix.normalize`. Empty paths, paths that climb above their root,
 * absolute paths, and drive-letter paths are rejected.
 *
 * @param {unknown} targetPath - Requested path (coerced with `String`).
 * @returns {string} The normalized, forward-slash relative path.
 * @throws {Error} When the path is empty, absolute, or escapes its root.
 */
function normalizeTargetPath(targetPath) {
  const normalized = path.posix.normalize(String(targetPath).replaceAll("\\", "/"));
  const isEmpty = normalized === "." || normalized === "";
  // After normalization any upward traversal is left at the front, either as
  // a bare ".." (e.g. from "a/../..") or as a "../" prefix. Both must fail.
  const escapesRoot =
    normalized === ".." || normalized.startsWith("../") || normalized.includes("/../");
  const isAbsolute = normalized.startsWith("/") || /^[A-Za-z]:/u.test(normalized);
  if (isEmpty || escapesRoot || isAbsolute) {
    throw new Error(`Invalid artifact target path: ${targetPath}`);
  }
  return normalized;
}
/**
 * Validates one manifest artifact entry and resolves it to a file on disk.
 *
 * @param {string} manifestDir - Directory that artifact paths are relative to.
 * @param {object} artifact - Raw artifact entry from the manifest.
 * @returns {object|null} The entry with `kind`, `lane`, `label`, `required`,
 *   `source` (resolved path) and `targetPath` filled in, or `null` when the
 *   file is missing and the entry has `required: false`.
 * @throws {Error} When the entry is not an object, has no `path`, points
 *   outside `manifestDir`, is a missing required file, or is not a regular file.
 */
function resolveArtifact(manifestDir, artifact) {
  const isObject = Boolean(artifact) && typeof artifact === "object";
  if (!isObject) {
    throw new Error("Manifest artifact entries must be objects.");
  }
  const relativePath = artifact.path;
  if (!relativePath) {
    throw new Error("Manifest artifact entry is missing path.");
  }
  const fallbackLabel = artifact.label ?? relativePath;
  const absolutePath = path.resolve(manifestDir, relativePath);
  const source = assertInside(manifestDir, absolutePath, `Artifact ${fallbackLabel}`);
  // Artifacts are required unless the manifest opts out with `required: false`.
  const required = artifact.required !== false;
  if (!existsSync(source)) {
    if (!required) {
      return null;
    }
    throw new Error(`Missing required artifact: ${relativePath}`);
  }
  const isRegularFile = statSync(source).isFile();
  if (!isRegularFile) {
    throw new Error(`Artifact is not a file: ${relativePath}`);
  }
  const requestedTarget = artifact.targetPath ?? path.basename(relativePath);
  return {
    ...artifact,
    kind: artifact.kind ?? "attachment",
    lane: artifact.lane ?? "run",
    label: fallbackLabel,
    required,
    source,
    targetPath: normalizeTargetPath(requestedTarget),
  };
}
/**
 * Loads and validates a Mantis evidence manifest and resolves its artifacts.
 *
 * Artifact entries are resolved relative to the manifest's directory; entries
 * that resolve to `null` (missing optional files) are dropped. The manifest
 * file itself is appended as a `metadata` artifact published as
 * `mantis-evidence.json`.
 *
 * @param {string} manifestPath - Path to the manifest JSON file.
 * @returns {object} The manifest fields plus resolved `artifacts` and `manifestDir`.
 * @throws {Error} When the manifest is not a JSON object, has an unsupported
 *   `schemaVersion`, lacks `id`/`title`/`scenario`, has a non-array
 *   `artifacts`, or any artifact entry fails to resolve.
 */
export function loadEvidenceManifest(manifestPath) {
  const resolvedManifest = path.resolve(manifestPath);
  const manifestDir = path.dirname(resolvedManifest);
  const manifest = readJson(resolvedManifest);
  // Reject `null`, scalars and arrays up front so the caller sees a clear
  // message instead of a TypeError from a property access further down.
  if (manifest === null || typeof manifest !== "object" || Array.isArray(manifest)) {
    throw new Error("Mantis evidence manifest must be a JSON object.");
  }
  if (manifest.schemaVersion !== 1) {
    throw new Error(`Unsupported Mantis evidence manifest schema: ${manifest.schemaVersion}`);
  }
  if (!manifest.id || !manifest.title || !manifest.scenario) {
    throw new Error("Mantis evidence manifest requires id, title, and scenario.");
  }
  const declaredArtifacts = manifest.artifacts ?? [];
  if (!Array.isArray(declaredArtifacts)) {
    throw new Error("Mantis evidence manifest artifacts must be an array.");
  }
  const artifacts = declaredArtifacts
    .map((artifact) => resolveArtifact(manifestDir, artifact))
    .filter(Boolean);
  // Publish the manifest alongside the files it describes.
  artifacts.push({
    kind: "metadata",
    lane: "run",
    label: "Mantis evidence manifest",
    source: resolvedManifest,
    targetPath: "mantis-evidence.json",
  });
  return {
    ...manifest,
    artifacts,
    manifestDir,
  };
}
/**
 * Percent-encodes each segment of a slash-separated path for use in a URL.
 *
 * Empty segments (leading, trailing, or doubled slashes) are dropped.
 *
 * @param {string} input - Slash-separated path.
 * @returns {string} The encoded path, joined with single slashes.
 */
function encodePathForUrl(input) {
  const encodedSegments = [];
  for (const segment of input.split("/")) {
    if (segment) {
      encodedSegments.push(encodeURIComponent(segment));
    }
  }
  return encodedSegments.join("/");
}
/**
 * Builds the URL of a published artifact under `rawBase`.
 *
 * @param {string} rawBase - Base URL of the published artifact directory.
 * @param {{ targetPath: string }} artifact - Artifact whose target path is appended.
 * @returns {string} `rawBase` followed by the URL-encoded target path.
 */
function artifactUrl(rawBase, artifact) {
  const encodedTarget = encodePathForUrl(artifact.targetPath);
  return `${rawBase}/${encodedTarget}`;
}
/**
 * Indexes the artifacts of one kind by their lane.
 *
 * When several artifacts of the same kind share a lane, the last one wins.
 *
 * @param {Iterable<object>} artifacts - Artifacts to scan.
 * @param {string} kind - Artifact kind to keep.
 * @returns {Map<string, object>} Lane name mapped to its artifact.
 */
function byLane(artifacts, kind) {
  const laneEntries = [...artifacts]
    .filter((artifact) => artifact.kind === kind)
    .map((artifact) => [artifact.lane, artifact]);
  return new Map(laneEntries);
}
/**
 * Finds the artifacts of one kind for two lanes so they can be shown side by side.
 *
 * @param {object[]} artifacts - Artifacts to search.
 * @param {string} kind - Artifact kind to pair.
 * @param {string} leftLane - Lane shown on the left.
 * @param {string} rightLane - Lane shown on the right.
 * @returns {{ left: object, right: object } | null} The pair, or `null` when
 *   either lane has no artifact of that kind.
 */
function findPair(artifacts, kind, leftLane, rightLane) {
  const perLane = byLane(artifacts, kind);
  const left = perLane.get(leftLane);
  const right = perLane.get(rightLane);
  if (!left || !right) {
    return null;
  }
  return { left, right };
}
/**
 * Escapes text for use inside a double-quoted HTML attribute.
 *
 * `|` is also encoded so the attribute can sit inside a markdown table cell
 * without being read as a column separator.
 *
 * @param {unknown} value - Text to escape (coerced with `String`).
 * @returns {string} Attribute-safe text.
 */
function escapeHtmlAttribute(value) {
  return String(value)
    .replaceAll("&", "&amp;")
    .replaceAll('"', "&quot;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll("|", "&#124;");
}
/**
 * Escapes text for a markdown table cell: pipes are backslash-escaped and
 * line breaks become spaces so the row stays on one line.
 *
 * @param {unknown} value - Cell text (coerced with `String`).
 * @returns {string} Cell-safe text.
 */
function escapeTableCell(value) {
  return String(value)
    .replaceAll("|", "\\|")
    .replace(/\r?\n/gu, " ");
}
/**
 * Renders two artifacts as a one-row markdown table of inline images.
 *
 * Both images share one width: the left artifact's `width`, else the right's,
 * else 420, capped at 720.
 *
 * @param {{ pair: { left: object, right: object }, rawBase: string }} params
 * @returns {string} The markdown table, or `""` when either side is missing.
 */
function renderPairTable({ pair, rawBase }) {
  const { left, right } = pair;
  if (!left || !right) {
    return "";
  }
  const width = Math.min(Number(left.width ?? right.width ?? 420) || 420, 720);
  const imageCell = (artifact) =>
    `<img src="${artifactUrl(rawBase, artifact)}" width="${width}" alt="${escapeHtmlAttribute(artifact.alt ?? artifact.label)}">`;
  return [
    `| ${escapeTableCell(left.label)} | ${escapeTableCell(right.label)} |`,
    "| --- | --- |",
    `| ${imageCell(left)} | ${imageCell(right)} |`,
    "",
  ].join("\n");
}
/**
 * Renders every `inline` artifact that was not already shown in a pair table
 * as a bold label followed by an image.
 *
 * @param {{ artifacts: object[], rawBase: string, pairedKeys: string[] }} params
 *   `pairedKeys` holds `kind:lane` keys of artifacts already rendered in pairs.
 * @returns {string} The rendered sections joined by newlines (`""` when none).
 */
function renderSingleImageTables({ artifacts, rawBase, pairedKeys }) {
  const renderedPairs = new Set(pairedKeys);
  return artifacts
    .filter(
      (artifact) => artifact.inline && !renderedPairs.has(`${artifact.kind}:${artifact.lane}`),
    )
    .map((artifact) => {
      // Width defaults to 720 and is capped at 900.
      const width = Math.min(Number(artifact.width ?? 720) || 720, 900);
      return [
        `**${artifact.label}**`,
        "",
        `<img src="${artifactUrl(rawBase, artifact)}" width="${width}" alt="${escapeHtmlAttribute(artifact.alt ?? artifact.label)}">`,
        "",
      ].join("\n");
    })
    .join("\n");
}
/**
 * Renders a titled markdown bullet list linking to every artifact of one kind.
 *
 * @param {{ artifacts: object[], kind: string, rawBase: string, title: string }} params
 * @returns {string} `"<title>:"` followed by one bullet per artifact and a
 *   trailing newline, or `""` when no artifact has the requested kind.
 */
function renderLinkList({ artifacts, kind, rawBase, title }) {
  const bullets = [];
  for (const artifact of artifacts) {
    if (artifact.kind === kind) {
      bullets.push(`- [${artifact.label}](${artifactUrl(rawBase, artifact)})`);
    }
  }
  if (bullets.length === 0) {
    return "";
  }
  return `${title}:\n${bullets.join("\n")}\n`;
}
/**
 * Formats one comparison lane as a markdown bullet.
 *
 * The bullet shows the lane status (or `unknown`), then the `sha` if present
 * (otherwise the `ref`), then the expected outcome if present.
 *
 * @param {string} label - Display name of the lane.
 * @param {{ status?: string, sha?: string, ref?: string, expected?: string } | undefined} lane
 * @returns {string} The bullet line, or `""` when `lane` is missing.
 */
function laneLine(label, lane) {
  if (!lane) {
    return "";
  }
  let line = `- ${label}: \`${lane.status ?? "unknown"}\``;
  // Prefer the concrete commit over the symbolic ref.
  const revision = lane.sha || lane.ref;
  if (revision) {
    line += ` at \`${revision}\``;
  }
  if (lane.expected) {
    line += `, expected ${lane.expected}`;
  }
  return line;
}
/**
 * Renders the markdown body of the Mantis QA evidence PR comment.
 *
 * The comment starts with `marker`, followed by the manifest title, summary
 * and run details, the baseline/candidate comparison, side-by-side image
 * tables, remaining inline images, video link lists, and a link to the raw
 * published files.
 *
 * @param {object} params
 * @param {string} params.artifactRoot - Publish directory on the `qa-artifacts`
 *   branch; only used to build the raw-files link when `treeUrl` is absent.
 * @param {string} [params.artifactUrl] - Link shown on the "Artifact" bullet.
 * @param {object} params.manifest - Loaded manifest with resolved `artifacts`.
 * @param {string} params.marker - First line of the comment.
 * @param {string} params.rawBase - Base URL for image and video links.
 * @param {string} [params.requestSource] - Value shown on the "Trigger" bullet.
 * @param {string} [params.runUrl] - Link shown on the "Run" bullet.
 * @param {string} [params.treeUrl] - Link to the published files.
 * @returns {string} The comment body, ending with a single newline and with
 *   runs of three or more newlines collapsed to two.
 */
export function renderEvidenceComment({
  artifactRoot,
  artifactUrl: actionsArtifactUrl,
  manifest,
  marker,
  rawBase,
  requestSource,
  runUrl,
  treeUrl,
}) {
  const comparison = manifest.comparison ?? {};
  // Artifact kinds that are shown as baseline/candidate side-by-side tables.
  const pairs = ["timeline", "desktopScreenshot", "motionPreview"]
    .map((kind) => findPair(manifest.artifacts, kind, "baseline", "candidate"))
    .filter(Boolean);
  const pairedKeys = pairs.flatMap(({ left, right }) => [
    `${left.kind}:${left.lane}`,
    `${right.kind}:${right.lane}`,
  ]);
  const lines = [
    marker,
    `## ${manifest.title}`,
    "",
    `Summary: ${manifest.summary ?? "Mantis captured QA evidence for this scenario."}`,
    "",
    `- Scenario: \`${manifest.scenario}\``,
  ];
  const pushIfPresent = (text) => {
    if (text) {
      lines.push(text);
    }
  };
  pushIfPresent(requestSource && `- Trigger: \`${requestSource}\``);
  pushIfPresent(runUrl && `- Run: ${runUrl}`);
  pushIfPresent(actionsArtifactUrl && `- Artifact: ${actionsArtifactUrl}`);
  pushIfPresent(laneLine("Baseline", comparison.baseline));
  pushIfPresent(laneLine("Candidate", comparison.candidate));
  if (typeof comparison.pass === "boolean") {
    lines.push(`- Overall: \`${comparison.pass}\``);
  }
  lines.push("");
  lines.push(...pairs.map((pair) => renderPairTable({ pair, rawBase })));
  pushIfPresent(
    renderSingleImageTables({
      artifacts: manifest.artifacts,
      pairedKeys,
      rawBase,
    }),
  );
  pushIfPresent(
    renderLinkList({
      artifacts: manifest.artifacts,
      kind: "motionClip",
      rawBase,
      title: "Motion-trimmed clips",
    }),
  );
  pushIfPresent(
    renderLinkList({
      artifacts: manifest.artifacts,
      kind: "fullVideo",
      rawBase,
      title: "Full videos",
    }),
  );
  // The fallback link encodes the artifact root segment by segment, so a
  // root containing spaces or "#" still yields a working URL.
  const rawFilesUrl =
    treeUrl ??
    `https://github.com/${process.env.GITHUB_REPOSITORY}/tree/qa-artifacts/${encodePathForUrl(String(artifactRoot ?? ""))}`;
  lines.push(`Raw QA files: ${rawFilesUrl}`);
  return `${lines.join("\n").replace(/\n{3,}/gu, "\n\n")}\n`;
}
/**
 * Runs a command synchronously and returns its stdout as a UTF-8 string.
 *
 * By default stdin is ignored, stdout is captured and stderr is inherited.
 * Any key in `options` overrides the corresponding default.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `execFileSync` options.
 * @returns {string} The command's stdout (per `execFileSync`).
 */
function run(command, args, options = {}) {
  const defaults = {
    encoding: "utf8",
    stdio: options.stdio ?? ["ignore", "pipe", "inherit"],
  };
  return execFileSync(command, args, { ...defaults, ...options });
}
/**
 * Runs a command synchronously with all stdio ignored (unless overridden via
 * `options`) and reports its exit status.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `spawnSync` options.
 * @returns {number} The exit status, or `1` when the process reported none.
 * @throws {Error} The spawn error, when the process could not be run.
 */
function runStatus(command, args, options = {}) {
  const { error, status } = spawnSync(command, args, { stdio: "ignore", ...options });
  if (error) {
    throw error;
  }
  return status ?? 1;
}
/**
 * Copies the manifest's artifacts into `artifactRoot` on the repository's
 * `qa-artifacts` branch and pushes the result.
 *
 * Work happens in a temporary git repository that is always removed. When
 * the staged tree is unchanged, nothing is committed or pushed.
 *
 * @param {object} params
 * @param {string} params.artifactRoot - Directory on the branch to publish into.
 * @param {string} params.ghToken - Token used to authenticate against GitHub.
 * @param {{ id: string, artifacts: Array<{ source: string, targetPath: string }> }} params.manifest
 * @param {string} params.repo - Repository in `owner/name` form.
 * @returns {string} The normalized artifact root that was published.
 * @throws {Error} When `artifactRoot` is invalid, a target path escapes the
 *   artifact root, or a git command fails.
 */
function publishArtifactFiles({ artifactRoot, ghToken, manifest, repo }) {
  // Validate before creating the temp directory so that a bad artifact root
  // cannot leave a stray worktree behind.
  const safeArtifactRoot = normalizeTargetPath(artifactRoot);
  const worktree = mkdtempSync(path.join(tmpdir(), "mantis-qa-artifacts-"));
  const git = (...gitArgs) => run("git", ["-C", worktree, ...gitArgs]);
  try {
    run("git", ["init", "--quiet", worktree]);
    git("config", "user.name", "github-actions[bot]");
    git("config", "user.email", "41898282+github-actions[bot]@users.noreply.github.com");
    // NOTE(review): the token is embedded in the remote URL, so it may show up
    // in error text from failed git commands; confirm log masking covers it.
    git("remote", "add", "origin", `https://x-access-token:${ghToken}@github.com/${repo}.git`);
    try {
      git("fetch", "--quiet", "origin", "qa-artifacts");
      git("checkout", "--quiet", "-B", "qa-artifacts", "FETCH_HEAD");
    } catch {
      // Best effort: if the branch cannot be fetched, start it as an orphan.
      git("checkout", "--quiet", "--orphan", "qa-artifacts");
    }
    const destinationRoot = path.join(worktree, safeArtifactRoot);
    for (const artifact of manifest.artifacts) {
      const destination = assertInside(
        destinationRoot,
        path.resolve(destinationRoot, artifact.targetPath),
        `Artifact target ${artifact.targetPath}`,
      );
      mkdirSync(path.dirname(destination), { recursive: true });
      copyFileSync(artifact.source, destination);
    }
    git("add", safeArtifactRoot);
    // `git diff --cached --quiet` exits non-zero when something is staged.
    const hasChanges = runStatus("git", ["-C", worktree, "diff", "--cached", "--quiet"]) !== 0;
    if (hasChanges) {
      git("commit", "--quiet", "-m", `qa: publish Mantis evidence for ${manifest.id}`);
      git("push", "--quiet", "origin", "HEAD:qa-artifacts");
    } else {
      console.log("No QA evidence artifact changes to publish.");
    }
  } finally {
    rmSync(worktree, { force: true, recursive: true });
  }
  return safeArtifactRoot;
}
/**
 * Creates or updates the Mantis evidence comment on a pull request.
 *
 * The PR is looked up first, so a non-PR number fails early. The last
 * existing comment whose body contains `marker` is updated in place; if none
 * exists, or the update fails, a new comment is created instead.
 *
 * @param {object} params
 * @param {string} params.body - Markdown body of the comment.
 * @param {string} params.marker - Text that identifies a previous comment.
 * @param {string} params.prNumber - Pull request number.
 * @param {string} params.repo - Repository in `owner/name` form.
 * @returns {void}
 */
function upsertPrComment({ body, marker, prNumber, repo }) {
  run("gh", ["api", `repos/${repo}/pulls/${prNumber}`, "--jq", ".number"]);
  // JSON-encode the marker so quotes and backslashes stay inside the jq
  // string literal instead of breaking or altering the filter.
  const markerFilter = `.[] | select(.body | contains(${JSON.stringify(String(marker))})) | .id`;
  const commentId = run("gh", [
    "api",
    "--paginate",
    `repos/${repo}/issues/${prNumber}/comments`,
    "--jq",
    markerFilter,
  ])
    .trim()
    .split("\n")
    .findLast((line) => line.length > 0);
  const scratchDir = mkdtempSync(path.join(tmpdir(), "mantis-comment-"));
  try {
    if (commentId) {
      const payloadFile = path.join(scratchDir, "body.md.json");
      writeFileSync(payloadFile, JSON.stringify({ body }));
      try {
        run("gh", [
          "api",
          "--method",
          "PATCH",
          `repos/${repo}/issues/comments/${commentId}`,
          "--input",
          payloadFile,
        ]);
        console.log(`Updated Mantis QA evidence comment on PR #${prNumber}.`);
        return;
      } catch {
        // Deliberate fallback: post a fresh comment rather than fail the run.
        console.warn(
          `Could not update existing Mantis QA evidence comment ${commentId}; creating a new one.`,
        );
      }
    }
    const bodyFile = path.join(scratchDir, "body.md");
    writeFileSync(bodyFile, body);
    // Name the repository explicitly so the comment goes to the same repo as
    // the API calls above, whatever the working directory is.
    run("gh", ["pr", "comment", prNumber, "--repo", repo, "--body-file", bodyFile], {
      stdio: "inherit",
    });
    console.log(`Created Mantis QA evidence comment on PR #${prNumber}.`);
  } finally {
    rmSync(scratchDir, { force: true, recursive: true });
  }
}
/**
 * CLI entry point: publishes the manifest's artifacts and upserts the PR comment.
 *
 * Required flags: `--manifest`, `--target-pr` (numeric), `--artifact-root`
 * and `--marker`. Optional flags: `--repo` (defaults to `GITHUB_REPOSITORY`),
 * `--artifact-url`, `--request-source` and `--run-url`. The token is read
 * from `GH_TOKEN`, falling back to `GITHUB_TOKEN`.
 *
 * @param {string[]} [rawArgs] - Command-line tokens; defaults to the process arguments.
 * @returns {void}
 * @throws {Error} When a required flag, the repository, or the token is
 *   missing, or when `--target-pr` is not numeric.
 */
export function publishEvidence(rawArgs = process.argv.slice(2)) {
  const args = parseArgs(rawArgs);
  const missingKey = ["manifest", "target_pr", "artifact_root", "marker"].find(
    (key) => !args[key],
  );
  if (missingKey) {
    throw new Error(`Missing --${missingKey.replaceAll("_", "-")}.`);
  }
  const isNumericPr = /^[0-9]+$/u.test(args.target_pr);
  if (!isNumericPr) {
    throw new Error(`--target-pr must be numeric, got ${args.target_pr}.`);
  }
  const repo = args.repo ?? process.env.GITHUB_REPOSITORY;
  const ghToken = process.env.GH_TOKEN ?? process.env.GITHUB_TOKEN;
  if (!repo) {
    throw new Error("Missing --repo or GITHUB_REPOSITORY.");
  }
  if (!ghToken) {
    throw new Error("Missing GH_TOKEN or GITHUB_TOKEN.");
  }
  const { marker, target_pr: prNumber } = args;
  const manifest = loadEvidenceManifest(args.manifest);
  const artifactRoot = publishArtifactFiles({
    artifactRoot: args.artifact_root,
    ghToken,
    manifest,
    repo,
  });
  // Both links point at the same directory on the qa-artifacts branch.
  const encodedRoot = encodePathForUrl(artifactRoot);
  const body = renderEvidenceComment({
    artifactRoot,
    artifactUrl: args.artifact_url,
    manifest,
    marker,
    rawBase: `https://raw.githubusercontent.com/${repo}/qa-artifacts/${encodedRoot}`,
    requestSource: args.request_source,
    runUrl: args.run_url,
    treeUrl: `https://github.com/${repo}/tree/qa-artifacts/${encodedRoot}`,
  });
  upsertPrComment({ body, marker, prNumber, repo });
}
// Run the CLI only when this file is the script passed to Node, not when it
// is imported as a module.
const executedPath = process.argv[1] ? path.resolve(process.argv[1]) : "";
const isDirectRun = executedPath === fileURLToPath(import.meta.url);
if (isDirectRun) {
  try {
    publishEvidence();
  } catch (error) {
    // Print only the message and exit with a failing status.
    const message = error instanceof Error ? error.message : String(error);
    console.error(message);
    process.exit(1);
  }
}

View File

@@ -238,6 +238,7 @@ const TOOLING_SOURCE_TEST_TARGETS = new Map([
["scripts/lib/live-docker-stage.sh", ["test/scripts/live-docker-stage.test.ts"]],
["scripts/lib/openclaw-test-state.mjs", ["test/scripts/openclaw-test-state.test.ts"]],
["scripts/lib/vitest-local-scheduling.mjs", ["test/scripts/vitest-local-scheduling.test.ts"]],
["scripts/mantis/publish-pr-evidence.mjs", ["test/scripts/mantis-publish-pr-evidence.test.ts"]],
[
"scripts/run-vitest.mjs",
[
@@ -286,6 +287,10 @@ const TOOLING_TEST_TARGETS = new Map([
],
["test/scripts/live-docker-stage.test.ts", ["test/scripts/live-docker-stage.test.ts"]],
["test/scripts/openclaw-test-state.test.ts", ["test/scripts/openclaw-test-state.test.ts"]],
[
"test/scripts/mantis-publish-pr-evidence.test.ts",
["test/scripts/mantis-publish-pr-evidence.test.ts"],
],
[
"test/scripts/plugin-prerelease-test-plan.test.ts",
["test/scripts/plugin-prerelease-test-plan.test.ts"],

View File

@@ -0,0 +1,117 @@
import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import {
loadEvidenceManifest,
renderEvidenceComment,
} from "../../scripts/mantis/publish-pr-evidence.mjs";
/**
 * Creates a temporary evidence directory with two timeline images, one
 * baseline motion clip, and a schema-version-1 manifest describing them.
 *
 * @returns The path of the written `mantis-evidence.json`.
 */
function writeFixtureManifest() {
  const fixtureDir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-"));
  for (const lane of ["baseline", "candidate"]) {
    mkdirSync(path.join(fixtureDir, lane), { recursive: true });
  }
  // [lane directory, file name, file contents]
  const fixtureFiles = [
    ["baseline", "timeline.png", "baseline timeline"],
    ["candidate", "timeline.png", "candidate timeline"],
    ["baseline", "change.mp4", "baseline clip"],
  ];
  for (const [lane, fileName, contents] of fixtureFiles) {
    writeFileSync(path.join(fixtureDir, lane, fileName), contents);
  }
  const comparison = {
    baseline: {
      expected: "queued-only",
      sha: "aaa",
      status: "fail",
    },
    candidate: {
      expected: "queued -> thinking -> done",
      sha: "bbb",
      status: "pass",
    },
    pass: true,
  };
  const artifacts = [
    {
      alt: "Baseline timeline",
      kind: "timeline",
      label: "Baseline queued-only",
      lane: "baseline",
      path: "baseline/timeline.png",
      targetPath: "baseline.png",
    },
    {
      alt: "Candidate timeline",
      kind: "timeline",
      label: "Candidate queued -> thinking -> done",
      lane: "candidate",
      path: "candidate/timeline.png",
      targetPath: "candidate.png",
    },
    {
      kind: "motionClip",
      label: "Baseline change MP4",
      lane: "baseline",
      path: "baseline/change.mp4",
      targetPath: "baseline-change.mp4",
    },
  ];
  const manifest = {
    schemaVersion: 1,
    id: "discord-status-reactions",
    title: "Mantis Discord Status Reactions QA",
    summary: "Mantis reran the scenario.",
    scenario: "discord-status-reactions-tool-only",
    comparison,
    artifacts,
  };
  const manifestPath = path.join(fixtureDir, "mantis-evidence.json");
  writeFileSync(manifestPath, JSON.stringify(manifest));
  return manifestPath;
}
// Covers comment rendering from a loaded manifest and rejection of artifact
// paths that point outside the manifest directory.
describe("scripts/mantis/publish-pr-evidence", () => {
  it("renders a manifest-driven PR comment with inline screenshots and video links", () => {
    const marker = "<!-- mantis-discord-status-reactions -->";
    const artifactRoot = "mantis/discord/pr-1/run-1";
    const rawBase = `https://raw.githubusercontent.com/openclaw/openclaw/qa-artifacts/${artifactRoot}`;
    const runUrl = "https://github.com/openclaw/openclaw/actions/runs/1";
    const body = renderEvidenceComment({
      artifactRoot,
      artifactUrl: `${runUrl}/artifacts/2`,
      manifest: loadEvidenceManifest(writeFixtureManifest()),
      marker,
      rawBase,
      requestSource: "workflow_dispatch",
      runUrl,
      treeUrl: `https://github.com/openclaw/openclaw/tree/qa-artifacts/${artifactRoot}`,
    });
    const expectedFragments = [
      marker,
      "Summary: Mantis reran the scenario.",
      "| Baseline queued-only | Candidate queued -> thinking -> done |",
      `<img src="${rawBase}/baseline.png"`,
      `[Baseline change MP4](${rawBase}/baseline-change.mp4)`,
      "- Overall: `true`",
    ];
    for (const fragment of expectedFragments) {
      expect(body).toContain(fragment);
    }
  });
  it("rejects artifact paths that escape the manifest directory", () => {
    const fixtureDir = mkdtempSync(path.join(tmpdir(), "mantis-evidence-test-"));
    const manifestPath = path.join(fixtureDir, "mantis-evidence.json");
    // The single artifact points one level above the manifest directory.
    const escapingManifest = {
      artifacts: [
        {
          kind: "metadata",
          path: "../outside.json",
        },
      ],
      id: "bad",
      scenario: "bad",
      schemaVersion: 1,
      title: "Bad",
    };
    writeFileSync(manifestPath, JSON.stringify(escapingManifest));
    const load = () => loadEvidenceManifest(manifestPath);
    expect(load).toThrow(/escapes manifest directory/u);
  });
});