fix(mantis): suppress auto no-proof comments

2026-06-03 03:56:24 +00:00 · 2026-05-18 21:08:43 +05:30
parent 8c2a390fbc
commit 98256b192b
3 changed files with 33 additions and 27 deletions
--- a/.github/codex/prompts/mantis-telegram-desktop-proof.md
+++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md
@@ -18,6 +18,10 @@ Hard limits:
 - Do not force GIFs for internal-only, workflow-only, test-only, docs-only, or
  otherwise non-visual PRs. A no-visual-proof manifest is a successful workflow
  outcome when GIFs would be misleading, but it is not proof that the PR passed.
+- Do not skip Telegram-visible PRs just because the proof needs a specific
+  message, mock response, media attachment, command, button, reaction, stop
+  timing, approval prompt, or progress/final delivery sequence. First write a
+  concrete proof plan and try the standard harness path.
 - Keep public-facing manifest summaries short and user-domain. Do not mention
  harness internals, mock-provider limits, secret/trust boundaries, local paths,
  transcript seeding, or workflow implementation details in the summary.
@@ -42,7 +46,15 @@ Required workflow:
 2. Inspect the PR with `gh pr view "$MANTIS_PR_NUMBER"` and
   `gh pr diff "$MANTIS_PR_NUMBER"`.
 3. Decide whether the PR has a visibly reproducible Telegram Desktop
-   before/after. If it does not, write
+   before/after. Treat these as visible until proven otherwise: message text
+   formatting/content, progress drafts, native drafts, final delivery, media or
+   document delivery, inline buttons, approval prompts, stop/abort behavior,
+   reactions/status indicators, guest/inline responses, TTS/voice/audio
+   delivery, and routing changes whose result is visible in the chat. For those
+   PRs, define the exact Telegram stimulus and expected main/PR visual delta
+   before deciding to skip.
+
+   If the PR does not have a Telegram-visible before/after, write
   `${MANTIS_OUTPUT_DIR}/mantis-evidence.json` with `comparison.pass: true`, no
   artifacts, and a summary that starts with
   `Mantis did not generate before/after GIFs because`. Include a short
@@ -78,8 +90,9 @@ than Telegram-visible behavior`. Use this manifest shape and do not create
   ```

   If the PR appears visual but proof is blocked by Telegram Desktop session
-   state, authorization, credentials, Crabbox, or another capture-infrastructure
-   issue, do not describe it as a no-visual PR. Write a manifest with
+   state, authorization, credentials, Crabbox, missing Telegram client support,
+   unavailable media/provider setup, or another capture-infrastructure issue,
+   do not describe it as a no-visual PR. Write a manifest with
   `comparison.pass: false`, skipped lanes, no artifacts, and a summary that
   starts with `Mantis could not capture Telegram Desktop proof because`. The
   publisher will keep that out of PR comments so the failure stays in the
--- a/scripts/mantis/publish-pr-evidence.mjs
+++ b/scripts/mantis/publish-pr-evidence.mjs
@@ -316,37 +316,27 @@ function hasVisibleProofArtifacts(manifest) {
  );
 }

-function isSkippedNoVisualProof(manifest) {
-  const comparison = manifest.comparison ?? {};
-  return (
-    !hasVisibleProofArtifacts(manifest) &&
-    comparison.baseline?.status === "skipped" &&
-    comparison.candidate?.status === "skipped"
-  );
+function isTelegramDesktopProof(manifest) {
+  return manifest.id === "telegram-desktop-proof" || manifest.scenario === "telegram-desktop-proof";
 }

 function publicSummary(manifest) {
-  if (isSkippedNoVisualProof(manifest)) {
-    return "Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run.";
-  }
  return manifest.summary ?? "Mantis captured QA evidence for this scenario.";
 }

 function overallStatus(manifest) {
-  if (isSkippedNoVisualProof(manifest)) {
-    return "skipped";
-  }
  const pass = manifest.comparison?.pass;
  return typeof pass === "boolean" ? String(pass) : "";
 }

-export function shouldPublishPrComment(manifest) {
-  if (!isSkippedNoVisualProof(manifest)) {
+export function shouldPublishPrComment(manifest, { requestSource } = {}) {
+  if (!isTelegramDesktopProof(manifest) || hasVisibleProofArtifacts(manifest)) {
    return true;
  }
-  return !/(authorization[- ]?error|credential infrastructure|logged[- ]out|login screen|welcome screen|bad telegram session)/iu.test(
-    manifest.summary ?? "",
-  );
+  if (requestSource === "pull_request_target") {
+    return false;
+  }
+  return manifest.comparison?.pass === true;
 }

 export function renderEvidenceComment({
@@ -593,7 +583,7 @@ export async function publishEvidence(rawArgs = process.argv.slice(2)) {
    runUrl: args.run_url,
    treeUrl: published.treeUrl,
  });
-  if (!shouldPublishPrComment(manifest)) {
+  if (!shouldPublishPrComment(manifest, { requestSource: args.request_source })) {
    console.log("Skipped Mantis QA evidence PR comment because the run did not capture proof.");
    return;
  }
--- a/test/scripts/mantis-publish-pr-evidence.test.ts
+++ b/test/scripts/mantis-publish-pr-evidence.test.ts
@@ -277,11 +277,13 @@ describe("scripts/mantis/publish-pr-evidence", () => {
      "mantis-evidence.json",
    ]);
    expect(body).toContain(
-      "Summary: Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run.",
+      "Summary: Mantis did not generate before/after GIFs because this PR changes CI wiring only.",
    );
-    expect(body).toContain("- Overall: `skipped`");
+    expect(body).toContain("- Overall: `true`");
    expect(body).not.toContain("<table");
    expect(body).not.toContain("<img ");
+    expect(shouldPublishPrComment(manifest, { requestSource: "issue_comment" })).toBe(true);
+    expect(shouldPublishPrComment(manifest, { requestSource: "pull_request_target" })).toBe(false);
  });

  it("does not publish PR comments for Telegram capture infrastructure failures", () => {
@@ -323,10 +325,11 @@ describe("scripts/mantis/publish-pr-evidence", () => {
    });

    expect(body).toContain(
-      "Summary: Mantis did not generate before/after GIFs because this PR does not have a clean Telegram-visible before/after proof in the standard Mantis run.",
+      "Summary: Mantis could not capture Telegram Desktop proof because native Telegram Desktop opened to the logged-out welcome screen.",
    );
-    expect(body).toContain("- Overall: `skipped`");
-    expect(shouldPublishPrComment(manifest)).toBe(false);
+    expect(body).toContain("- Overall: `false`");
+    expect(shouldPublishPrComment(manifest, { requestSource: "issue_comment" })).toBe(false);
+    expect(shouldPublishPrComment(manifest, { requestSource: "pull_request_target" })).toBe(false);
  });

  it("rejects artifact paths that escape the manifest directory", () => {