From d83cd282c602bd61ee54ed7baec9377ef60d309b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 24 Jun 2026 22:37:41 +0800 Subject: [PATCH] fix(qa): record checked-out ref in evidence (#96434) Merged via squash. Prepared head SHA: 86b3df6e59b17fb4a1670a2007c03e11cc974e41 Co-authored-by: vincentkoc <25068+vincentkoc@users.noreply.github.com> Co-authored-by: vincentkoc <25068+vincentkoc@users.noreply.github.com> Reviewed-by: @vincentkoc --- .../qa-lab/src/evidence-summary.test.ts | 24 +++++++++++ extensions/qa-lab/src/evidence-summary.ts | 43 ++++++++++++++++--- .../discord/discord-live.runtime.ts | 1 + .../slack/slack-live.runtime.ts | 1 + .../telegram/telegram-live.runtime.ts | 1 + .../whatsapp/whatsapp-live.runtime.ts | 1 + extensions/qa-lab/src/suite.ts | 6 +++ .../qa-lab/src/test-file-scenario-runner.ts | 4 ++ scripts/qa/ux-matrix-evidence-producer.ts | 24 +++++++---- 9 files changed, 92 insertions(+), 13 deletions(-) diff --git a/extensions/qa-lab/src/evidence-summary.test.ts b/extensions/qa-lab/src/evidence-summary.test.ts index 21b9e98ef87..e55371abd39 100644 --- a/extensions/qa-lab/src/evidence-summary.test.ts +++ b/extensions/qa-lab/src/evidence-summary.test.ts @@ -1,4 +1,5 @@ // Qa Lab tests cover QA evidence summary behavior. +import { execFileSync } from "node:child_process"; import { describe, expect, it } from "vitest"; import { QA_EVIDENCE_SUMMARY_KIND, @@ -123,6 +124,29 @@ describe("evidence summary", () => { }); }); + it("prefers the checked-out ref over an inherited GitHub event SHA", () => { + const repoRoot = process.cwd(); + const checkedOutRef = execFileSync("git", ["rev-parse", "--verify", "HEAD"], { + cwd: repoRoot, + encoding: "utf8", + }).trim(); + const evidence = buildQaSuiteEvidenceSummary({ + artifactPaths: [], + channelId: "qa-channel", + env: { + GITHUB_SHA: "bd479958c04a1eadbda8b6105e0722588d71e9ad", + } as NodeJS.ProcessEnv, + generatedAt: "2026-06-24T12:00:00.000Z", + primaryModel: "mock-openai/gpt-5.5", + providerMode: "mock-openai", + repoRoot, + scenarioDefinitions: [{ id: "ref-probe", title: "Ref probe" }], + scenarioResults: [{ name: "Ref probe", status: "pass" }], + }); + + expect(evidence.entries[0]?.execution?.environment.ref).toBe(checkedOutRef); + }); + it("builds Telegram live transport evidence entries", () => { const evidence = buildLiveTransportEvidenceSummary({ artifactPaths: [ diff --git a/extensions/qa-lab/src/evidence-summary.ts b/extensions/qa-lab/src/evidence-summary.ts index 9212af79857..d428671462f 100644 --- a/extensions/qa-lab/src/evidence-summary.ts +++ b/extensions/qa-lab/src/evidence-summary.ts @@ -1,4 +1,5 @@ // Qa Lab plugin module implements QA evidence summary behavior. +import { execFileSync } from "node:child_process"; import { z } from "zod"; import { splitQaModelRef } from "./model-selection.js"; import { getQaProvider, type QaProviderMode } from "./providers/index.js"; @@ -288,6 +289,7 @@ type QaEvidenceBuildBase = { channelDriver?: string; packageSource?: QaEvidencePackageSource; profile?: QaEvidenceProfile; + repoRoot?: string; runner?: string; }; @@ -388,9 +390,31 @@ function resolveQaEvidenceChannelDriver(params: { env?: NodeJS.ProcessEnv; fallb return id ? { id } : undefined; } -function resolveQaEvidenceEnvironment(env: NodeJS.ProcessEnv | undefined) { +function resolveQaEvidenceCheckoutRef(repoRoot?: string) { + try { + const ref = execFileSync("git", ["rev-parse", "--verify", "HEAD"], { + cwd: repoRoot ?? process.cwd(), + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }).trim(); + return ref || undefined; + } catch { + return undefined; + } +} + +export function resolveQaEvidenceEnvironment(params: { + env?: NodeJS.ProcessEnv; + repoRoot?: string; +}) { return { - ref: env?.OPENCLAW_QA_REF?.trim() || env?.GITHUB_SHA?.trim() || null, + // GitHub's GITHUB_SHA describes the workflow event, not necessarily the + // checked-out ref selected by a manual or remote QA run. + ref: + params.env?.OPENCLAW_QA_REF?.trim() || + resolveQaEvidenceCheckoutRef(params.repoRoot) || + params.env?.GITHUB_SHA?.trim() || + null, os: process.platform, nodeVersion: process.version, }; @@ -550,7 +574,10 @@ export function buildQaSuiteEvidenceSummary( }, ): QaEvidenceSummaryJson { const provider = buildQaEvidenceProvider(params); - const environment = resolveQaEvidenceEnvironment(params.env); + const environment = resolveQaEvidenceEnvironment({ + env: params.env, + repoRoot: params.repoRoot, + }); const packageSource = resolveQaEvidenceBuildPackageSource(params); const runner = resolveQaEvidenceRunner({ env: params.env, fallback: params.runner }); const profile = resolveQaEvidenceProfile({ @@ -622,7 +649,10 @@ function buildTestRunnerEvidenceSummary( }, ): QaEvidenceSummaryJson { const provider = buildQaEvidenceProvider(params); - const environment = resolveQaEvidenceEnvironment(params.env); + const environment = resolveQaEvidenceEnvironment({ + env: params.env, + repoRoot: params.repoRoot, + }); const packageSource = resolveQaEvidenceBuildPackageSource(params); const runner = resolveQaEvidenceRunner({ env: params.env, @@ -726,7 +756,10 @@ export function buildLiveTransportEvidenceSummary( }, ): QaEvidenceSummaryJson { const provider = buildQaEvidenceProvider(params); - const environment = resolveQaEvidenceEnvironment(params.env); + const environment = resolveQaEvidenceEnvironment({ + env: params.env, + repoRoot: params.repoRoot, + }); const packageSource = resolveQaEvidenceBuildPackageSource(params); const runner = resolveQaEvidenceRunner({ env: params.env, fallback: params.runner }); const profile = resolveQaEvidenceProfile({ diff --git a/extensions/qa-lab/src/live-transports/discord/discord-live.runtime.ts b/extensions/qa-lab/src/live-transports/discord/discord-live.runtime.ts index ae374a4d8d8..b2db1f191ff 100644 --- a/extensions/qa-lab/src/live-transports/discord/discord-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/discord/discord-live.runtime.ts @@ -1863,6 +1863,7 @@ export async function runDiscordQaLive(params: { generatedAt: finishedAt, primaryModel, providerMode, + repoRoot, transportId: "discord", }); await fs.writeFile( diff --git a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts index 27562013a48..233b0da20bf 100644 --- a/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/slack/slack-live.runtime.ts @@ -2037,6 +2037,7 @@ export async function runSlackQaLive(params: { generatedAt: finishedAt, primaryModel, providerMode, + repoRoot, transportId: "slack", }); await fs.writeFile( diff --git a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts index 58f936f1164..88f96bdea8a 100644 --- a/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/telegram/telegram-live.runtime.ts @@ -2188,6 +2188,7 @@ export async function runTelegramQaLive(params: { generatedAt: finishedAt, primaryModel, providerMode, + repoRoot, checks: scenarioResults, transportId: "telegram", }); diff --git a/extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts b/extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts index 1fdf0799116..ce7c9ab2519 100644 --- a/extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts +++ b/extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts @@ -3282,6 +3282,7 @@ export async function runWhatsAppQaLive(params: { generatedAt: finishedAt, primaryModel, providerMode, + repoRoot, transportId: "whatsapp", }); await fs.writeFile( diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index d63046fdcd5..4527873651c 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -848,6 +848,7 @@ async function runQaRuntimeParitySuite(params: { const finishedAt = new Date(); const { evidence, evidencePath, report, reportPath, summaryPath } = await writeQaSuiteArtifacts( { + repoRoot: params.repoRoot, outputDir: params.outputDir, startedAt: params.startedAt, finishedAt, @@ -900,6 +901,7 @@ async function runQaRuntimeParitySuite(params: { } async function writeQaSuiteArtifacts(params: { + repoRoot?: string; outputDir: string; startedAt: Date; finishedAt: Date; @@ -974,6 +976,7 @@ async function writeQaSuiteArtifacts(params: { generatedAt: params.finishedAt.toISOString(), primaryModel: params.primaryModel, providerMode: params.providerMode, + repoRoot: params.repoRoot, scenarioDefinitions: params.scenarioDefinitions, scenarioResults: params.scenarios, }) @@ -1296,6 +1299,7 @@ export async function runQaFlowSuite(params?: QaSuiteRunParams): Promise { const partialFinishedAt = new Date(); const { report, reportPath } = await writeQaSuiteArtifacts({ + repoRoot, outputDir, startedAt, finishedAt: partialFinishedAt, @@ -1448,6 +1452,7 @@ export async function runQaFlowSuite(params?: QaSuiteRunParams): Promise buildScenarioEvidenceTarget(result.scenario)), results: fallbackResults.map((result) => ({ id: result.scenario.id, @@ -616,6 +618,7 @@ function buildTestFileEvidence(params: { generatedAt: params.generatedAt, primaryModel: params.primaryModel, providerMode: params.providerMode, + repoRoot: params.repoRoot, targets: params.results.map((result) => buildScenarioEvidenceTarget(result.scenario)), results: params.results.map((result) => ({ id: result.scenario.id, @@ -802,6 +805,7 @@ export async function runQaTestFileScenarios( kind, primaryModel: params.primaryModel, providerMode: params.providerMode, + repoRoot: params.repoRoot, results, }); const paths = await writeTestFileEvidenceFile({ diff --git a/scripts/qa/ux-matrix-evidence-producer.ts b/scripts/qa/ux-matrix-evidence-producer.ts index f71e3879e14..4730f77e688 100644 --- a/scripts/qa/ux-matrix-evidence-producer.ts +++ b/scripts/qa/ux-matrix-evidence-producer.ts @@ -10,6 +10,7 @@ import { QA_EVIDENCE_FILENAME, QA_EVIDENCE_SUMMARY_KIND, QA_EVIDENCE_SUMMARY_SCHEMA_VERSION, + resolveQaEvidenceEnvironment, validateQaEvidenceSummaryJson, type QaEvidenceStatus, type QaEvidenceSummaryEntry, @@ -174,15 +175,15 @@ function sanitizeArtifactText( function buildExecution(params: { artifacts: MatrixCell["artifacts"]; + repoRoot: string; source: string; }): QaEvidenceSummaryEntry["execution"] { return { runner: "ux-matrix-script-producer", - environment: { - ref: process.env.OPENCLAW_QA_REF?.trim() || process.env.GITHUB_SHA?.trim() || null, - os: process.platform, - nodeVersion: process.version, - }, + environment: resolveQaEvidenceEnvironment({ + env: process.env, + repoRoot: params.repoRoot, + }), provider: { id: "ux-matrix", live: false, @@ -202,7 +203,7 @@ function buildExecution(params: { }; } -function buildEvidenceEntry(cell: MatrixCell): QaEvidenceSummaryEntry { +function buildEvidenceEntry(cell: MatrixCell, repoRoot: string): QaEvidenceSummaryEntry { const source = `ux-matrix:${cell.surface}:${cell.stage}`; return { test: { @@ -221,6 +222,7 @@ function buildEvidenceEntry(cell: MatrixCell): QaEvidenceSummaryEntry { ], execution: buildExecution({ artifacts: cell.artifacts, + repoRoot, source, }), result: { @@ -243,13 +245,14 @@ function buildEvidenceEntry(cell: MatrixCell): QaEvidenceSummaryEntry { function buildEvidenceSummary(params: { cells: readonly MatrixCell[]; generatedAt: string; + repoRoot: string; }): QaEvidenceSummaryJson { return validateQaEvidenceSummaryJson({ kind: QA_EVIDENCE_SUMMARY_KIND, schemaVersion: QA_EVIDENCE_SUMMARY_SCHEMA_VERSION, generatedAt: params.generatedAt, evidenceMode: "full", - entries: params.cells.map(buildEvidenceEntry), + entries: params.cells.map((cell) => buildEvidenceEntry(cell, params.repoRoot)), }); } @@ -693,6 +696,7 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) { const previewEvidence = buildEvidenceSummary({ cells: initialCells, generatedAt: new Date().toISOString(), + repoRoot: options.repoRoot, }); const screenshotLog = await fs.readFile(path.join(screenshotCellDir, "logs.txt"), "utf8"); await writeProducerArtifactFixtureHtml({ @@ -753,7 +757,11 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) { ...initialCells, ]; - const evidence = buildEvidenceSummary({ cells, generatedAt: new Date().toISOString() }); + const evidence = buildEvidenceSummary({ + cells, + generatedAt: new Date().toISOString(), + repoRoot: options.repoRoot, + }); await writeProducerArtifactFixtureHtml({ artifactBase: options.artifactBase, evidence,