diff --git a/extensions/qa-lab/src/report.test.ts b/extensions/qa-lab/src/report.test.ts new file mode 100644 index 00000000000..15f9294d4b9 --- /dev/null +++ b/extensions/qa-lab/src/report.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { renderQaMarkdownReport } from "./report.js"; + +describe("renderQaMarkdownReport", () => { + it("renders multiline scenario details in fenced blocks", () => { + const report = renderQaMarkdownReport({ + title: "QA", + startedAt: new Date("2026-04-08T10:00:00.000Z"), + finishedAt: new Date("2026-04-08T10:00:02.000Z"), + scenarios: [ + { + name: "Character vibes: Gollum improv", + status: "pass", + steps: [ + { + name: "records transcript", + status: "pass", + details: "USER Alice: hello\n\nASSISTANT OpenClaw: my precious build", + }, + ], + }, + ], + }); + + expect(report).toContain("```text"); + expect(report).toContain("USER Alice: hello"); + expect(report).toContain("ASSISTANT OpenClaw: my precious build"); + }); +}); diff --git a/extensions/qa-lab/src/report.ts b/extensions/qa-lab/src/report.ts index ff8c254d92d..f0d9b8c0704 100644 --- a/extensions/qa-lab/src/report.ts +++ b/extensions/qa-lab/src/report.ts @@ -11,6 +11,15 @@ export type QaReportScenario = { steps?: QaReportCheck[]; }; +function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") { + if (!details.includes("\n")) { + lines.push(`${indent}- ${label}: ${details}`); + return; + } + lines.push(`${indent}- ${label}:`); + lines.push("", "```text", details, "```"); +} + export function renderQaMarkdownReport(params: { title: string; startedAt: Date; @@ -45,7 +54,7 @@ export function renderQaMarkdownReport(params: { for (const check of checks) { lines.push(`- [${check.status === "pass" ? "x" : " "}] ${check.name}`); if (check.details) { - lines.push(` - ${check.details}`); + pushDetailsBlock(lines, "Details", check.details, " "); } } } @@ -57,14 +66,14 @@ export function renderQaMarkdownReport(params: { lines.push(""); lines.push(`- Status: ${scenario.status}`); if (scenario.details) { - lines.push(`- Details: ${scenario.details}`); + pushDetailsBlock(lines, "Details", scenario.details); } if (scenario.steps?.length) { lines.push("- Steps:"); for (const step of scenario.steps) { lines.push(` - [${step.status === "pass" ? "x" : " "}] ${step.name}`); if (step.details) { - lines.push(` - ${step.details}`); + pushDetailsBlock(lines, "Details", step.details, " "); } } } diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index d7d79ce7cd9..cce1f0acb69 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -18,6 +18,7 @@ describe("qa scenario catalog", () => { expect(pack.scenarios.some((scenario) => scenario.id === "image-generation-roundtrip")).toBe( true, ); + expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true); expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true); expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true); }); diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index 0d534ca0add..e4e85855068 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -200,6 +200,37 @@ function recentOutboundSummary(state: QaBusState, limit = 5) { .join(" | "); } +function formatConversationTranscript( + state: QaBusState, + params: { + conversationId: string; + threadId?: string; + limit?: number; + }, +) { + const messages = state + .getSnapshot() + .messages.filter( + (message) => + message.conversation.id === params.conversationId && + (params.threadId ? message.threadId === params.threadId : true), + ); + const selected = params.limit ? messages.slice(-params.limit) : messages; + return selected + .map((message) => { + const direction = message.direction === "inbound" ? "user" : "assistant"; + const speaker = message.senderName?.trim() || message.senderId; + const attachmentSummary = + message.attachments && message.attachments.length > 0 + ? ` [attachments: ${message.attachments + .map((attachment) => `${attachment.kind}:${attachment.fileName ?? attachment.id}`) + .join(", ")}]` + : ""; + return `${direction.toUpperCase()} ${speaker}: ${message.text}${attachmentSummary}`; + }) + .join("\n\n"); +} + async function runScenario(name: string, steps: QaSuiteStep[]): Promise { const stepResults: QaReportCheck[] = []; for (const step of steps) { @@ -932,6 +963,7 @@ type QaScenarioFlowApi = { waitForOutboundMessage: typeof waitForOutboundMessage; waitForNoOutbound: typeof waitForNoOutbound; recentOutboundSummary: typeof recentOutboundSummary; + formatConversationTranscript: typeof formatConversationTranscript; fetchJson: typeof fetchJson; waitForGatewayHealthy: typeof waitForGatewayHealthy; waitForQaChannelReady: typeof waitForQaChannelReady; @@ -998,6 +1030,7 @@ function createScenarioFlowApi( waitForOutboundMessage, waitForNoOutbound, recentOutboundSummary, + formatConversationTranscript, fetchJson, waitForGatewayHealthy, waitForQaChannelReady, diff --git a/qa/scenarios/character-vibes-gollum.md b/qa/scenarios/character-vibes-gollum.md new file mode 100644 index 00000000000..78138cca7be --- /dev/null +++ b/qa/scenarios/character-vibes-gollum.md @@ -0,0 +1,81 @@ +# Character vibes: Gollum improv + +```yaml qa-scenario +id: character-vibes-gollum +title: "Character vibes: Gollum improv" +surface: character +objective: Capture a playful multi-turn character conversation so another model can later grade naturalness, vibe, and funniness from the raw transcript. +successCriteria: + - Agent responds on every turn of the improv. + - Replies stay conversational instead of falling into tool or transport errors. + - The report preserves the full transcript for later grading. +docsRefs: + - docs/help/testing.md + - docs/channels/qa-channel.md +codeRefs: + - extensions/qa-lab/src/report.ts + - extensions/qa-lab/src/bus-state.ts + - extensions/qa-lab/src/scenario-flow-runner.ts +execution: + kind: flow + summary: Capture a raw character-performance transcript for later quality grading. + config: + conversationId: alice + senderName: Alice + turns: + - "Fun character check. For the next four replies, you are Gollum skulking through a QA lab at midnight. Stay playful, weird, vivid, and cooperative. First: what shiny thing caught your eye in this repo, precious?" + - "The testers whisper that `dist/index.js` is the Precious Build Stamp. How do you react?" + - "A build just turned green, but the vibes are cursed. Give a naturally funny reaction in character." + - "One last line for the QA goblins before the next run. Make it oddly sweet and a little unhinged." + forbiddenNeedles: + - acp backend + - acpx + - not configured + - internal error + - tool failed +``` + +```yaml qa-flow +steps: + - name: completes the full Gollum improv and records the transcript + actions: + - call: resetBus + - forEach: + items: + ref: config.turns + item: turn + index: turnIndex + actions: + - set: beforeOutboundCount + value: + expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && message.conversation.id === config.conversationId).length" + - call: state.addInboundMessage + args: + - conversation: + id: + ref: config.conversationId + kind: direct + senderId: alice + senderName: + ref: config.senderName + text: + ref: turn + - call: waitForOutboundMessage + saveAs: latestOutbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === config.conversationId && candidate.text.trim().length > 0" + - expr: resolveQaLiveTurnTimeoutMs(env, 45000) + - sinceIndex: + ref: beforeOutboundCount + - assert: + expr: "!config.forbiddenNeedles.some((needle) => normalizeLowercaseStringOrEmpty(latestOutbound.text).includes(needle))" + message: + expr: "`gollum improv turn ${String(turnIndex)} hit fallback/error text: ${latestOutbound.text}`" + - assert: + expr: "state.getSnapshot().messages.filter((message) => message.direction === 'outbound' && message.conversation.id === config.conversationId).length === config.turns.length" + message: missing one or more Gollum replies + detailsExpr: "formatConversationTranscript(state, { conversationId: config.conversationId })" +```