feat: add qa character vibes eval

This commit is contained in:
Peter Steinberger
2026-04-08 12:05:04 +01:00
parent f3c304917a
commit 97dfbe0fe1
5 changed files with 156 additions and 3 deletions

View File

@@ -0,0 +1,29 @@
import { describe, expect, it } from "vitest";
import { renderQaMarkdownReport } from "./report.js";
describe("renderQaMarkdownReport", () => {
  it("renders multiline scenario details in fenced blocks", () => {
    // Two speaker turns separated by a blank line, so the details span several lines.
    const transcript = "USER Alice: hello\n\nASSISTANT OpenClaw: my precious build";

    const markdown = renderQaMarkdownReport({
      title: "QA",
      startedAt: new Date("2026-04-08T10:00:00.000Z"),
      finishedAt: new Date("2026-04-08T10:00:02.000Z"),
      scenarios: [
        {
          name: "Character vibes: Gollum improv",
          status: "pass",
          steps: [{ name: "records transcript", status: "pass", details: transcript }],
        },
      ],
    });

    // The fence opener and both transcript turns must all appear in the rendered report.
    const expectedFragments = [
      "```text",
      "USER Alice: hello",
      "ASSISTANT OpenClaw: my precious build",
    ];
    for (const fragment of expectedFragments) {
      expect(markdown).toContain(fragment);
    }
  });
});

View File

@@ -11,6 +11,15 @@ export type QaReportScenario = {
steps?: QaReportCheck[];
};
/**
 * Appends a labelled details entry to the report's line buffer.
 *
 * Single-line details are rendered inline as `- <label>: <details>`. Details
 * containing a newline are rendered as a `- <label>:` bullet followed by a
 * blank line and a `text` fenced code block holding the details verbatim.
 *
 * @param lines - Line buffer that is appended to in place.
 * @param label - Text shown before the colon in the bullet.
 * @param details - Detail text; a newline anywhere switches to the fenced form.
 * @param indent - Prefix applied to the bullet line only; the fenced block is
 *   pushed without it.
 */
function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") {
  if (!details.includes("\n")) {
    lines.push(`${indent}- ${label}: ${details}`);
    return;
  }

  // A fence only closes on a backtick run at least as long as the opener, so
  // make the fence strictly longer than any run inside the details. Otherwise
  // details that themselves contain ``` would end the block early.
  let longestBacktickRun = 0;
  for (const run of details.match(/`+/g) ?? []) {
    longestBacktickRun = Math.max(longestBacktickRun, run.length);
  }
  const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));

  lines.push(`${indent}- ${label}:`);
  lines.push("", `${fence}text`, details, fence);
}
export function renderQaMarkdownReport(params: {
title: string;
startedAt: Date;
@@ -45,7 +54,7 @@ export function renderQaMarkdownReport(params: {
for (const check of checks) {
lines.push(`- [${check.status === "pass" ? "x" : " "}] ${check.name}`);
if (check.details) {
lines.push(` - ${check.details}`);
pushDetailsBlock(lines, "Details", check.details, " ");
}
}
}
@@ -57,14 +66,14 @@ export function renderQaMarkdownReport(params: {
lines.push("");
lines.push(`- Status: ${scenario.status}`);
if (scenario.details) {
lines.push(`- Details: ${scenario.details}`);
pushDetailsBlock(lines, "Details", scenario.details);
}
if (scenario.steps?.length) {
lines.push("- Steps:");
for (const step of scenario.steps) {
lines.push(` - [${step.status === "pass" ? "x" : " "}] ${step.name}`);
if (step.details) {
lines.push(` - ${step.details}`);
pushDetailsBlock(lines, "Details", step.details, " ");
}
}
}

View File

@@ -18,6 +18,7 @@ describe("qa scenario catalog", () => {
expect(pack.scenarios.some((scenario) => scenario.id === "image-generation-roundtrip")).toBe(
true,
);
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true);
expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true);
expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true);
});

View File

@@ -200,6 +200,37 @@ function recentOutboundSummary(state: QaBusState, limit = 5) {
.join(" | ");
}
/**
 * Renders the messages of one conversation from the bus snapshot as a
 * plain-text transcript.
 *
 * Each message becomes `<ROLE> <speaker>: <text>`, where the role is `USER`
 * for inbound messages and `ASSISTANT` otherwise, and the speaker is the
 * trimmed sender name or, when that is empty, the sender id. Messages with
 * attachments get a trailing ` [attachments: kind:name, ...]` summary that
 * uses the file name when present and the attachment id otherwise. Entries are
 * separated by a blank line.
 *
 * @param state - Bus state whose snapshot supplies the messages.
 * @param params - `conversationId` selects the conversation; a truthy
 *   `threadId` further restricts to that thread; a truthy `limit` keeps only
 *   the trailing `limit` matching messages.
 * @returns The transcript text; empty when no message matches.
 */
function formatConversationTranscript(
  state: QaBusState,
  params: {
    conversationId: string;
    threadId?: string;
    limit?: number;
  },
) {
  const { conversationId, threadId, limit } = params;

  // Collect every message belonging to the requested conversation (and thread, if given).
  const matching = [];
  for (const entry of state.getSnapshot().messages) {
    if (entry.conversation.id !== conversationId) {
      continue;
    }
    if (threadId && entry.threadId !== threadId) {
      continue;
    }
    matching.push(entry);
  }

  // A falsy limit (undefined or 0) means "no truncation".
  const tail = limit ? matching.slice(-limit) : matching;

  const rendered: string[] = [];
  for (const entry of tail) {
    const role = (entry.direction === "inbound" ? "user" : "assistant").toUpperCase();
    const who = entry.senderName?.trim() || entry.senderId;

    let attachmentNote = "";
    if (entry.attachments && entry.attachments.length > 0) {
      const parts: string[] = [];
      for (const item of entry.attachments) {
        parts.push(`${item.kind}:${item.fileName ?? item.id}`);
      }
      attachmentNote = ` [attachments: ${parts.join(", ")}]`;
    }

    rendered.push(`${role} ${who}: ${entry.text}${attachmentNote}`);
  }
  return rendered.join("\n\n");
}
async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
const stepResults: QaReportCheck[] = [];
for (const step of steps) {
@@ -932,6 +963,7 @@ type QaScenarioFlowApi = {
waitForOutboundMessage: typeof waitForOutboundMessage;
waitForNoOutbound: typeof waitForNoOutbound;
recentOutboundSummary: typeof recentOutboundSummary;
formatConversationTranscript: typeof formatConversationTranscript;
fetchJson: typeof fetchJson;
waitForGatewayHealthy: typeof waitForGatewayHealthy;
waitForQaChannelReady: typeof waitForQaChannelReady;
@@ -998,6 +1030,7 @@ function createScenarioFlowApi(
waitForOutboundMessage,
waitForNoOutbound,
recentOutboundSummary,
formatConversationTranscript,
fetchJson,
waitForGatewayHealthy,
waitForQaChannelReady,