mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-04 15:10:22 +00:00
feat: add qa character vibes eval
This commit is contained in:
29
extensions/qa-lab/src/report.test.ts
Normal file
29
extensions/qa-lab/src/report.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { renderQaMarkdownReport } from "./report.js";
|
||||
|
||||
// Checks that step details spanning several lines are emitted inside a fenced
// `text` block and that each transcript line is present in the rendered report.
describe("renderQaMarkdownReport", () => {
  it("renders multiline scenario details in fenced blocks", () => {
    const transcript = "USER Alice: hello\n\nASSISTANT OpenClaw: my precious build";

    const report = renderQaMarkdownReport({
      title: "QA",
      startedAt: new Date("2026-04-08T10:00:00.000Z"),
      finishedAt: new Date("2026-04-08T10:00:02.000Z"),
      scenarios: [
        {
          name: "Character vibes: Gollum improv",
          status: "pass",
          steps: [{ name: "records transcript", status: "pass", details: transcript }],
        },
      ],
    });

    // Fence opener first, then both speaker lines of the transcript.
    const expectedFragments = [
      "```text",
      "USER Alice: hello",
      "ASSISTANT OpenClaw: my precious build",
    ];
    for (const fragment of expectedFragments) {
      expect(report).toContain(fragment);
    }
  });
});
|
||||
@@ -11,6 +11,15 @@ export type QaReportScenario = {
|
||||
steps?: QaReportCheck[];
|
||||
};
|
||||
|
||||
/**
 * Appends a labelled details entry to the Markdown `lines` buffer.
 *
 * - Details without a newline become a single bullet:
 *   `${indent}- ${label}: ${details}`.
 * - Details containing a newline become a `${indent}- ${label}:` bullet followed
 *   by a blank line and a fenced `text` block holding the details verbatim.
 *   The fence lines are emitted without `indent`.
 *
 * The fence is at least three backticks and always one backtick longer than the
 * longest backtick run inside `details`, so content that itself contains a
 * Markdown code fence cannot close the block early.
 *
 * @param lines - Output buffer; mutated in place.
 * @param label - Bullet label placed before the colon.
 * @param details - Text to render; may span multiple lines.
 * @param indent - Prefix applied to the bullet line only.
 */
function pushDetailsBlock(lines: string[], label: string, details: string, indent = "") {
  if (!details.includes("\n")) {
    lines.push(`${indent}- ${label}: ${details}`);
    return;
  }

  // Find the longest run of backticks in the payload so the fence can outgrow it.
  let longestBacktickRun = 0;
  for (const run of details.match(/`+/g) ?? []) {
    longestBacktickRun = Math.max(longestBacktickRun, run.length);
  }
  const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));

  lines.push(`${indent}- ${label}:`);
  lines.push("", `${fence}text`, details, fence);
}
|
||||
|
||||
export function renderQaMarkdownReport(params: {
|
||||
title: string;
|
||||
startedAt: Date;
|
||||
@@ -45,7 +54,7 @@ export function renderQaMarkdownReport(params: {
|
||||
for (const check of checks) {
|
||||
lines.push(`- [${check.status === "pass" ? "x" : " "}] ${check.name}`);
|
||||
if (check.details) {
|
||||
lines.push(` - ${check.details}`);
|
||||
pushDetailsBlock(lines, "Details", check.details, " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -57,14 +66,14 @@ export function renderQaMarkdownReport(params: {
|
||||
lines.push("");
|
||||
lines.push(`- Status: ${scenario.status}`);
|
||||
if (scenario.details) {
|
||||
lines.push(`- Details: ${scenario.details}`);
|
||||
pushDetailsBlock(lines, "Details", scenario.details);
|
||||
}
|
||||
if (scenario.steps?.length) {
|
||||
lines.push("- Steps:");
|
||||
for (const step of scenario.steps) {
|
||||
lines.push(` - [${step.status === "pass" ? "x" : " "}] ${step.name}`);
|
||||
if (step.details) {
|
||||
lines.push(` - ${step.details}`);
|
||||
pushDetailsBlock(lines, "Details", step.details, " ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ describe("qa scenario catalog", () => {
|
||||
expect(pack.scenarios.some((scenario) => scenario.id === "image-generation-roundtrip")).toBe(
|
||||
true,
|
||||
);
|
||||
expect(pack.scenarios.some((scenario) => scenario.id === "character-vibes-gollum")).toBe(true);
|
||||
expect(pack.scenarios.every((scenario) => scenario.execution?.kind === "flow")).toBe(true);
|
||||
expect(pack.scenarios.some((scenario) => scenario.execution.flow?.steps.length)).toBe(true);
|
||||
});
|
||||
|
||||
@@ -200,6 +200,37 @@ function recentOutboundSummary(state: QaBusState, limit = 5) {
|
||||
.join(" | ");
|
||||
}
|
||||
|
||||
/**
 * Renders the messages of one conversation from the bus snapshot as a
 * plain-text transcript.
 *
 * Messages are kept when their conversation id equals `params.conversationId`
 * and, if `params.threadId` is truthy, their thread id equals it as well.
 * A truthy `params.limit` keeps only the trailing `limit` messages.
 *
 * Each message becomes `USER <speaker>: <text>` for inbound messages or
 * `ASSISTANT <speaker>: <text>` otherwise, where the speaker is the trimmed
 * sender name, falling back to the sender id when the name is missing or blank.
 * Attachments, if any, are appended as ` [attachments: kind:name, ...]` using
 * the file name or, when absent, the attachment id. Entries are separated by a
 * blank line.
 */
function formatConversationTranscript(
  state: QaBusState,
  params: {
    conversationId: string;
    threadId?: string;
    limit?: number;
  },
) {
  const { conversationId, threadId, limit } = params;

  const inConversation = state.getSnapshot().messages.filter((entry) => {
    if (entry.conversation.id !== conversationId) {
      return false;
    }
    return threadId ? entry.threadId === threadId : true;
  });

  const tail = limit ? inConversation.slice(-limit) : inConversation;

  const rendered: string[] = [];
  for (const entry of tail) {
    const role = (entry.direction === "inbound" ? "user" : "assistant").toUpperCase();
    const who = entry.senderName?.trim() || entry.senderId;

    let attachmentNote = "";
    if (entry.attachments && entry.attachments.length > 0) {
      const described = entry.attachments.map((item) => `${item.kind}:${item.fileName ?? item.id}`);
      attachmentNote = ` [attachments: ${described.join(", ")}]`;
    }

    rendered.push(`${role} ${who}: ${entry.text}${attachmentNote}`);
  }
  return rendered.join("\n\n");
}
|
||||
|
||||
async function runScenario(name: string, steps: QaSuiteStep[]): Promise<QaSuiteScenarioResult> {
|
||||
const stepResults: QaReportCheck[] = [];
|
||||
for (const step of steps) {
|
||||
@@ -932,6 +963,7 @@ type QaScenarioFlowApi = {
|
||||
waitForOutboundMessage: typeof waitForOutboundMessage;
|
||||
waitForNoOutbound: typeof waitForNoOutbound;
|
||||
recentOutboundSummary: typeof recentOutboundSummary;
|
||||
formatConversationTranscript: typeof formatConversationTranscript;
|
||||
fetchJson: typeof fetchJson;
|
||||
waitForGatewayHealthy: typeof waitForGatewayHealthy;
|
||||
waitForQaChannelReady: typeof waitForQaChannelReady;
|
||||
@@ -998,6 +1030,7 @@ function createScenarioFlowApi(
|
||||
waitForOutboundMessage,
|
||||
waitForNoOutbound,
|
||||
recentOutboundSummary,
|
||||
formatConversationTranscript,
|
||||
fetchJson,
|
||||
waitForGatewayHealthy,
|
||||
waitForQaChannelReady,
|
||||
|
||||
Reference in New Issue
Block a user