feat: add interactive qa lab suite runner

This commit is contained in:
Peter Steinberger
2026-04-06 17:20:41 +01:00
parent e70168212d
commit 350238d402
12 changed files with 996 additions and 33 deletions

View File

@@ -2,6 +2,7 @@ import {
type Bootstrap,
type OutcomesEnvelope,
type ReportEnvelope,
type RunnerSelection,
type Snapshot,
type TabId,
type UiState,
@@ -31,6 +32,25 @@ async function postJson<T>(path: string, body: unknown): Promise<T> {
return (await response.json()) as T;
}
/**
 * Returns the model refs and fast-mode flag to apply when the provider lane
 * is switched to `mode`.
 *
 * For the "live-openai" lane both models default to the key of the first entry
 * in `bootstrap.runnerCatalog.real`, falling back to "openai/gpt-5.4" when no
 * bootstrap or catalog entry is available, and fast mode is enabled. Any other
 * lane gets the fixed mock model pair with fast mode disabled.
 *
 * @param mode - Provider lane being selected.
 * @param bootstrap - Optional bootstrap payload supplying the real model catalog.
 * @returns The `primaryModel`, `alternateModel` and `fastMode` defaults.
 */
function defaultModelsForProviderMode(
  mode: RunnerSelection["providerMode"],
  bootstrap?: Bootstrap | null,
): Pick<RunnerSelection, "primaryModel" | "alternateModel" | "fastMode"> {
  // Guard clause: everything except the live lane uses the synthetic pair.
  if (mode !== "live-openai") {
    return {
      primaryModel: "mock-openai/gpt-5.4",
      alternateModel: "mock-openai/gpt-5.4-alt",
      fastMode: false,
    };
  }

  // Primary and alternate share the same default on the live lane.
  const liveModel = bootstrap?.runnerCatalog.real[0]?.key ?? "openai/gpt-5.4";
  return {
    primaryModel: liveModel,
    alternateModel: liveModel,
    fastMode: true,
  };
}
export async function createQaLabApp(root: HTMLDivElement) {
const state: UiState = {
bootstrap: null,
@@ -41,6 +61,8 @@ export async function createQaLabApp(root: HTMLDivElement) {
selectedThreadId: null,
selectedScenarioId: null,
activeTab: "debug",
runnerDraft: null,
runnerDraftDirty: false,
composer: {
conversationKind: "direct",
conversationId: "alice",
@@ -64,6 +86,13 @@ export async function createQaLabApp(root: HTMLDivElement) {
state.snapshot = snapshot;
state.latestReport = report.report ?? bootstrap.latestReport;
state.scenarioRun = outcomes.run;
if (!state.runnerDraft || !state.runnerDraftDirty) {
state.runnerDraft = {
...bootstrap.runner.selection,
scenarioIds: [...bootstrap.runner.selection.scenarioIds],
};
state.runnerDraftDirty = false;
}
if (!state.selectedConversationId) {
state.selectedConversationId = snapshot.conversations[0]?.id ?? null;
}
@@ -86,6 +115,22 @@ export async function createQaLabApp(root: HTMLDivElement) {
render();
}
/**
 * Replaces the runner draft with the result of `mutator`, flags the draft as
 * dirty, and re-renders.
 *
 * When no draft exists yet, one is seeded from a copy of the bootstrap's
 * runner selection (with its own `scenarioIds` array) before `mutator` runs.
 * If there is neither a draft nor a bootstrap selection, nothing happens.
 *
 * @param mutator - Receives the current draft and returns the next one.
 */
function updateRunnerDraft(mutator: (draft: RunnerSelection) => RunnerSelection) {
  const seed = state.bootstrap?.runner.selection;
  const current =
    state.runnerDraft ?? (seed ? { ...seed, scenarioIds: [...seed.scenarioIds] } : null);
  if (!current) {
    return;
  }
  // Publish the (possibly freshly seeded) draft before the mutator runs.
  state.runnerDraft = current;
  state.runnerDraft = mutator(current);
  state.runnerDraftDirty = true;
  render();
}
async function runSelfCheck() {
state.busy = true;
state.error = null;
@@ -163,6 +208,42 @@ export async function createQaLabApp(root: HTMLDivElement) {
}
}
/**
 * Submits the current runner draft to `/api/scenario/suite`.
 *
 * Without a draft it only records an error message and re-renders. Otherwise
 * it marks the UI busy, posts the draft's fields, adopts a copy of the
 * selection returned by the server as the new clean draft, switches to the
 * "debug" tab and refreshes. Failures are surfaced through `state.error`; the
 * busy flag is always cleared afterwards.
 */
async function runSuite() {
  if (state.runnerDraft === null || state.runnerDraft === undefined) {
    state.error = "Runner selection not ready yet.";
    render();
    return;
  }

  state.busy = true;
  state.error = null;
  render();

  try {
    const { providerMode, primaryModel, alternateModel, fastMode, scenarioIds } =
      state.runnerDraft;
    const payload = { providerMode, primaryModel, alternateModel, fastMode, scenarioIds };
    const response = await postJson<{ runner: { selection: RunnerSelection } }>(
      "/api/scenario/suite",
      payload,
    );
    const { selection: accepted } = response.runner;
    // Copy the id list so the draft never aliases the response payload.
    state.runnerDraft = { ...accepted, scenarioIds: [...accepted.scenarioIds] };
    state.runnerDraftDirty = false;
    state.activeTab = "debug";
    await refresh();
  } catch (error) {
    if (error instanceof Error) {
      state.error = error.message;
    } else {
      state.error = String(error);
    }
    render();
  } finally {
    state.busy = false;
    render();
  }
}
function downloadReport() {
if (!state.latestReport?.markdown) {
return;
@@ -221,10 +302,32 @@ export async function createQaLabApp(root: HTMLDivElement) {
void resetState();
});
root
.querySelector<HTMLButtonElement>("[data-action='self-check']")!
.addEventListener("click", () => {
.querySelector<HTMLButtonElement>("[data-action='self-check']")
?.addEventListener("click", () => {
void runSelfCheck();
});
root
.querySelector<HTMLButtonElement>("[data-action='run-suite']")
?.addEventListener("click", () => {
void runSuite();
});
root
.querySelector<HTMLButtonElement>("[data-action='select-all-scenarios']")
?.addEventListener("click", () => {
updateRunnerDraft((draft) => ({
...draft,
scenarioIds:
state.bootstrap?.scenarios.map((scenario) => scenario.id) ?? draft.scenarioIds,
}));
});
root
.querySelector<HTMLButtonElement>("[data-action='clear-scenarios']")
?.addEventListener("click", () => {
updateRunnerDraft((draft) => ({
...draft,
scenarioIds: [],
}));
});
root.querySelector<HTMLButtonElement>("[data-action='send']")?.addEventListener("click", () => {
void sendInbound();
});
@@ -233,6 +336,58 @@ export async function createQaLabApp(root: HTMLDivElement) {
?.addEventListener("click", () => {
downloadReport();
});
root.querySelector<HTMLSelectElement>("#provider-mode")?.addEventListener("change", (event) => {
const mode =
(event.currentTarget as HTMLSelectElement).value === "live-openai"
? "live-openai"
: "mock-openai";
updateRunnerDraft((draft) => ({
...draft,
providerMode: mode,
...defaultModelsForProviderMode(mode, state.bootstrap),
}));
});
root.querySelector<HTMLInputElement>("#fast-mode")?.addEventListener("change", (event) => {
updateRunnerDraft((draft) => ({
...draft,
fastMode: (event.currentTarget as HTMLInputElement).checked,
}));
});
root.querySelector<HTMLInputElement>("#primary-model")?.addEventListener("input", (event) => {
updateRunnerDraft((draft) => ({
...draft,
primaryModel: (event.currentTarget as HTMLInputElement).value,
}));
});
root.querySelector<HTMLInputElement>("#alternate-model")?.addEventListener("input", (event) => {
updateRunnerDraft((draft) => ({
...draft,
alternateModel: (event.currentTarget as HTMLInputElement).value,
}));
});
root.querySelectorAll<HTMLInputElement>("[data-scenario-toggle-id]").forEach((node) => {
node.addEventListener("change", () => {
const scenarioId = node.dataset.scenarioToggleId;
if (!scenarioId) {
return;
}
updateRunnerDraft((draft) => {
const selected = new Set(draft.scenarioIds);
if (node.checked) {
selected.add(scenarioId);
} else {
selected.delete(scenarioId);
}
const orderedIds = state.bootstrap?.scenarios
.map((scenario) => scenario.id)
.filter((id) => selected.has(id)) ?? [...selected];
return {
...draft,
scenarioIds: orderedIds,
};
});
});
});
root
.querySelector<HTMLSelectElement>("#conversation-kind")

View File

@@ -442,6 +442,98 @@ textarea {
gap: 0.75rem;
}
/* Suite console form: two equal-width columns for the lane, fast-mode and model fields. */
.run-form-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 0.8rem;
margin-bottom: 1rem;
}
/* Label wrapping a checkbox: caption stacked above the box, content pushed to the end of the column. */
.checkbox-label {
display: flex;
flex-direction: column;
justify-content: end;
gap: 0.45rem;
}
/* Fixed-size checkbox with no margin. */
.checkbox-label input {
width: 1.05rem;
height: 1.05rem;
margin: 0;
}
/* Compact panel header variant: children aligned to the end of the cross axis. */
.panel-header.compact {
align-items: end;
}
/* Smaller padding for buttons inside a mini toolbar (the All / None scenario buttons). */
.toolbar.mini button {
padding: 0.48rem 0.78rem;
}
/* Scrollable vertical list of scenario toggles, capped at 28% of the viewport height. */
.scenario-picker {
display: flex;
flex-direction: column;
gap: 0.6rem;
max-height: 28vh;
overflow: auto;
}
/* One scenario row: checkbox column sized to content, text column takes the rest. */
.scenario-toggle {
display: grid;
grid-template-columns: auto minmax(0, 1fr);
gap: 0.75rem;
align-items: start;
padding: 0.82rem 0.9rem;
border-radius: 16px;
border: 1px solid rgba(255, 255, 255, 0.08);
background: rgba(255, 255, 255, 0.03);
}
/* Checkbox inside a scenario row; the top margin nudges it down next to the title text. */
.scenario-toggle input {
width: 1rem;
height: 1rem;
margin-top: 0.18rem;
}
/* Text column: title stacked above the id / surface line. */
.scenario-toggle span {
display: flex;
flex-direction: column;
gap: 0.22rem;
}
/* Secondary line (scenario id and surface) in the muted colour. */
.scenario-toggle small {
color: var(--muted);
}
/* Highlight applied to rows whose scenario is currently selected. */
.scenario-toggle.selected {
border-color: rgba(121, 224, 198, 0.34);
background: linear-gradient(180deg, rgba(121, 224, 198, 0.11), rgba(121, 224, 198, 0.04));
}
/* Vertical stack of run artifact paths shown in the run panel. */
.artifact-list {
display: flex;
flex-direction: column;
gap: 0.45rem;
margin-top: 0.9rem;
}
/* One artifact path per block-level <code> chip. */
.artifact-list code {
display: block;
padding: 0.62rem 0.72rem;
border-radius: 12px;
background: rgba(255, 255, 255, 0.04);
border: 1px solid rgba(255, 255, 255, 0.06);
color: #dce6f4;
white-space: pre-wrap;
/* Replaces the deprecated `word-break: break-word`; long unbroken paths still wrap. */
overflow-wrap: anywhere;
}
/* Runner error text; pre-wrap keeps line breaks from the error message. */
.runner-error {
margin: 0.9rem 0 0;
color: var(--danger);
white-space: pre-wrap;
}
.meta-label {
display: block;
margin-bottom: 0.28rem;
@@ -588,6 +680,11 @@ label span {
grid-template-columns: 1fr;
}
.run-form-grid,
.composer-grid {
grid-template-columns: 1fr;
}
.messages,
.report {
max-height: none;

View File

@@ -67,6 +67,11 @@ export type Bootstrap = {
senderId: string;
senderName: string;
};
runner: RunnerSnapshot;
runnerCatalog: {
status: "loading" | "ready" | "failed";
real: RunnerModelOption[];
};
};
export type ScenarioStep = {
@@ -101,6 +106,36 @@ export type ScenarioRun = {
};
};
/**
 * Parameters of a suite run as chosen in the suite console and posted to
 * `/api/scenario/suite`.
 */
export type RunnerSelection = {
/** Provider lane; the console labels these "Synthetic" and "Real providers". */
providerMode: "mock-openai" | "live-openai";
/** Model ref shown in the "Primary model" field. */
primaryModel: string;
/** Model ref shown in the "Alt model" field. */
alternateModel: string;
/** State of the "Fast mode" checkbox. */
fastMode: boolean;
/** Ids of the scenarios selected for the run. */
scenarioIds: string[];
};
/**
 * Runner state delivered with the bootstrap payload: lifecycle status, the
 * selection it applies to, and the outputs or error of the run.
 */
export type RunnerSnapshot = {
/** Lifecycle status; the console disables its controls while this is "running". */
status: "idle" | "running" | "completed" | "failed";
/** Selection associated with this runner state. */
selection: RunnerSelection;
/** Start time; presumably an ISO timestamp (it is passed to `formatIso`) — confirm. */
startedAt?: string;
/** Finish time; presumably an ISO timestamp (it is passed to `formatIso`) — confirm. */
finishedAt?: string;
/** Output locations of the run, or `null` when there are none. */
artifacts: null | {
outputDir: string;
reportPath: string;
summaryPath: string;
watchUrl: string;
};
/** Error text shown in the run panel, or `null` when there is no error. */
error: string | null;
};
/** One entry of the real-model catalog offered in the model dropdowns. */
export type RunnerModelOption = {
/** Model ref; used as both the `<option>` value and its visible label. */
key: string;
/** Model name; presumably a display name — not rendered by the visible code, confirm. */
name: string;
/** Provider id; for ad-hoc entries this is the part of the key before the first "/". */
provider: string;
/** Input kind; ad-hoc entries use "text" — other values are not visible here. */
input: string;
/** Preferred flag; ad-hoc entries set this to `false`. */
preferred: boolean;
};
export type OutcomesEnvelope = {
run: ScenarioRun | null;
};
@@ -116,6 +151,8 @@ export type UiState = {
selectedThreadId: string | null;
selectedScenarioId: string | null;
activeTab: TabId;
runnerDraft: RunnerSelection | null;
runnerDraftDirty: boolean;
composer: {
conversationKind: "direct" | "channel";
conversationId: string;
@@ -200,6 +237,49 @@ function renderStatusChip(status: ScenarioOutcome["status"]) {
return `<span class="status-chip status-${status}">${escapeHtml(label)}</span>`;
}
/**
 * Renders a status chip for the runner lifecycle status.
 *
 * "failed" and "completed" are mapped to the `fail` / `pass` chip tones; every
 * other status is used as the tone name unchanged. The visible label is the
 * escaped raw status.
 *
 * NOTE(review): "idle" therefore yields the class `status-idle`; confirm the
 * stylesheet defines it.
 */
function renderRunnerStatusChip(status: RunnerSnapshot["status"]) {
  let tone: string = status;
  if (status === "failed") {
    tone = "fail";
  } else if (status === "completed") {
    tone = "pass";
  }
  return `<span class="status-chip status-${tone}">${escapeHtml(status)}</span>`;
}
/**
 * Resolves the selection the suite console should display: the local draft
 * when one exists, otherwise the bootstrap's runner selection, otherwise
 * `null`.
 */
function deriveRunnerSelection(state: UiState): RunnerSelection | null {
  if (state.runnerDraft != null) {
    return state.runnerDraft;
  }
  const bootstrapped = state.bootstrap?.runner.selection;
  return bootstrapped ?? null;
}
/**
 * Renders a labelled `<select>` listing the given model options.
 *
 * If the current value is non-blank and not among the options' keys, an
 * ad-hoc option for it is placed first so the value stays selectable. Each
 * option uses its key as both value and label; the option whose key equals
 * `params.value` is marked selected.
 *
 * @param params.id - Element id for the `<select>`.
 * @param params.label - Caption rendered above the control.
 * @param params.value - Currently selected model ref.
 * @param params.options - Catalog entries to offer.
 * @param params.disabled - Whether the control is rendered disabled.
 * @returns HTML markup for the label and select.
 */
function renderRunnerModelSelect(params: {
  id: string;
  label: string;
  value: string;
  options: RunnerModelOption[];
  disabled: boolean;
}) {
  const { id, label, value, options: catalog, disabled } = params;

  const isListed = catalog.some((entry) => entry.key === value);
  let choices: RunnerModelOption[];
  if (isListed || !value.trim()) {
    choices = [...catalog];
  } else {
    // Keep a hand-entered ref selectable by synthesising an entry for it.
    const [prefix] = value.split("/");
    const adHoc: RunnerModelOption = {
      key: value,
      name: value,
      provider: prefix ?? "custom",
      input: "text",
      preferred: false,
    };
    choices = [adHoc, ...catalog];
  }

  const optionMarkup = choices
    .map((choice) => {
      const selectedAttr = choice.key === value ? " selected" : "";
      return `
<option value="${escapeHtml(choice.key)}"${selectedAttr}>
${escapeHtml(choice.key)}
</option>`;
    })
    .join("");
  const disabledAttr = disabled ? " disabled" : "";

  return `
<label>
<span>${escapeHtml(label)}</span>
<select id="${escapeHtml(id)}"${disabledAttr}>
${optionMarkup}
</select>
</label>`;
}
function renderRefs(refs: string[] | undefined, kind: "docs" | "code") {
if (!refs?.length) {
return `<p class="empty">No ${kind} refs attached.</p>`;
@@ -318,29 +398,158 @@ function renderScenarioInspector(state: UiState, scenarios: SeedScenario[]) {
/**
 * Renders the "Run state" panel as an HTML string from `state.scenarioRun`
 * and `state.bootstrap?.runner`: outcome counts, the runner's selection
 * summary, start/finish times, artifact paths and any runner error.
 *
 * NOTE(review): this span comes from a diff view and interleaves removed and
 * added lines (for example both `if (!run) {` and `if (!run && !runner) {`,
 * two heading variants, and the run-grid markup twice), so it does not read as
 * one coherent function body — confirm against the real file before relying
 * on it.
 */
function renderRunPanel(state: UiState) {
const run = state.scenarioRun;
if (!run) {
const runner = state.bootstrap?.runner ?? null;
if (!run && !runner) {
return `
<section class="panel">
<h2>Run state</h2>
<p class="empty">No structured scenario run yet. Seed plan loaded; outcomes arrive once a suite or self-check starts.</p>
</section>`;
}
const selection = runner?.selection ?? null;
return `
<section class="panel">
<div class="panel-header">
<div>
<p class="eyebrow">Live run</p>
<h2>${escapeHtml(run.kind === "suite" ? "Scenario suite" : "Self-check")}</h2>
<p class="eyebrow">Run state</p>
<h2>${escapeHtml(run?.kind === "self-check" ? "Self-check" : "Scenario suite")}</h2>
</div>
<span class="status-chip status-${run.status === "completed" ? "pass" : run.status === "running" ? "running" : "pending"}">${escapeHtml(run.status)}</span>
${runner ? renderRunnerStatusChip(runner.status) : ""}
</div>
<div class="run-grid">
<div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
<div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
<div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
<div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
${
run
? `
<div class="run-grid">
<div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
<div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
<div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
<div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
</div>`
: '<p class="empty">Waiting for structured outcomes.</p>'
}
${
selection
? `<p class="subtle">${escapeHtml(selection.providerMode === "live-openai" ? "Real provider lane" : "Synthetic OpenAI")} · ${escapeHtml(selection.primaryModel)} · ${selection.scenarioIds.length} scenarios</p>`
: ""
}
<p class="subtle">Started ${escapeHtml(formatIso(runner?.startedAt ?? run?.startedAt))} · Finished ${escapeHtml(formatIso(runner?.finishedAt ?? run?.finishedAt))}</p>
${
runner?.artifacts
? `
<div class="artifact-list">
<code>${escapeHtml(runner.artifacts.outputDir)}</code>
<code>${escapeHtml(runner.artifacts.reportPath)}</code>
<code>${escapeHtml(runner.artifacts.summaryPath)}</code>
</div>`
: ""
}
${runner?.error ? `<p class="runner-error">${escapeHtml(runner.error)}</p>` : ""}
</section>`;
}
/**
 * Renders the "Suite console" panel as an HTML string: lane and fast-mode
 * controls, primary/alt model fields, the scenario checklist with All / None
 * buttons, and the run buttons. Returns "" when no selection can be derived
 * from `state`.
 *
 * Every control is rendered disabled while `runner.status` is "running". The
 * two run buttons are also disabled while `state.busy` is set, and "Run
 * selected scenarios" additionally when no scenario is selected.
 *
 * NOTE(review): the `Started … Finished …` line just before the closing
 * `</section>` references `run`, which is not declared in this function; it
 * looks like a removed line left over from the diff view — confirm against
 * the real file.
 *
 * @param state - Current UI state (draft selection, bootstrap, busy flag).
 * @param scenarios - Scenarios to list in the picker.
 */
function renderRunnerConsole(state: UiState, scenarios: SeedScenario[]) {
const selection = deriveRunnerSelection(state);
if (!selection) {
return "";
}
const runner = state.bootstrap?.runner ?? null;
const realModelOptions = state.bootstrap?.runnerCatalog.real ?? [];
const selectedIds = new Set(selection.scenarioIds);
const isRunning = runner?.status === "running";
// Model fields become dropdowns only on the live lane with a non-empty catalog;
// otherwise they are free-text inputs.
const usesRealCatalog = selection.providerMode === "live-openai" && realModelOptions.length > 0;
return `
<section class="panel run-console">
<div class="panel-header">
<div>
<p class="eyebrow">Suite console</p>
<h2>Launch matrix</h2>
</div>
${runner ? renderRunnerStatusChip(runner.status) : ""}
</div>
<div class="run-form-grid">
<label>
<span>Lane</span>
<select id="provider-mode"${isRunning ? " disabled" : ""}>
<option value="mock-openai"${selection.providerMode === "mock-openai" ? " selected" : ""}>Synthetic</option>
<option value="live-openai"${selection.providerMode === "live-openai" ? " selected" : ""}>Real providers</option>
</select>
</label>
<label class="checkbox-label">
<span>Fast mode</span>
<input id="fast-mode" type="checkbox"${selection.fastMode ? " checked" : ""}${isRunning ? " disabled" : ""} />
</label>
${
usesRealCatalog
? renderRunnerModelSelect({
id: "primary-model",
label: "Primary model",
value: selection.primaryModel,
options: realModelOptions,
disabled: isRunning,
})
: `<label>
<span>Primary model</span>
<input id="primary-model" value="${escapeHtml(selection.primaryModel)}"${isRunning ? " disabled" : ""} />
</label>`
}
${
usesRealCatalog
? renderRunnerModelSelect({
id: "alternate-model",
label: "Alt model",
value: selection.alternateModel,
options: realModelOptions,
disabled: isRunning,
})
: `<label>
<span>Alt model</span>
<input id="alternate-model" value="${escapeHtml(selection.alternateModel)}"${isRunning ? " disabled" : ""} />
</label>`
}
</div>
${
selection.providerMode === "live-openai"
? `<p class="subtle">${escapeHtml(
state.bootstrap?.runnerCatalog.status === "loading"
? "Loading real model catalog…"
: state.bootstrap?.runnerCatalog.status === "failed"
? "Real model catalog unavailable; using manual refs."
: `${realModelOptions.length} real models ready. gpt-5.4 stays pinned first when available.`,
)}</p>`
: ""
}
<div class="panel-header compact">
<div>
<p class="eyebrow">Scenario selection</p>
<h3>${selection.scenarioIds.length}/${scenarios.length} armed</h3>
</div>
<div class="toolbar mini">
<button data-action="select-all-scenarios"${isRunning ? " disabled" : ""}>All</button>
<button data-action="clear-scenarios"${isRunning ? " disabled" : ""}>None</button>
</div>
</div>
<div class="scenario-picker">
${
scenarios.length === 0
? '<p class="empty">No scenarios available.</p>'
: scenarios
.map(
(scenario) => `
<label class="scenario-toggle${selectedIds.has(scenario.id) ? " selected" : ""}">
<input type="checkbox" data-scenario-toggle-id="${escapeHtml(scenario.id)}"${selectedIds.has(scenario.id) ? " checked" : ""}${isRunning ? " disabled" : ""} />
<span>
<strong>${escapeHtml(scenario.title)}</strong>
<small>${escapeHtml(scenario.id)} · ${escapeHtml(scenario.surface)}</small>
</span>
</label>`,
)
.join("")
}
</div>
<div class="toolbar lower">
<button class="accent" data-action="run-suite"${isRunning || selection.scenarioIds.length === 0 || state.busy ? " disabled" : ""}>Run selected scenarios</button>
<button data-action="self-check"${isRunning || state.busy ? " disabled" : ""}>Run self-check</button>
</div>
<p class="subtle">Started ${escapeHtml(formatIso(run.startedAt))} · Finished ${escapeHtml(formatIso(run.finishedAt))}</p>
</section>`;
}
@@ -507,6 +716,7 @@ export function renderQaLabUi(state: UiState) {
const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl);
const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard";
const run = state.scenarioRun;
const runner = state.bootstrap?.runner ?? null;
return `
<div class="${dashboardShellClass}">
@@ -539,7 +749,6 @@ export function renderQaLabUi(state: UiState) {
<div class="toolbar">
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run self-check</button>
</div>
</header>
<section class="statusbar">
@@ -552,11 +761,17 @@ export function renderQaLabUi(state: UiState) {
? `<span class="pill success">${escapeHtml(run.kind)} ${escapeHtml(run.status)} · ${run.counts.passed}/${run.counts.total} pass</span>`
: '<span class="pill">No structured run yet</span>'
}
${
runner
? `<span class="pill${runner.status === "failed" ? " error" : runner.status === "completed" ? " success" : ""}">${escapeHtml(runner.status)} lane · ${escapeHtml(runner.selection.providerMode)}</span>`
: ""
}
${state.latestReport ? `<span class="pill">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
</section>
<main class="workspace">
<aside class="rail">
${renderRunnerConsole(state, scenarios)}
${renderRunPanel(state)}
<section class="panel">
<h2>Conversations</h2>