diff --git a/extensions/diffs-language-pack/scripts/build-viewer.mjs b/extensions/diffs-language-pack/scripts/build-viewer.mjs index 06dcad17002..4d020ef8e6b 100644 --- a/extensions/diffs-language-pack/scripts/build-viewer.mjs +++ b/extensions/diffs-language-pack/scripts/build-viewer.mjs @@ -1,19 +1,45 @@ #!/usr/bin/env node -import { spawnSync } from "node:child_process"; +import fs from "node:fs/promises"; import path from "node:path"; import { fileURLToPath } from "node:url"; +import { build } from "esbuild"; -const scriptPath = path.resolve( - path.dirname(fileURLToPath(import.meta.url)), - "../../../scripts/build-diffs-viewer-runtime.mjs", -); -const result = spawnSync(process.execPath, [scriptPath, "full"], { stdio: "inherit" }); -if (result.error) { - throw result.error; +const extensionRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const repoRoot = path.resolve(extensionRoot, "../.."); +const outputPath = path.join(extensionRoot, "assets/viewer-runtime.js"); + +await fs.mkdir(path.dirname(outputPath), { recursive: true }); + +const result = await build({ + entryPoints: [path.join(repoRoot, "extensions/diffs/src/viewer-client.ts")], + bundle: true, + platform: "browser", + target: "es2020", + format: "esm", + minify: true, + legalComments: "none", + outfile: outputPath, + write: false, +}); + +const outputFile = result.outputFiles?.[0]; +if (!outputFile) { + throw new Error( + "esbuild did not produce extensions/diffs-language-pack/assets/viewer-runtime.js", + ); } -if (result.signal) { - console.error(`build-diffs-viewer-runtime exited with signal ${result.signal}`); - process.exit(1); + +const runtime = outputFile.text.replace(/[ \t]+$/gm, ""); +let previousRuntime = null; +try { + previousRuntime = await fs.readFile(outputPath, "utf8"); +} catch (error) { + if (error?.code !== "ENOENT") { + throw error; + } +} + +if (previousRuntime !== runtime) { + await fs.writeFile(outputPath, runtime); } -process.exit(result.status ?? 0); diff --git a/extensions/diffs/scripts/build-viewer.mjs b/extensions/diffs/scripts/build-viewer.mjs index 77ee6e42511..59379b933aa 100644 --- a/extensions/diffs/scripts/build-viewer.mjs +++ b/extensions/diffs/scripts/build-viewer.mjs @@ -1,19 +1,53 @@ #!/usr/bin/env node -import { spawnSync } from "node:child_process"; +import fs from "node:fs/promises"; import path from "node:path"; import { fileURLToPath } from "node:url"; +import { build } from "esbuild"; -const scriptPath = path.resolve( - path.dirname(fileURLToPath(import.meta.url)), - "../../../scripts/build-diffs-viewer-runtime.mjs", -); -const result = spawnSync(process.execPath, [scriptPath, "curated"], { stdio: "inherit" }); -if (result.error) { - throw result.error; +const extensionRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const repoRoot = path.resolve(extensionRoot, "../.."); +const outputPath = path.join(extensionRoot, "assets/viewer-runtime.js"); + +await fs.mkdir(path.dirname(outputPath), { recursive: true }); + +const result = await build({ + entryPoints: [path.join(extensionRoot, "src/viewer-client.ts")], + bundle: true, + platform: "browser", + target: "es2020", + format: "esm", + minify: true, + legalComments: "none", + outfile: outputPath, + write: false, + plugins: [ + { + name: "openclaw-diffs-curated-shiki", + setup(buildContext) { + buildContext.onResolve({ filter: /^shiki$/ }, () => ({ + path: path.join(repoRoot, "scripts/diffs-shiki-curated.ts"), + })); + }, + }, + ], +}); + +const outputFile = result.outputFiles?.[0]; +if (!outputFile) { + throw new Error("esbuild did not produce extensions/diffs/assets/viewer-runtime.js"); } -if (result.signal) { - console.error(`build-diffs-viewer-runtime exited with signal ${result.signal}`); - process.exit(1); + +const runtime = outputFile.text.replace(/[ \t]+$/gm, ""); +let previousRuntime = null; +try { + previousRuntime = await fs.readFile(outputPath, "utf8"); +} catch (error) { + if (error?.code !== "ENOENT") { + throw error; + } +} + +if (previousRuntime !== runtime) { + await fs.writeFile(outputPath, runtime); } -process.exit(result.status ?? 0); diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index a8e0aed0367..5698f576cef 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -323,7 +323,14 @@ describe("qa mock openai server", () => { stream: true, input: [ makeUserInput( - "Block streaming QA check: emit exactly two assistant message blocks in order. First exact marker: `BLOCK_ONE_OK`. Second exact marker: `BLOCK_TWO_OK`.", + [ + "Block streaming QA check: complete this whole sequence in one turn.", + "Step 1: send an assistant text block containing only this exact marker: `BLOCK_ONE_OK`.", + "That first marker block must be emitted before any tool call.", + "Step 2: after the first marker block, use the read tool exactly once on `QA_KICKOFF_TASK.md`.", + "Step 3: after that read completes, send a final assistant text block containing only this exact marker: `BLOCK_TWO_OK`.", + "Never put both markers in the same assistant text block.", + ].join("\n"), ), ], }), diff --git a/extensions/qa-lab/src/providers/mock-openai/server.ts b/extensions/qa-lab/src/providers/mock-openai/server.ts index 2b097566ab5..850bca339c1 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.ts @@ -820,6 +820,12 @@ function extractLastCapture(text: string, pattern: RegExp) { return lastMatch?.[1]?.trim() || null; } +function extractCaptures(text: string, pattern: RegExp) { + const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`; + const globalPattern = new RegExp(pattern.source, flags); + return Array.from(text.matchAll(globalPattern), (match) => match[1]?.trim()).filter(Boolean); +} + function extractLastMatchingUserText(texts: string[], pattern: RegExp) { for (let index = texts.length - 1; index >= 0; index -= 1) { const text = texts[index] ?? ""; @@ -872,6 +878,29 @@ function extractLabeledMarkerDirective(text: string, label: string) { ); } +function extractBlockStreamingMarkerDirectives(text: string) { + const firstLabeledMarker = extractLabeledMarkerDirective(text, "first exact marker"); + const secondLabeledMarker = extractLabeledMarkerDirective(text, "second exact marker"); + if (firstLabeledMarker && secondLabeledMarker) { + return { + first: firstLabeledMarker, + second: secondLabeledMarker, + }; + } + + const markers = extractCaptures(text, /exact marker\b[^:\n]{0,120}:\s*`([^`]+)`/i); + if (markers.length < 2) { + return null; + } + const [first, second] = markers.slice(-2); + return first && second + ? { + first, + second, + } + : null; +} + function extractQuotedToolArg(text: string, name: string) { const escapedName = escapeRegExp(name); return extractLastCapture(text, new RegExp(`\\b${escapedName}\\s*=\\s*"([^"]+)"`, "i")); @@ -1604,15 +1633,14 @@ async function buildResponsesPayload( extractExactReplyDirective(prompt) ?? extractExactReplyDirective(allInputText); const exactMarkerDirective = extractExactMarkerDirective(prompt) ?? extractExactMarkerDirective(allInputText); + const blockStreamingPrompt = + extractLastMatchingUserText(extractAllUserTexts(input), QA_BLOCK_STREAMING_PROMPT_RE) || + prompt || + allInputText; + const blockStreamingMarkers = + extractBlockStreamingMarkerDirectives(blockStreamingPrompt) ?? + extractBlockStreamingMarkerDirectives(allInputText); const latestImageUserTurn = extractLatestImageUserTurn(input); - const firstExactMarkerDirective = extractLabeledMarkerDirective( - allInputText, - "first exact marker", - ); - const secondExactMarkerDirective = extractLabeledMarkerDirective( - allInputText, - "second exact marker", - ); const isGroupChat = allInputText.includes('"is_group_chat": true'); const isBaselineUnmentionedChannelChatter = /\bno bot ping here\b/i.test(prompt); const hasReasoningOnlyRetryInstruction = allInputText.includes(QA_REASONING_ONLY_RETRY_NEEDLE); @@ -1832,23 +1860,19 @@ async function buildResponsesPayload( } return buildAssistantEvents(toolProgressReplyDirective); } - if ( - QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) && - firstExactMarkerDirective && - secondExactMarkerDirective - ) { + if (QA_BLOCK_STREAMING_PROMPT_RE.test(allInputText) && blockStreamingMarkers) { return buildAssistantEvents([ { id: "msg_mock_block_1", phase: "final_answer", - streamDeltas: splitMockStreamingText(firstExactMarkerDirective), - text: firstExactMarkerDirective, + streamDeltas: splitMockStreamingText(blockStreamingMarkers.first), + text: blockStreamingMarkers.first, }, { id: "msg_mock_block_2", phase: "final_answer", - streamDeltas: splitMockStreamingText(secondExactMarkerDirective), - text: secondExactMarkerDirective, + streamDeltas: splitMockStreamingText(blockStreamingMarkers.second), + text: blockStreamingMarkers.second, }, ]); } diff --git a/extensions/qa-matrix/src/runners/contract/runtime.test.ts b/extensions/qa-matrix/src/runners/contract/runtime.test.ts index d9dc7813d2d..ba51b9cae25 100644 --- a/extensions/qa-matrix/src/runners/contract/runtime.test.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.test.ts @@ -156,6 +156,43 @@ describe("matrix live qa runtime", () => { } }); + it("uses a scenario provider override for the canary only when the whole run is pinned", () => { + const blockStreamingScenario = liveTesting.MATRIX_QA_SCENARIOS.find( + (scenario) => scenario.id === "matrix-room-block-streaming", + ); + const threadScenario = liveTesting.MATRIX_QA_SCENARIOS.find( + (scenario) => scenario.id === "matrix-thread-follow-up", + ); + expect(blockStreamingScenario).toBeDefined(); + expect(threadScenario).toBeDefined(); + + const pinnedSchedule = liveTesting.scheduleMatrixQaScenariosInCatalogOrder([ + blockStreamingScenario!, + ]); + expect(liveTesting.selectMatrixQaCanaryProviderMode(pinnedSchedule)).toBe("mock-openai"); + + const mixedSchedule = liveTesting.scheduleMatrixQaScenariosInCatalogOrder([ + threadScenario!, + blockStreamingScenario!, + ]); + expect(liveTesting.selectMatrixQaCanaryProviderMode(mixedSchedule)).toBeUndefined(); + }); + + it("preserves explicit model pins when a scenario keeps the suite provider", () => { + const defaultModels = { + alternateModel: "mock-openai/custom-alt", + primaryModel: "mock-openai/custom", + providerMode: "mock-openai" as const, + }; + + expect( + liveTesting.resolveMatrixQaGatewayModels({ + defaultModels, + providerMode: "mock-openai", + }), + ).toEqual(defaultModels); + }); + it("injects a temporary Matrix account into the QA gateway config", () => { const baseCfg: OpenClawConfig = { plugins: { diff --git a/extensions/qa-matrix/src/runners/contract/runtime.ts b/extensions/qa-matrix/src/runners/contract/runtime.ts index e159655328e..6a41ed6bd56 100644 --- a/extensions/qa-matrix/src/runners/contract/runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.ts @@ -7,7 +7,7 @@ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import { loadQaRuntimeModule } from "openclaw/plugin-sdk/qa-runner-runtime"; import type { QaReportCheck } from "../../report.js"; import { renderQaMarkdownReport } from "../../report.js"; -import { type QaProviderModeInput } from "../../run-config.js"; +import { normalizeQaProviderMode, type QaProviderModeInput } from "../../run-config.js"; import { appendLiveLaneIssue, buildLiveLaneArtifactsError, @@ -23,7 +23,7 @@ import { } from "../../substrate/config.js"; import type { MatrixQaObservedEvent } from "../../substrate/events.js"; import { startMatrixQaHarness } from "../../substrate/harness.runtime.js"; -import { resolveMatrixQaModels } from "./model-selection.js"; +import { resolveMatrixQaModels, type ResolvedMatrixQaModels } from "./model-selection.js"; import type { MatrixQaSyncStreams } from "./scenario-runtime-shared.js"; import { MATRIX_QA_SCENARIOS, @@ -59,8 +59,22 @@ type MatrixQaLiveLaneGatewayHarness = { stop(opts?: { keepTemp?: boolean; preserveToDir?: string }): Promise; }; -function buildMatrixQaGatewayConfigKey(overrides?: MatrixQaConfigOverrides) { - return JSON.stringify(overrides ?? null); +function buildMatrixQaGatewayConfigKey(params: { + models?: ResolvedMatrixQaModels; + overrides?: MatrixQaConfigOverrides; + providerModeKey?: string; +}) { + return JSON.stringify({ + models: params.models + ? { + alternateModel: params.models.alternateModel, + primaryModel: params.models.primaryModel, + providerMode: params.models.providerMode, + } + : undefined, + overrides: params.overrides ?? null, + providerModeKey: params.providerModeKey, + }); } const MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS = new Set(["matrix-e2ee-wrong-account-recovery-key"]); @@ -78,6 +92,11 @@ type MatrixQaScheduledScenario = { scenario: (typeof MATRIX_QA_SCENARIOS)[number]; }; +type MatrixQaGatewaySelection = { + overrides?: MatrixQaConfigOverrides; + providerMode?: QaProviderModeInput; +}; + type MatrixQaScenarioConfigEntry = MatrixQaSummary["config"]["scenarios"][number]; type MatrixQaSummary = { @@ -297,16 +316,20 @@ function buildMatrixQaScenarioConfigEntry(params: { ...params.gatewayConfigParams, overrides: params.scenario.configOverrides, }); + const providerSummary = params.scenario.providerMode + ? `providerMode=${params.scenario.providerMode}` + : undefined; + const configSummary = + params.scenario.configOverrides === undefined + ? undefined + : summarizeMatrixQaConfigSnapshot(snapshot); return { entry: { config: snapshot, id: params.scenario.id, title: params.scenario.title, }, - summary: - params.scenario.configOverrides === undefined - ? undefined - : summarizeMatrixQaConfigSnapshot(snapshot), + summary: [providerSummary, configSummary].filter(Boolean).join(", ") || undefined, }; } @@ -345,7 +368,10 @@ function scheduleMatrixQaScenariosInCatalogOrder( tailEntries.push(entry); continue; } - const key = buildMatrixQaGatewayConfigKey(entry.scenario.configOverrides); + const key = buildMatrixQaGatewayConfigKey({ + overrides: entry.scenario.configOverrides, + providerModeKey: entry.scenario.providerMode ?? "suite", + }); const existingIndex = groupIndexes.get(key); if (existingIndex !== undefined) { groupedEntries[existingIndex]?.push(entry); @@ -358,6 +384,38 @@ function scheduleMatrixQaScenariosInCatalogOrder( return [...groupedEntries.flat(), ...tailEntries]; } +function selectMatrixQaCanaryProviderMode( + scheduledScenarios: readonly MatrixQaScheduledScenario[], +): QaProviderModeInput | undefined { + let selectedProviderMode: QaProviderModeInput | undefined; + for (const { scenario } of scheduledScenarios) { + if (!scenario.providerMode) { + return undefined; + } + if (!selectedProviderMode) { + selectedProviderMode = scenario.providerMode; + continue; + } + if (scenario.providerMode !== selectedProviderMode) { + return undefined; + } + } + return selectedProviderMode; +} + +function resolveMatrixQaGatewayModels(params: { + defaultModels: ResolvedMatrixQaModels; + providerMode?: QaProviderModeInput; +}): ResolvedMatrixQaModels { + if (!params.providerMode) { + return params.defaultModels; + } + const providerMode = normalizeQaProviderMode(params.providerMode); + return providerMode === params.defaultModels.providerMode + ? params.defaultModels + : resolveMatrixQaModels({ providerMode }); +} + function getMatrixQaScenarioRestartReadyTimeoutMs(scenario: { timeoutMs: number }): number { return scenario.timeoutMs; } @@ -559,11 +617,12 @@ export async function runMatrixQaLive(params: { path.join(repoRoot, ".artifacts", "qa-e2e", `matrix-${Date.now().toString(36)}`); await fs.mkdir(outputDir, { recursive: true }); - const { providerMode, primaryModel, alternateModel } = resolveMatrixQaModels({ + const defaultModels = resolveMatrixQaModels({ providerMode: params.providerMode, primaryModel: params.primaryModel, alternateModel: params.alternateModel, }); + const { providerMode } = defaultModels; const sutAccountId = params.sutAccountId?.trim() || "sut"; const scenarios = findMatrixQaScenarios(params.scenarioIds, params.profile); const runSuffix = randomUUID().slice(0, 8); @@ -668,8 +727,16 @@ export async function runMatrixQaLive(params: { const scheduledScenarios = scheduleMatrixQaScenariosInCatalogOrder(scenarios); try { - const ensureGatewayHarness = async (overrides?: MatrixQaConfigOverrides) => { - const nextKey = buildMatrixQaGatewayConfigKey(overrides); + const ensureGatewayHarness = async (selection: MatrixQaGatewaySelection = {}) => { + const models = resolveMatrixQaGatewayModels({ + defaultModels, + providerMode: selection.providerMode, + }); + const overrides = selection.overrides; + const nextKey = buildMatrixQaGatewayConfigKey({ + models, + overrides, + }); if (gatewayHarness && gatewayHarnessKey === nextKey) { return { durationMs: 0, @@ -694,9 +761,9 @@ export async function runMatrixQaLive(params: { createGatewayConfig: () => ({}), }, transportBaseUrl: "http://127.0.0.1:43123", - providerMode, - primaryModel, - alternateModel, + providerMode: models.providerMode, + primaryModel: models.primaryModel, + alternateModel: models.alternateModel, fastMode: params.fastMode, controlUiEnabled: false, mutateConfig: (cfg) => @@ -719,7 +786,9 @@ export async function runMatrixQaLive(params: { }; { - const ensured = await ensureGatewayHarness(); + const ensured = await ensureGatewayHarness({ + providerMode: selectMatrixQaCanaryProviderMode(scheduledScenarios), + }); gatewayHarness = ensured.harness; initialGatewayBootMs = ensured.durationMs; } @@ -781,7 +850,10 @@ export async function runMatrixQaLive(params: { let transportInterruptMs = 0; try { writeMatrixQaProgress(`scenario start ${scenario.id}`); - const scenarioGateway = await ensureGatewayHarness(scenario.configOverrides); + const scenarioGateway = await ensureGatewayHarness({ + overrides: scenario.configOverrides, + providerMode: scenario.providerMode, + }); gatewayBootMs = scenarioGateway.durationMs; scenarioGatewayBootMs += gatewayBootMs; const measuredScenario = await measureMatrixQaStep(() => @@ -1142,6 +1214,8 @@ export const testing = { buildMatrixQaSummary, getMatrixQaScenarioRestartReadyTimeoutMs, scheduleMatrixQaScenariosInCatalogOrder, + selectMatrixQaCanaryProviderMode, + resolveMatrixQaGatewayModels, MATRIX_QA_SCENARIOS, buildMatrixQaConfig, buildMatrixQaConfigSnapshot, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts index 3e00c4a6ec3..008d55075d3 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts @@ -1,3 +1,4 @@ +import { type QaProviderModeInput } from "../../run-config.js"; import { collectLiveTransportStandardScenarioCoverage, selectLiveTransportScenarios, @@ -113,6 +114,7 @@ export type MatrixQaE2eeScenarioId = Extract & { configOverrides?: MatrixQaConfigOverrides; + providerMode?: QaProviderModeInput; topology?: MatrixQaTopologySpec; }; @@ -454,6 +456,7 @@ export const MATRIX_QA_SCENARIOS: MatrixQaScenarioDefinition[] = [ timeoutMs: 75_000, title: "Matrix block streaming preserves completed quiet preview blocks", topology: MATRIX_QA_BLOCK_ROOM_TOPOLOGY, + providerMode: "mock-openai", configOverrides: { agentDefaults: { blockStreamingChunk: { diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 79463fbe1f6..3aec59ebba5 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -4173,6 +4173,7 @@ describe("matrix live qa scenarios", () => { ); expect(body).toContain("Never put both markers in the same assistant text block."); expect(scenario.configOverrides?.toolProfile).toBe("coding"); + expect(scenario.providerMode).toBe("mock-openai"); expect(mockObjectArg(waitForRoomEvent, "waitForRoomEvent", 1).since).toBe( "driver-sync-block-one", );