From ecb3e0a62d6040e8a43768ce4c2cb6165135d511 Mon Sep 17 00:00:00 2001
From: Ayaan Zaidi
Date: Fri, 10 Apr 2026 21:33:47 +0530
Subject: [PATCH] fix(qa-lab): harden telegram qa artifacts

Route every qa-lab CLI --output-dir value through
resolveRepoRelativeOutputDir, which rejects absolute paths and relative
paths that resolve outside the repo root.

Write the gateway child config and the Telegram QA artifacts (including
the summary and observed-messages files) with mode 0o600.

Omit message text and caption from the observed-messages artifact unless
OPENCLAW_QA_TELEGRAM_CAPTURE_CONTENT=1 is set.

---
Review notes (this section is ignored by git am):
* resolveRepoRelativeOutputDir: the escape check now matches only ".."
  itself or a leading ".." path segment. The previous
  relative.startsWith("..") test also rejected in-repo directories whose
  name merely begins with two dots (path.relative yields "..cache" for
  <repo>/..cache).
* Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Hunk line counts were
  re-checked against the @@ headers and the diffstat, but context-line
  indentation is an assumption: verify against the tree or apply with
  --ignore-whitespace.
* "Promise<void>" (InterruptibleServer.stop) and
  'Omit<TelegramObservedMessage, "text" | "caption">' were restored from
  stripped angle-bracket spans; confirm against the tree.
* The post-image blob hash on the cli.runtime.ts index line predates the
  containment-check change.

 extensions/qa-lab/src/cli.runtime.test.ts     | 10 ++++
 extensions/qa-lab/src/cli.runtime.ts          | 34 +++++++++++---
 extensions/qa-lab/src/gateway-child.ts        |  5 +-
 .../qa-lab/src/telegram-live.runtime.test.ts  | 37 +++++++++++++++
 .../qa-lab/src/telegram-live.runtime.ts       | 47 +++++++++++++++++--
 5 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts
index a805bae61c2..9bf40c087e4 100644
--- a/extensions/qa-lab/src/cli.runtime.test.ts
+++ b/extensions/qa-lab/src/cli.runtime.test.ts
@@ -57,6 +57,7 @@ vi.mock("./docker-up.runtime.js", () => ({
 }));
 
 import {
+  __testing,
   runQaLabSelfCheckCommand,
   runQaDockerBuildImageCommand,
   runQaDockerScaffoldCommand,
@@ -185,6 +186,15 @@ describe("qa cli runtime", () => {
     });
   });
 
+  it("rejects output dirs that escape the repo root", () => {
+    expect(() =>
+      __testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside"),
+    ).toThrow("--output-dir must stay within the repo root.");
+    expect(() =>
+      __testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside"),
+    ).toThrow("--output-dir must be a relative path inside the repo root.");
+  });
+
   it("defaults telegram qa runs onto the live provider lane", async () => {
     await runQaTelegramCommand({
       repoRoot: "/tmp/openclaw-repo",
diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts
index 8182b26f8a2..5c1ea4d0bbb 100644
--- a/extensions/qa-lab/src/cli.runtime.ts
+++ b/extensions/qa-lab/src/cli.runtime.ts
@@ -22,6 +22,21 @@ type InterruptibleServer = {
   stop(): Promise<void>;
 };
 
+function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string) {
+  if (!outputDir) {
+    return undefined;
+  }
+  if (path.isAbsolute(outputDir)) {
+    throw new Error("--output-dir must be a relative path inside the repo root.");
+  }
+  const resolved = path.resolve(repoRoot, outputDir);
+  const relative = path.relative(repoRoot, resolved);
+  if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
+    throw new Error("--output-dir must stay within the repo root.");
+  }
+  return resolved;
+}
+
 function resolveQaManualLaneModels(opts: {
   providerMode: QaProviderMode;
   primaryModel?: string;
@@ -242,7 +257,7 @@ export async function runQaSuiteCommand(opts: {
   if (runner === "multipass") {
     const result = await runQaMultipass({
       repoRoot,
-      outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
+      outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
       providerMode,
       primaryModel: opts.primaryModel,
       alternateModel: opts.alternateModel,
@@ -265,7 +280,7 @@ export async function runQaSuiteCommand(opts: {
   }
   const result = await runQaSuiteFromRuntime({
     repoRoot,
-    outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
+    outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
     providerMode,
     primaryModel: opts.primaryModel,
     alternateModel: opts.alternateModel,
@@ -296,7 +311,7 @@ export async function runQaTelegramCommand(opts: {
     opts.providerMode === undefined ? "live-frontier" : normalizeQaProviderMode(opts.providerMode);
   const result = await runTelegramQaLive({
     repoRoot,
-    outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
+    outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
     providerMode,
     primaryModel: opts.primaryModel,
     alternateModel: opts.alternateModel,
@@ -328,7 +343,7 @@ export async function runQaCharacterEvalCommand(opts: {
   const judges = parseQaModelSpecs("--judge-model", opts.judgeModel);
   const result = await runQaCharacterEval({
     repoRoot,
-    outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
+    outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
     models: candidates.models,
     scenarioId: opts.scenario,
     candidateFastMode: opts.fast,
@@ -420,7 +435,10 @@ export async function runQaDockerScaffoldCommand(opts: {
   bindUiDist?: boolean;
 }) {
   const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
-  const outputDir = path.resolve(repoRoot, opts.outputDir);
+  const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir);
+  if (!outputDir) {
+    throw new Error("--output-dir is required.");
+  }
   const result = await writeQaDockerHarnessFiles({
     outputDir,
     repoRoot,
@@ -457,7 +475,7 @@ export async function runQaDockerUpCommand(opts: {
   const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
   const result = await runQaDockerUp({
     repoRoot,
-    outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
+    outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
     gatewayPort: Number.isFinite(opts.gatewayPort) ? opts.gatewayPort : undefined,
     qaLabPort: Number.isFinite(opts.qaLabPort) ? opts.qaLabPort : undefined,
     providerBaseUrl: opts.providerBaseUrl,
@@ -479,3 +497,7 @@ export async function runQaMockOpenAiCommand(opts: { host?: string; port?: numbe
   });
   await runInterruptibleServer("QA mock OpenAI", server);
 }
+
+export const __testing = {
+  resolveRepoRelativeOutputDir,
+};
diff --git a/extensions/qa-lab/src/gateway-child.ts b/extensions/qa-lab/src/gateway-child.ts
index a384e40bc32..22ecb314265 100644
--- a/extensions/qa-lab/src/gateway-child.ts
+++ b/extensions/qa-lab/src/gateway-child.ts
@@ -678,7 +678,10 @@ export async function startQaGatewayChild(params: {
     controlUiEnabled: params.controlUiEnabled,
   });
   const cfg = params.mutateConfig ? params.mutateConfig(baseCfg) : baseCfg;
-  await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
+  await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, {
+    encoding: "utf8",
+    mode: 0o600,
+  });
   const allowedPluginIds = [...(cfg.plugins?.allow ?? []), "openai"].filter(
     (pluginId, index, array): pluginId is string => {
       return (
diff --git a/extensions/qa-lab/src/telegram-live.runtime.test.ts b/extensions/qa-lab/src/telegram-live.runtime.test.ts
index ba3a8053fee..e82be6f3848 100644
--- a/extensions/qa-lab/src/telegram-live.runtime.test.ts
+++ b/extensions/qa-lab/src/telegram-live.runtime.test.ts
@@ -166,6 +166,43 @@ describe("telegram live qa runtime", () => {
     ).toBe("match");
   });
 
+  it("redacts observed message content by default in artifacts", () => {
+    expect(
+      __testing.buildObservedMessagesArtifact({
+        includeContent: false,
+        observedMessages: [
+          {
+            updateId: 1,
+            messageId: 9,
+            chatId: -100123,
+            senderId: 42,
+            senderIsBot: true,
+            senderUsername: "driver_bot",
+            text: "secret text",
+            caption: "secret caption",
+            replyToMessageId: 8,
+            timestamp: 1_700_000_000_000,
+            inlineButtons: ["Approve"],
+            mediaKinds: ["photo"],
+          },
+        ],
+      }),
+    ).toEqual([
+      {
+        updateId: 1,
+        messageId: 9,
+        chatId: -100123,
+        senderId: 42,
+        senderIsBot: true,
+        senderUsername: "driver_bot",
+        replyToMessageId: 8,
+        timestamp: 1_700_000_000_000,
+        inlineButtons: ["Approve"],
+        mediaKinds: ["photo"],
+      },
+    ]);
+  });
+
   it("formats phase-specific canary diagnostics with context", () => {
     const error = new Error(
       "SUT bot did not send any group reply after the canary command within 30s.",
diff --git a/extensions/qa-lab/src/telegram-live.runtime.ts b/extensions/qa-lab/src/telegram-live.runtime.ts
index af8e36e8962..a7b8a3ce49e 100644
--- a/extensions/qa-lab/src/telegram-live.runtime.ts
+++ b/extensions/qa-lab/src/telegram-live.runtime.ts
@@ -44,6 +44,11 @@ type TelegramObservedMessage = {
   mediaKinds: string[];
 };
 
+type TelegramObservedMessageArtifact = Omit<TelegramObservedMessage, "text" | "caption"> & {
+  text?: string;
+  caption?: string;
+};
+
 type TelegramQaScenarioResult = {
   id: string;
   title: string;
@@ -425,6 +430,28 @@ function renderTelegramQaMarkdown(params: {
   return lines.join("\n");
 }
 
+function buildObservedMessagesArtifact(params: {
+  observedMessages: TelegramObservedMessage[];
+  includeContent: boolean;
+}) {
+  return params.observedMessages.map((message) =>
+    params.includeContent
+      ? { ...message }
+      : {
+          updateId: message.updateId,
+          messageId: message.messageId,
+          chatId: message.chatId,
+          senderId: message.senderId,
+          senderIsBot: message.senderIsBot,
+          senderUsername: message.senderUsername,
+          replyToMessageId: message.replyToMessageId,
+          timestamp: message.timestamp,
+          inlineButtons: message.inlineButtons,
+          mediaKinds: message.mediaKinds,
+        },
+  );
+}
+
 function findScenario(ids?: string[]) {
   if (!ids || ids.length === 0) {
     return [...TELEGRAM_QA_SCENARIOS];
@@ -628,6 +655,7 @@ export async function runTelegramQaLive(params: {
   const sutAccountId = params.sutAccountId?.trim() || "sut";
   const scenarios = findScenario(params.scenarioIds);
   const observedMessages: TelegramObservedMessage[] = [];
+  const includeObservedMessageContent = process.env.OPENCLAW_QA_TELEGRAM_CAPTURE_CONTENT === "1";
   const startedAt = new Date().toISOString();
 
   const driverIdentity = await getBotIdentity(runtimeEnv.driverToken);
@@ -755,13 +783,23 @@ export async function runTelegramQaLive(params: {
       finishedAt,
       scenarios: scenarioResults,
     })}\n`,
-    "utf8",
+    { encoding: "utf8", mode: 0o600 },
   );
-  await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
+  await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, {
+    encoding: "utf8",
+    mode: 0o600,
+  });
   await fs.writeFile(
     observedMessagesPath,
-    `${JSON.stringify(observedMessages, null, 2)}\n`,
-    "utf8",
+    `${JSON.stringify(
+      buildObservedMessagesArtifact({
+        observedMessages,
+        includeContent: includeObservedMessageContent,
+      }),
+      null,
+      2,
+    )}\n`,
+    { encoding: "utf8", mode: 0o600 },
   );
   if (canaryFailure) {
     throw new Error(
@@ -781,6 +819,7 @@ export async function runTelegramQaLive(params: {
 export const __testing = {
   TELEGRAM_QA_SCENARIOS,
   buildTelegramQaConfig,
+  buildObservedMessagesArtifact,
   canaryFailureMessage,
   classifyCanaryReply,
   normalizeTelegramObservedMessage,