fix(qa-lab): harden telegram qa artifacts

This commit is contained in:
Ayaan Zaidi
2026-04-10 21:33:47 +05:30
parent d69cc5da5c
commit ecb3e0a62d
5 changed files with 122 additions and 11 deletions

View File

@@ -57,6 +57,7 @@ vi.mock("./docker-up.runtime.js", () => ({
}));
import {
__testing,
runQaLabSelfCheckCommand,
runQaDockerBuildImageCommand,
runQaDockerScaffoldCommand,
@@ -185,6 +186,15 @@ describe("qa cli runtime", () => {
});
});
it("rejects output dirs that escape the repo root", () => {
  const repoRoot = "/tmp/openclaw-repo";
  // Wrap each call in a thunk so `expect(...).toThrow` can invoke it.
  const resolving = (outputDir: string) => () =>
    __testing.resolveRepoRelativeOutputDir(repoRoot, outputDir);

  // Relative traversal out of the repo root.
  expect(resolving("../outside")).toThrow("--output-dir must stay within the repo root.");
  // Absolute paths are refused outright.
  expect(resolving("/tmp/outside")).toThrow(
    "--output-dir must be a relative path inside the repo root.",
  );
});
it("defaults telegram qa runs onto the live provider lane", async () => {
await runQaTelegramCommand({
repoRoot: "/tmp/openclaw-repo",

View File

@@ -22,6 +22,21 @@ type InterruptibleServer = {
stop(): Promise<void>;
};
/**
 * Resolves a user-supplied `--output-dir` against the repo root and refuses
 * values that would land outside of it.
 *
 * @param repoRoot - Directory the output dir is resolved against.
 * @param outputDir - Optional path relative to `repoRoot`.
 * @returns The resolved absolute path, or `undefined` when `outputDir` is
 *   missing or empty.
 * @throws Error when `outputDir` is absolute, or when it resolves to a
 *   location outside `repoRoot`.
 */
function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string): string | undefined {
  if (!outputDir) {
    return undefined;
  }
  if (path.isAbsolute(outputDir)) {
    throw new Error("--output-dir must be a relative path inside the repo root.");
  }
  const resolved = path.resolve(repoRoot, outputDir);
  const relative = path.relative(repoRoot, resolved);
  // Only a leading ".." *segment* means the path climbs out of the repo root.
  // A plain prefix check would also reject valid in-repo names that merely
  // begin with two dots, such as "..artifacts".
  const climbsOut = relative === ".." || relative.startsWith(`..${path.sep}`);
  // An absolute `relative` means no relative route from repoRoot exists
  // (presumably a different Windows drive), so that is outside as well.
  if (climbsOut || path.isAbsolute(relative)) {
    throw new Error("--output-dir must stay within the repo root.");
  }
  return resolved;
}
function resolveQaManualLaneModels(opts: {
providerMode: QaProviderMode;
primaryModel?: string;
@@ -242,7 +257,7 @@ export async function runQaSuiteCommand(opts: {
if (runner === "multipass") {
const result = await runQaMultipass({
repoRoot,
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
providerMode,
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
@@ -265,7 +280,7 @@ export async function runQaSuiteCommand(opts: {
}
const result = await runQaSuiteFromRuntime({
repoRoot,
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
providerMode,
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
@@ -296,7 +311,7 @@ export async function runQaTelegramCommand(opts: {
opts.providerMode === undefined ? "live-frontier" : normalizeQaProviderMode(opts.providerMode);
const result = await runTelegramQaLive({
repoRoot,
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
providerMode,
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
@@ -328,7 +343,7 @@ export async function runQaCharacterEvalCommand(opts: {
const judges = parseQaModelSpecs("--judge-model", opts.judgeModel);
const result = await runQaCharacterEval({
repoRoot,
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
models: candidates.models,
scenarioId: opts.scenario,
candidateFastMode: opts.fast,
@@ -420,7 +435,10 @@ export async function runQaDockerScaffoldCommand(opts: {
bindUiDist?: boolean;
}) {
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
const outputDir = path.resolve(repoRoot, opts.outputDir);
const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir);
if (!outputDir) {
throw new Error("--output-dir is required.");
}
const result = await writeQaDockerHarnessFiles({
outputDir,
repoRoot,
@@ -457,7 +475,7 @@ export async function runQaDockerUpCommand(opts: {
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
const result = await runQaDockerUp({
repoRoot,
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
gatewayPort: Number.isFinite(opts.gatewayPort) ? opts.gatewayPort : undefined,
qaLabPort: Number.isFinite(opts.qaLabPort) ? opts.qaLabPort : undefined,
providerBaseUrl: opts.providerBaseUrl,
@@ -479,3 +497,7 @@ export async function runQaMockOpenAiCommand(opts: { host?: string; port?: numbe
});
await runInterruptibleServer("QA mock OpenAI", server);
}
/** Internal helpers exposed so the unit tests can call them directly. */
export const __testing = { resolveRepoRelativeOutputDir };

View File

@@ -678,7 +678,10 @@ export async function startQaGatewayChild(params: {
controlUiEnabled: params.controlUiEnabled,
});
const cfg = params.mutateConfig ? params.mutateConfig(baseCfg) : baseCfg;
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, {
encoding: "utf8",
mode: 0o600,
});
const allowedPluginIds = [...(cfg.plugins?.allow ?? []), "openai"].filter(
(pluginId, index, array): pluginId is string => {
return (

View File

@@ -166,6 +166,43 @@ describe("telegram live qa runtime", () => {
).toBe("match");
});
it("redacts observed message content by default in artifacts", () => {
  // Fields that are expected to survive redaction unchanged.
  const metadata = {
    updateId: 1,
    messageId: 9,
    chatId: -100123,
    senderId: 42,
    senderIsBot: true,
    senderUsername: "driver_bot",
    replyToMessageId: 8,
    timestamp: 1_700_000_000_000,
    inlineButtons: ["Approve"],
    mediaKinds: ["photo"],
  };

  const artifact = __testing.buildObservedMessagesArtifact({
    includeContent: false,
    observedMessages: [{ ...metadata, text: "secret text", caption: "secret caption" }],
  });

  // `text` and `caption` must be gone; everything else is kept.
  expect(artifact).toEqual([metadata]);
});
it("formats phase-specific canary diagnostics with context", () => {
const error = new Error(
"SUT bot did not send any group reply after the canary command within 30s.",

View File

@@ -44,6 +44,11 @@ type TelegramObservedMessage = {
mediaKinds: string[];
};
/**
 * Observed-message shape as serialized into the artifact file: identical to
 * `TelegramObservedMessage` except that `text` and `caption` are optional.
 */
type TelegramObservedMessageArtifact = {
  text?: string;
  caption?: string;
} & Omit<TelegramObservedMessage, "caption" | "text">;
type TelegramQaScenarioResult = {
id: string;
title: string;
@@ -425,6 +430,28 @@ function renderTelegramQaMarkdown(params: {
return lines.join("\n");
}
/**
 * Converts observed Telegram messages into the records written to the
 * observed-messages artifact.
 *
 * @param params.observedMessages - Messages to convert.
 * @param params.includeContent - When true each message is shallow-copied in
 *   full; when false only the explicitly listed fields are copied, which
 *   leaves out `text` and `caption`.
 * @returns A new array with one artifact record per input message.
 */
function buildObservedMessagesArtifact(params: {
  observedMessages: TelegramObservedMessage[];
  includeContent: boolean;
}) {
  return params.observedMessages.map<TelegramObservedMessageArtifact>((observed) => {
    if (params.includeContent) {
      return { ...observed };
    }
    // Allowlist rather than "delete text/caption": only the fields named here
    // are copied into the redacted record.
    const {
      updateId,
      messageId,
      chatId,
      senderId,
      senderIsBot,
      senderUsername,
      replyToMessageId,
      timestamp,
      inlineButtons,
      mediaKinds,
    } = observed;
    return {
      updateId,
      messageId,
      chatId,
      senderId,
      senderIsBot,
      senderUsername,
      replyToMessageId,
      timestamp,
      inlineButtons,
      mediaKinds,
    };
  });
}
function findScenario(ids?: string[]) {
if (!ids || ids.length === 0) {
return [...TELEGRAM_QA_SCENARIOS];
@@ -628,6 +655,7 @@ export async function runTelegramQaLive(params: {
const sutAccountId = params.sutAccountId?.trim() || "sut";
const scenarios = findScenario(params.scenarioIds);
const observedMessages: TelegramObservedMessage[] = [];
const includeObservedMessageContent = process.env.OPENCLAW_QA_TELEGRAM_CAPTURE_CONTENT === "1";
const startedAt = new Date().toISOString();
const driverIdentity = await getBotIdentity(runtimeEnv.driverToken);
@@ -755,13 +783,23 @@ export async function runTelegramQaLive(params: {
finishedAt,
scenarios: scenarioResults,
})}\n`,
"utf8",
{ encoding: "utf8", mode: 0o600 },
);
await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, {
encoding: "utf8",
mode: 0o600,
});
await fs.writeFile(
observedMessagesPath,
`${JSON.stringify(observedMessages, null, 2)}\n`,
"utf8",
`${JSON.stringify(
buildObservedMessagesArtifact({
observedMessages,
includeContent: includeObservedMessageContent,
}),
null,
2,
)}\n`,
{ encoding: "utf8", mode: 0o600 },
);
if (canaryFailure) {
throw new Error(
@@ -781,6 +819,7 @@ export async function runTelegramQaLive(params: {
export const __testing = {
TELEGRAM_QA_SCENARIOS,
buildTelegramQaConfig,
buildObservedMessagesArtifact,
canaryFailureMessage,
classifyCanaryReply,
normalizeTelegramObservedMessage,