mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-14 02:31:24 +00:00
fix(qa-lab): harden telegram qa artifacts
This commit is contained in:
@@ -57,6 +57,7 @@ vi.mock("./docker-up.runtime.js", () => ({
|
||||
}));
|
||||
|
||||
import {
|
||||
__testing,
|
||||
runQaLabSelfCheckCommand,
|
||||
runQaDockerBuildImageCommand,
|
||||
runQaDockerScaffoldCommand,
|
||||
@@ -185,6 +186,15 @@ describe("qa cli runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects output dirs that escape the repo root", () => {
|
||||
expect(() =>
|
||||
__testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "../outside"),
|
||||
).toThrow("--output-dir must stay within the repo root.");
|
||||
expect(() =>
|
||||
__testing.resolveRepoRelativeOutputDir("/tmp/openclaw-repo", "/tmp/outside"),
|
||||
).toThrow("--output-dir must be a relative path inside the repo root.");
|
||||
});
|
||||
|
||||
it("defaults telegram qa runs onto the live provider lane", async () => {
|
||||
await runQaTelegramCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
|
||||
@@ -22,6 +22,21 @@ type InterruptibleServer = {
|
||||
stop(): Promise<void>;
|
||||
};
|
||||
|
||||
/**
 * Resolves a user-supplied `--output-dir` value against the repo root and
 * rejects any path that would land outside of it.
 *
 * @param repoRoot - Directory the output dir has to stay inside.
 * @param outputDir - Relative path to resolve; a missing or empty value means "not set".
 * @returns The absolute output directory, or `undefined` when none was given.
 * @throws Error when `outputDir` is absolute or resolves outside `repoRoot`.
 */
function resolveRepoRelativeOutputDir(repoRoot: string, outputDir?: string): string | undefined {
  if (!outputDir) {
    return undefined;
  }
  if (path.isAbsolute(outputDir)) {
    throw new Error("--output-dir must be a relative path inside the repo root.");
  }
  const resolved = path.resolve(repoRoot, outputDir);
  const relative = path.relative(repoRoot, resolved);
  // Only a leading ".." path *segment* means the target left the root. A plain
  // prefix test would also reject in-repo names such as "..artifacts".
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  // The isAbsolute check covers the case where path.relative cannot express
  // the target relative to repoRoot and hands back an absolute path instead.
  if (escapesRoot || path.isAbsolute(relative)) {
    throw new Error("--output-dir must stay within the repo root.");
  }
  return resolved;
}
|
||||
|
||||
function resolveQaManualLaneModels(opts: {
|
||||
providerMode: QaProviderMode;
|
||||
primaryModel?: string;
|
||||
@@ -242,7 +257,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
if (runner === "multipass") {
|
||||
const result = await runQaMultipass({
|
||||
repoRoot,
|
||||
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
|
||||
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
|
||||
providerMode,
|
||||
primaryModel: opts.primaryModel,
|
||||
alternateModel: opts.alternateModel,
|
||||
@@ -265,7 +280,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
}
|
||||
const result = await runQaSuiteFromRuntime({
|
||||
repoRoot,
|
||||
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
|
||||
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
|
||||
providerMode,
|
||||
primaryModel: opts.primaryModel,
|
||||
alternateModel: opts.alternateModel,
|
||||
@@ -296,7 +311,7 @@ export async function runQaTelegramCommand(opts: {
|
||||
opts.providerMode === undefined ? "live-frontier" : normalizeQaProviderMode(opts.providerMode);
|
||||
const result = await runTelegramQaLive({
|
||||
repoRoot,
|
||||
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
|
||||
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
|
||||
providerMode,
|
||||
primaryModel: opts.primaryModel,
|
||||
alternateModel: opts.alternateModel,
|
||||
@@ -328,7 +343,7 @@ export async function runQaCharacterEvalCommand(opts: {
|
||||
const judges = parseQaModelSpecs("--judge-model", opts.judgeModel);
|
||||
const result = await runQaCharacterEval({
|
||||
repoRoot,
|
||||
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
|
||||
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
|
||||
models: candidates.models,
|
||||
scenarioId: opts.scenario,
|
||||
candidateFastMode: opts.fast,
|
||||
@@ -420,7 +435,10 @@ export async function runQaDockerScaffoldCommand(opts: {
|
||||
bindUiDist?: boolean;
|
||||
}) {
|
||||
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
|
||||
const outputDir = path.resolve(repoRoot, opts.outputDir);
|
||||
const outputDir = resolveRepoRelativeOutputDir(repoRoot, opts.outputDir);
|
||||
if (!outputDir) {
|
||||
throw new Error("--output-dir is required.");
|
||||
}
|
||||
const result = await writeQaDockerHarnessFiles({
|
||||
outputDir,
|
||||
repoRoot,
|
||||
@@ -457,7 +475,7 @@ export async function runQaDockerUpCommand(opts: {
|
||||
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
|
||||
const result = await runQaDockerUp({
|
||||
repoRoot,
|
||||
outputDir: opts.outputDir ? path.resolve(repoRoot, opts.outputDir) : undefined,
|
||||
outputDir: resolveRepoRelativeOutputDir(repoRoot, opts.outputDir),
|
||||
gatewayPort: Number.isFinite(opts.gatewayPort) ? opts.gatewayPort : undefined,
|
||||
qaLabPort: Number.isFinite(opts.qaLabPort) ? opts.qaLabPort : undefined,
|
||||
providerBaseUrl: opts.providerBaseUrl,
|
||||
@@ -479,3 +497,7 @@ export async function runQaMockOpenAiCommand(opts: { host?: string; port?: numbe
|
||||
});
|
||||
await runInterruptibleServer("QA mock OpenAI", server);
|
||||
}
|
||||
|
||||
export const __testing = {
|
||||
resolveRepoRelativeOutputDir,
|
||||
};
|
||||
|
||||
@@ -678,7 +678,10 @@ export async function startQaGatewayChild(params: {
|
||||
controlUiEnabled: params.controlUiEnabled,
|
||||
});
|
||||
const cfg = params.mutateConfig ? params.mutateConfig(baseCfg) : baseCfg;
|
||||
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
|
||||
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, {
|
||||
encoding: "utf8",
|
||||
mode: 0o600,
|
||||
});
|
||||
const allowedPluginIds = [...(cfg.plugins?.allow ?? []), "openai"].filter(
|
||||
(pluginId, index, array): pluginId is string => {
|
||||
return (
|
||||
|
||||
@@ -166,6 +166,43 @@ describe("telegram live qa runtime", () => {
|
||||
).toBe("match");
|
||||
});
|
||||
|
||||
it("redacts observed message content by default in artifacts", () => {
|
||||
expect(
|
||||
__testing.buildObservedMessagesArtifact({
|
||||
includeContent: false,
|
||||
observedMessages: [
|
||||
{
|
||||
updateId: 1,
|
||||
messageId: 9,
|
||||
chatId: -100123,
|
||||
senderId: 42,
|
||||
senderIsBot: true,
|
||||
senderUsername: "driver_bot",
|
||||
text: "secret text",
|
||||
caption: "secret caption",
|
||||
replyToMessageId: 8,
|
||||
timestamp: 1_700_000_000_000,
|
||||
inlineButtons: ["Approve"],
|
||||
mediaKinds: ["photo"],
|
||||
},
|
||||
],
|
||||
}),
|
||||
).toEqual([
|
||||
{
|
||||
updateId: 1,
|
||||
messageId: 9,
|
||||
chatId: -100123,
|
||||
senderId: 42,
|
||||
senderIsBot: true,
|
||||
senderUsername: "driver_bot",
|
||||
replyToMessageId: 8,
|
||||
timestamp: 1_700_000_000_000,
|
||||
inlineButtons: ["Approve"],
|
||||
mediaKinds: ["photo"],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("formats phase-specific canary diagnostics with context", () => {
|
||||
const error = new Error(
|
||||
"SUT bot did not send any group reply after the canary command within 30s.",
|
||||
|
||||
@@ -44,6 +44,11 @@ type TelegramObservedMessage = {
|
||||
mediaKinds: string[];
|
||||
};
|
||||
|
||||
/**
 * One entry of the observed-messages artifact: a `TelegramObservedMessage`
 * whose `text` and `caption` fields are allowed to be absent.
 */
type TelegramObservedMessageArtifact = {
  text?: string;
  caption?: string;
} & Omit<TelegramObservedMessage, "text" | "caption">;
|
||||
|
||||
type TelegramQaScenarioResult = {
|
||||
id: string;
|
||||
title: string;
|
||||
@@ -425,6 +430,28 @@ function renderTelegramQaMarkdown(params: {
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/**
 * Maps observed Telegram messages to the records serialized into the
 * observed-messages artifact.
 *
 * When `includeContent` is true each message is shallow-copied unchanged.
 * Otherwise each record is rebuilt from an explicit list of fields that leaves
 * out `text` and `caption`.
 *
 * @param params.observedMessages - Messages to convert.
 * @param params.includeContent - Whether `text` and `caption` are kept.
 * @returns One artifact record per input message, in input order.
 */
function buildObservedMessagesArtifact(params: {
  observedMessages: TelegramObservedMessage[];
  includeContent: boolean;
}) {
  const { observedMessages, includeContent } = params;
  return observedMessages.map<TelegramObservedMessageArtifact>((observed) => {
    if (includeContent) {
      return { ...observed };
    }
    // Allow-list copy: only the fields named here reach the artifact.
    const {
      updateId,
      messageId,
      chatId,
      senderId,
      senderIsBot,
      senderUsername,
      replyToMessageId,
      timestamp,
      inlineButtons,
      mediaKinds,
    } = observed;
    return {
      updateId,
      messageId,
      chatId,
      senderId,
      senderIsBot,
      senderUsername,
      replyToMessageId,
      timestamp,
      inlineButtons,
      mediaKinds,
    };
  });
}
|
||||
|
||||
function findScenario(ids?: string[]) {
|
||||
if (!ids || ids.length === 0) {
|
||||
return [...TELEGRAM_QA_SCENARIOS];
|
||||
@@ -628,6 +655,7 @@ export async function runTelegramQaLive(params: {
|
||||
const sutAccountId = params.sutAccountId?.trim() || "sut";
|
||||
const scenarios = findScenario(params.scenarioIds);
|
||||
const observedMessages: TelegramObservedMessage[] = [];
|
||||
const includeObservedMessageContent = process.env.OPENCLAW_QA_TELEGRAM_CAPTURE_CONTENT === "1";
|
||||
const startedAt = new Date().toISOString();
|
||||
|
||||
const driverIdentity = await getBotIdentity(runtimeEnv.driverToken);
|
||||
@@ -755,13 +783,23 @@ export async function runTelegramQaLive(params: {
|
||||
finishedAt,
|
||||
scenarios: scenarioResults,
|
||||
})}\n`,
|
||||
"utf8",
|
||||
{ encoding: "utf8", mode: 0o600 },
|
||||
);
|
||||
await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
|
||||
await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, {
|
||||
encoding: "utf8",
|
||||
mode: 0o600,
|
||||
});
|
||||
await fs.writeFile(
|
||||
observedMessagesPath,
|
||||
`${JSON.stringify(observedMessages, null, 2)}\n`,
|
||||
"utf8",
|
||||
`${JSON.stringify(
|
||||
buildObservedMessagesArtifact({
|
||||
observedMessages,
|
||||
includeContent: includeObservedMessageContent,
|
||||
}),
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
{ encoding: "utf8", mode: 0o600 },
|
||||
);
|
||||
if (canaryFailure) {
|
||||
throw new Error(
|
||||
@@ -781,6 +819,7 @@ export async function runTelegramQaLive(params: {
|
||||
export const __testing = {
|
||||
TELEGRAM_QA_SCENARIOS,
|
||||
buildTelegramQaConfig,
|
||||
buildObservedMessagesArtifact,
|
||||
canaryFailureMessage,
|
||||
classifyCanaryReply,
|
||||
normalizeTelegramObservedMessage,
|
||||
|
||||
Reference in New Issue
Block a user