From d69cc5da5ca391aa89e0b8f61eb566b87c1be92a Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Fri, 10 Apr 2026 18:31:11 +0530 Subject: [PATCH] fix(qa-lab): address remaining review comments --- extensions/qa-lab/src/cli.ts | 4 +- .../qa-lab/src/telegram-live.runtime.test.ts | 43 ++++++++++++ .../qa-lab/src/telegram-live.runtime.ts | 70 +++++++++++++++---- 3 files changed, 100 insertions(+), 17 deletions(-) diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index 93c74c7b70e..e541e1ecb5c 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -163,7 +163,7 @@ export function registerQaLabCli(program: Command) { .option( "--provider-mode ", "Provider mode: mock-openai or live-frontier (legacy live-openai still works)", - "live-frontier", + "mock-openai", ) .option("--model ", "Primary provider/model ref") .option("--alt-model ", "Alternate provider/model ref") @@ -223,7 +223,7 @@ export function registerQaLabCli(program: Command) { .option( "--provider-mode ", "Provider mode: mock-openai or live-frontier (legacy live-openai still works)", - "mock-openai", + "live-frontier", ) .option("--model ", "Primary provider/model ref") .option("--alt-model ", "Alternate provider/model ref") diff --git a/extensions/qa-lab/src/telegram-live.runtime.test.ts b/extensions/qa-lab/src/telegram-live.runtime.test.ts index 0c7de85b725..ba3a8053fee 100644 --- a/extensions/qa-lab/src/telegram-live.runtime.test.ts +++ b/extensions/qa-lab/src/telegram-live.runtime.test.ts @@ -123,6 +123,49 @@ describe("telegram live qa runtime", () => { }); }); + it("ignores unrelated sut replies when matching the canary response", () => { + expect( + __testing.classifyCanaryReply({ + groupId: "-100123", + sutBotId: 88, + driverMessageId: 55, + message: { + updateId: 1, + messageId: 9, + chatId: -100123, + senderId: 88, + senderIsBot: true, + senderUsername: "sut_bot", + text: "other reply", + replyToMessageId: 999, + timestamp: 1_700_000_000_000, + inlineButtons: [], + mediaKinds: [], + }, + }), + ).toBe("unthreaded"); + expect( + __testing.classifyCanaryReply({ + groupId: "-100123", + sutBotId: 88, + driverMessageId: 55, + message: { + updateId: 2, + messageId: 10, + chatId: -100123, + senderId: 88, + senderIsBot: true, + senderUsername: "sut_bot", + text: "canary reply", + replyToMessageId: 55, + timestamp: 1_700_000_001_000, + inlineButtons: [], + mediaKinds: [], + }, + }), + ).toBe("match"); + }); + it("formats phase-specific canary diagnostics with context", () => { const error = new Error( "SUT bot did not send any group reply after the canary command within 30s.", diff --git a/extensions/qa-lab/src/telegram-live.runtime.ts b/extensions/qa-lab/src/telegram-live.runtime.ts index 4c3ce897e72..af8e36e8962 100644 --- a/extensions/qa-lab/src/telegram-live.runtime.ts +++ b/extensions/qa-lab/src/telegram-live.runtime.ts @@ -436,6 +436,24 @@ function findScenario(ids?: string[]) { return selected; } +function classifyCanaryReply(params: { + message: TelegramObservedMessage; + groupId: string; + sutBotId: number; + driverMessageId: number; +}) { + if ( + params.message.chatId !== Number(params.groupId) || + params.message.senderId !== params.sutBotId || + !params.message.text.trim() + ) { + return "ignore" as const; + } + return params.message.replyToMessageId === params.driverMessageId + ? ("match" as const) + : ("unthreaded" as const); +} + async function runCanary(params: { driverToken: string; groupId: string; @@ -449,6 +467,9 @@ async function runCanary(params: { params.groupId, `/help@${params.sutUsername}`, ); + let firstUnthreadedReply: + | Pick + | undefined; let sutObserved: Awaited>; try { sutObserved = await waitForObservedMessage({ @@ -456,10 +477,41 @@ async function runCanary(params: { initialOffset: offset, timeoutMs: 30_000, observedMessages: params.observedMessages, - predicate: (message) => - message.chatId === Number(params.groupId) && message.senderId === params.sutBotId, + predicate: (message) => { + const classification = classifyCanaryReply({ + message, + groupId: params.groupId, + sutBotId: params.sutBotId, + driverMessageId: driverMessage.message_id, + }); + if (classification === "ignore") { + return false; + } + if (classification === "unthreaded") { + firstUnthreadedReply ??= { + messageId: message.messageId, + replyToMessageId: message.replyToMessageId, + text: message.text, + }; + return false; + } + return classification === "match"; + }, }); } catch (error) { + if (firstUnthreadedReply) { + throw new TelegramQaCanaryError( + "sut_reply_not_threaded", + "SUT bot replied, but not as a reply to the canary driver message.", + { + groupId: params.groupId, + sutBotId: params.sutBotId, + driverMessageId: driverMessage.message_id, + sutMessageId: firstUnthreadedReply.messageId, + sutReplyToMessageId: firstUnthreadedReply.replyToMessageId, + }, + ); + } throw new TelegramQaCanaryError( "sut_reply_timeout", "SUT bot did not send any group reply after the canary command within 30s.", @@ -471,19 +523,6 @@ async function runCanary(params: { }, ); } - if (sutObserved.message.replyToMessageId !== driverMessage.message_id) { - throw new TelegramQaCanaryError( - "sut_reply_not_threaded", - "SUT bot replied, but not as a reply to the canary driver message.", - { - groupId: params.groupId, - sutBotId: params.sutBotId, - driverMessageId: driverMessage.message_id, - sutMessageId: sutObserved.message.messageId, - sutReplyToMessageId: sutObserved.message.replyToMessageId, - }, - ); - } if (!sutObserved.message.text.trim()) { throw new TelegramQaCanaryError( "sut_reply_empty", @@ -743,6 +782,7 @@ export const __testing = { TELEGRAM_QA_SCENARIOS, buildTelegramQaConfig, canaryFailureMessage, + classifyCanaryReply, normalizeTelegramObservedMessage, resolveTelegramQaRuntimeEnv, };