From 1efda3d041f3193ddca0b1fc6a20ff086bfd2640 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 22 Mar 2026 20:57:04 +0000 Subject: [PATCH] test: improve live test progress feedback --- docs/help/testing.md | 4 ++ src/agents/models.profiles.live.test.ts | 52 ++++++++++++++++--- .../gateway-models.profiles.live.test.ts | 31 ++++++++++- 3 files changed, 78 insertions(+), 9 deletions(-) diff --git a/docs/help/testing.md b/docs/help/testing.md index d7fb4b03b3b..d198201b86b 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -130,6 +130,10 @@ Think of the suites as “increasing realism” (and increasing flakiness/cost): - Prefer running narrowed subsets instead of “everything” - Live runs will source `~/.profile` to pick up missing API keys - API key rotation (provider-specific): set `*_API_KEYS` with comma/semicolon format or `*_API_KEY_1`, `*_API_KEY_2` (for example `OPENAI_API_KEYS`, `ANTHROPIC_API_KEYS`, `GEMINI_API_KEYS`) or per-live override via `OPENCLAW_LIVE_*_KEY`; tests retry on rate limit responses. +- Progress/heartbeat output: + - Live suites now emit progress lines to stderr so long provider calls are visibly active even when Vitest console capture is quiet. + - Tune direct-model heartbeats with `OPENCLAW_LIVE_HEARTBEAT_MS`. + - Tune gateway/probe heartbeats with `OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS`. ## Which suite should I run? diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index 8aa04447c30..763d5e3b638 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -19,6 +19,7 @@ import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js"; const LIVE = isTruthyEnvValue(process.env.LIVE) || isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST); const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim()); const REQUIRE_PROFILE_KEYS = isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS); +const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000)); const describeLive = LIVE ? describe : describe.skip; @@ -43,7 +44,29 @@ function parseModelFilter(raw?: string): Set | null { } function logProgress(message: string): void { - console.log(`[live] ${message}`); + process.stderr.write(`[live] ${message}\n`); +} + +function formatElapsedSeconds(ms: number): string { + return `${Math.max(1, Math.round(ms / 1_000))}s`; +} + +async function withLiveHeartbeat(operation: Promise, context: string): Promise { + const startedAt = Date.now(); + let heartbeatCount = 0; + const timer = setInterval(() => { + heartbeatCount += 1; + logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - startedAt)})`); + }, LIVE_HEARTBEAT_MS); + timer.unref?.(); + try { + return await operation; + } finally { + clearInterval(timer); + if (heartbeatCount > 0) { + logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - startedAt)}`); + } + } } function formatFailurePreview( @@ -237,6 +260,7 @@ async function completeSimpleWithTimeout( context: Parameters>[1], options: Parameters>[2], timeoutMs: number, + progressContext: string, ) { const maxTimeoutMs = Math.max(1, timeoutMs); const controller = new AbortController(); @@ -252,13 +276,16 @@ async function completeSimpleWithTimeout( hardTimer.unref?.(); }); try { - return await Promise.race([ - completeSimple(model, context, { - ...options, - signal: controller.signal, - }), - timeout, - ]); + return await withLiveHeartbeat( + Promise.race([ + completeSimple(model, context, { + ...options, + signal: controller.signal, + }), + timeout, + ]), + progressContext, + ); } finally { clearTimeout(abortTimer); if (hardTimer) { @@ -271,6 +298,7 @@ async function completeOkWithRetry(params: { model: Model; apiKey: string; timeoutMs: number; + progressLabel: string; }) { const runOnce = async (maxTokens: number) => { const res = await completeSimpleWithTimeout( @@ -291,6 +319,7 @@ async function completeOkWithRetry(params: { maxTokens, }, params.timeoutMs, + `${params.progressLabel}: prompt call (maxTokens=${maxTokens})`, ); const text = res.content .filter((block) => block.type === "text") @@ -395,6 +424,9 @@ describeLive("live models (profile keys)", () => { ); } logProgress(`[live-models] running ${selectedCandidates.length} models`); + logProgress( + `[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)}`, + ); const total = selectedCandidates.length; for (const [index, entry] of selectedCandidates.entries()) { @@ -441,6 +473,7 @@ describeLive("live models (profile keys)", () => { maxTokens: 128, }, perModelTimeoutMs, + `${progressLabel}: tool-only regression first call`, ); let toolCall = first.content.find((b) => b.type === "toolCall"); @@ -470,6 +503,7 @@ describeLive("live models (profile keys)", () => { maxTokens: 128, }, perModelTimeoutMs, + `${progressLabel}: tool-only regression retry ${i + 1}`, ); toolCall = first.content.find((b) => b.type === "toolCall"); @@ -514,6 +548,7 @@ describeLive("live models (profile keys)", () => { maxTokens: 256, }, perModelTimeoutMs, + `${progressLabel}: tool-only regression followup`, ); const secondText = second.content @@ -530,6 +565,7 @@ describeLive("live models (profile keys)", () => { model, apiKey, timeoutMs: perModelTimeoutMs, + progressLabel, }); if (ok.res.stopReason === "error") { diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 3abadfd93e3..e742b32e6f0 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -55,6 +55,10 @@ const GATEWAY_LIVE_PROBE_TIMEOUT_MS = Math.max( 30_000, toInt(process.env.OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS, 90_000), ); +const GATEWAY_LIVE_HEARTBEAT_MS = Math.max( + 1_000, + toInt(process.env.OPENCLAW_LIVE_GATEWAY_HEARTBEAT_MS, 30_000), +); const GATEWAY_LIVE_MAX_MODELS = resolveGatewayLiveMaxModels(); const GATEWAY_LIVE_SUITE_TIMEOUT_MS = resolveGatewayLiveSuiteTimeoutMs(GATEWAY_LIVE_MAX_MODELS); @@ -108,6 +112,15 @@ function isGatewayLiveProbeTimeout(error: string): boolean { async function withGatewayLiveProbeTimeout(operation: Promise, context: string): Promise { let timeoutHandle: ReturnType | undefined; + const startedAt = Date.now(); + let heartbeatCount = 0; + const heartbeat = setInterval(() => { + heartbeatCount += 1; + logProgress( + `${context}: still running (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`, + ); + }, GATEWAY_LIVE_HEARTBEAT_MS); + heartbeat.unref?.(); try { return await Promise.race([ operation, @@ -118,9 +131,15 @@ async function withGatewayLiveProbeTimeout(operation: Promise, context: st }), ]); } finally { + clearInterval(heartbeat); if (timeoutHandle) { clearTimeout(timeoutHandle); } + if (heartbeatCount > 0) { + logProgress( + `${context}: completed after ${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s`, + ); + } } } @@ -168,7 +187,7 @@ function capByProviderSpread( } function logProgress(message: string): void { - console.log(`[live] ${message}`); + process.stderr.write(`[live] ${message}\n`); } function enterProductionEnvForLiveRun() { @@ -568,6 +587,7 @@ async function waitForSessionAssistantText(params: { context: string; }) { const startedAt = Date.now(); + let lastHeartbeatAt = startedAt; let delayMs = 50; while (Date.now() - startedAt < GATEWAY_LIVE_PROBE_TIMEOUT_MS) { const assistantTexts = readSessionAssistantTexts(params.sessionKey); @@ -580,6 +600,12 @@ async function waitForSessionAssistantText(params: { return freshText; } } + if (Date.now() - lastHeartbeatAt >= GATEWAY_LIVE_HEARTBEAT_MS) { + lastHeartbeatAt = Date.now(); + logProgress( + `${params.context}: waiting for transcript (${Math.max(1, Math.round((Date.now() - startedAt) / 1_000))}s)`, + ); + } await new Promise((resolve) => setTimeout(resolve, delayMs)); delayMs = Math.min(delayMs * 2, 250); } @@ -863,6 +889,9 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) { logProgress( `[${params.label}] running ${params.candidates.length} models (thinking=${params.thinkingLevel})`, ); + logProgress( + `[${params.label}] heartbeat=${Math.max(1, Math.round(GATEWAY_LIVE_HEARTBEAT_MS / 1_000))}s probe-timeout=${Math.max(1, Math.round(GATEWAY_LIVE_PROBE_TIMEOUT_MS / 1_000))}s`, + ); const anthropicKeys = collectAnthropicApiKeys(); if (anthropicKeys.length > 0) { process.env.ANTHROPIC_API_KEY = anthropicKeys[0];