diff --git a/extensions/qa-lab/src/scenario-catalog.test.ts b/extensions/qa-lab/src/scenario-catalog.test.ts index bcaf1c628a3..295366b1f32 100644 --- a/extensions/qa-lab/src/scenario-catalog.test.ts +++ b/extensions/qa-lab/src/scenario-catalog.test.ts @@ -149,6 +149,34 @@ describe("qa scenario catalog", () => { ]); }); + it("includes the OpenAI native web search live scenario", () => { + const scenario = readQaScenarioById("openai-native-web-search-live"); + const config = readQaScenarioExecutionConfig("openai-native-web-search-live") as + | { + requiredProvider?: string; + requiredModel?: string; + expectedMarker?: string; + } + | undefined; + + expect(scenario.sourcePath).toBe("qa/scenarios/models/openai-native-web-search-live.md"); + expect(scenario.gatewayConfigPatch?.tools).toEqual({ + web: { + search: { + enabled: true, + provider: null, + }, + }, + }); + expect(config?.requiredProvider).toBe("openai"); + expect(config?.requiredModel).toBe("gpt-5.4"); + expect(config?.expectedMarker).toBe("WEB-SEARCH-OK"); + expect(scenario.execution.flow?.steps.map((step) => step.name)).toEqual([ + "confirms live OpenAI GPT-5.4 web search auto mode", + "searches official OpenAI News through the live model", + ]); + }); + it("includes the thinking slash model remap scenario", () => { const scenario = readQaScenarioById("thinking-slash-model-remap"); const config = readQaScenarioExecutionConfig("thinking-slash-model-remap") as diff --git a/qa/scenarios/models/openai-native-web-search-live.md b/qa/scenarios/models/openai-native-web-search-live.md new file mode 100644 index 00000000000..c1b2afe4854 --- /dev/null +++ b/qa/scenarios/models/openai-native-web-search-live.md @@ -0,0 +1,146 @@ +# OpenAI native web search live + +```yaml qa-scenario +id: openai-native-web-search-live +title: OpenAI native web search live +surface: model-provider +coverage: + primary: + - tools.web-search + secondary: + - models.openai +objective: Verify a live OpenAI GPT model can use OpenAI native 
web_search when OpenClaw web search is enabled in auto mode. +successCriteria: + - A live-frontier run fails fast unless the selected primary provider is openai. + - The selected primary model is GPT-5.4, not a mini or pro variant. + - Web search is enabled without pinning a managed web_search provider. + - The live reply includes the required marker plus an official OpenAI News URL and headline found through web search. +gatewayConfigPatch: + tools: + web: + search: + enabled: true + provider: null +docsRefs: + - docs/tools/web.md + - docs/help/testing.md + - docs/concepts/qa-e2e-automation.md +codeRefs: + - extensions/openai/native-web-search.ts + - extensions/openai/shared.ts + - extensions/openai/openai-provider.ts + - extensions/qa-lab/src/suite.ts +execution: + kind: flow + summary: Run with `OPENCLAW_LIVE_OPENAI_KEY="${OPENAI_API_KEY}" pnpm openclaw qa suite --provider-mode live-frontier --model openai/gpt-5.4 --alt-model openai/gpt-5.4 --scenario openai-native-web-search-live`. + config: + requiredProvider: openai + requiredModel: gpt-5.4 + expectedMarker: WEB-SEARCH-OK + failureMarker: WEB-SEARCH-FAILED + searchPrompt: |- + Web search QA: use web search now for `site:openai.com/news OpenAI latest news`. + Reply in exactly three lines: + WEB-SEARCH-OK + URL: + HEADLINE:
+ Do not answer from memory. If web search is unavailable, reply exactly WEB-SEARCH-FAILED. +``` + +```yaml qa-flow +steps: + - name: confirms live OpenAI GPT-5.4 web search auto mode + actions: + - call: waitForGatewayHealthy + args: + - ref: env + - 60000 + - call: waitForQaChannelReady + args: + - ref: env + - 60000 + - set: selected + value: + expr: splitModelRef(env.primaryModel) + - assert: + expr: "env.providerMode !== 'live-frontier' || selected?.provider === config.requiredProvider" + message: + expr: "`expected live primary provider ${config.requiredProvider}, got ${env.primaryModel}`" + - assert: + expr: "env.providerMode !== 'live-frontier' || selected?.model === config.requiredModel" + message: + expr: "`expected live primary model ${config.requiredModel}, got ${env.primaryModel}`" + - call: readConfigSnapshot + saveAs: snapshot + args: + - ref: env + - set: searchConfig + value: + expr: "snapshot.config.tools?.web?.search ?? {}" + - assert: + expr: "searchConfig.enabled !== false" + message: + expr: "`expected web search enabled, got ${JSON.stringify(searchConfig)}`" + - assert: + expr: "typeof searchConfig.provider !== 'string' || ['auto', 'openai', ''].includes(searchConfig.provider.trim().toLowerCase())" + message: + expr: "`expected web search provider auto/openai/unset for native OpenAI search, got ${JSON.stringify(searchConfig)}`" + - assert: + expr: "env.providerMode !== 'live-frontier' || Boolean(env.gateway.runtimeEnv.OPENAI_API_KEY?.trim() || env.gateway.runtimeEnv.OPENCLAW_LIVE_OPENAI_KEY?.trim())" + message: expected OPENAI_API_KEY or OPENCLAW_LIVE_OPENAI_KEY for live OpenAI QA + detailsExpr: "env.providerMode === 'live-frontier' ? 
`provider=${selected?.provider} model=${selected?.model} webSearch=${JSON.stringify(searchConfig)}` : `mock-compatible provider=${selected?.provider}`" + - name: searches official OpenAI News through the live model + actions: + - if: + expr: "env.providerMode !== 'live-frontier'" + then: + - assert: "true" + else: + - call: reset + - set: selected + value: + expr: splitModelRef(env.primaryModel) + - call: runAgentPrompt + args: + - ref: env + - sessionKey: agent:qa:openai-native-web-search + message: + expr: config.searchPrompt + provider: + expr: selected?.provider + model: + expr: selected?.model + timeoutMs: + expr: resolveQaLiveTurnTimeoutMs(env, 180000, env.primaryModel) + - call: waitForOutboundMessage + saveAs: searchOutbound + args: + - ref: state + - lambda: + params: [candidate] + expr: "candidate.conversation.id === 'qa-operator'" + - expr: resolveQaLiveTurnTimeoutMs(env, 60000, env.primaryModel) + - set: searchText + value: + expr: searchOutbound.text + - set: searchTextLower + value: + expr: normalizeLowercaseStringOrEmpty(searchText) + - assert: + expr: "searchText.includes(config.expectedMarker)" + message: + expr: "`missing ${config.expectedMarker}: ${searchText}`" + - assert: + expr: "!searchText.includes(config.failureMarker) && !/(web search is unavailable|unable to search|cannot search|can't search)/i.test(searchText)" + message: + expr: "`search looked unavailable: ${searchText}`" + - assert: + expr: "/URL:\\s*https?:\\/\\/(?:[a-z0-9-]+\\.)*openai\\.com\\/news/i.test(searchText)" + message: + expr: "`missing official OpenAI News URL: ${searchText}`" + - assert: + expr: "/HEADLINE:\\s*\\S.{8,}/i.test(searchText)" + message: + expr: "`missing searched headline: ${searchText}`" + detailsExpr: "env.providerMode !== 'live-frontier' ? 
'mock mode: skipped live OpenAI web search probe' : searchText" +``` diff --git a/scripts/test-projects.test-support.mjs b/scripts/test-projects.test-support.mjs index cff2146d2cc..2595c8da487 100644 --- a/scripts/test-projects.test-support.mjs +++ b/scripts/test-projects.test-support.mjs @@ -22,6 +22,7 @@ import { isProviderExtensionRoot, isProviderOpenAiExtensionRoot, } from "../test/vitest/vitest.extension-provider-paths.mjs"; +import { isQaExtensionRoot } from "../test/vitest/vitest.extension-qa-paths.mjs"; import { isTelegramExtensionRoot } from "../test/vitest/vitest.extension-telegram-paths.mjs"; import { isVoiceCallExtensionRoot } from "../test/vitest/vitest.extension-voice-call-paths.mjs"; import { isWhatsAppExtensionRoot } from "../test/vitest/vitest.extension-whatsapp-paths.mjs"; @@ -79,6 +80,7 @@ const EXTENSION_MESSAGING_VITEST_CONFIG = "test/vitest/vitest.extension-messagin const EXTENSION_PROVIDER_OPENAI_VITEST_CONFIG = "test/vitest/vitest.extension-provider-openai.config.ts"; const EXTENSION_PROVIDERS_VITEST_CONFIG = "test/vitest/vitest.extension-providers.config.ts"; +const EXTENSION_QA_VITEST_CONFIG = "test/vitest/vitest.extension-qa.config.ts"; const EXTENSION_SIGNAL_VITEST_CONFIG = "test/vitest/vitest.extension-signal.config.ts"; const EXTENSION_SLACK_VITEST_CONFIG = "test/vitest/vitest.extension-slack.config.ts"; const EXTENSION_TELEGRAM_VITEST_CONFIG = "test/vitest/vitest.extension-telegram.config.ts"; @@ -146,6 +148,7 @@ const VITEST_CONFIG_BY_KIND = { extensionMsTeams: EXTENSION_MSTEAMS_VITEST_CONFIG, extensionProviderOpenAi: EXTENSION_PROVIDER_OPENAI_VITEST_CONFIG, extensionProvider: EXTENSION_PROVIDERS_VITEST_CONFIG, + extensionQa: EXTENSION_QA_VITEST_CONFIG, extensionSignal: EXTENSION_SIGNAL_VITEST_CONFIG, extensionSlack: EXTENSION_SLACK_VITEST_CONFIG, extensionTelegram: EXTENSION_TELEGRAM_VITEST_CONFIG, @@ -537,6 +540,9 @@ function classifyTarget(arg, cwd) { if (isProviderOpenAiExtensionRoot(extensionRoot)) { return 
"extensionProviderOpenAi"; } + if (isQaExtensionRoot(extensionRoot)) { + return "extensionQa"; + } if (isChannelSurfaceTestFile(relative)) { return "extensionChannel"; } @@ -837,6 +843,7 @@ export function buildVitestRunPlans( "extensionMessaging", "extensionProviderOpenAi", "extensionProvider", + "extensionQa", "extensionSignal", "extensionSlack", "extensionFull", diff --git a/test/scripts/test-projects.test.ts b/test/scripts/test-projects.test.ts index d40915ee132..cb3bc497bd7 100644 --- a/test/scripts/test-projects.test.ts +++ b/test/scripts/test-projects.test.ts @@ -162,6 +162,21 @@ describe("scripts/test-projects changed-target routing", () => { ]); }); + it("routes QA extension changes to the QA extension lane", () => { + const plans = buildVitestRunPlans(["--changed", "origin/main"], process.cwd(), () => [ + "extensions/qa-lab/src/scenario-catalog.test.ts", + ]); + + expect(plans).toEqual([ + { + config: "test/vitest/vitest.extension-qa.config.ts", + forwardedArgs: [], + includePatterns: ["extensions/qa-lab/src/scenario-catalog.test.ts"], + watchMode: false, + }, + ]); + }); + it("routes the top-level extensions target to every extension shard", () => { expect(buildVitestRunPlans(["extensions"], process.cwd())).toEqual( listFullExtensionVitestProjectConfigs().map((config) => ({