From cf21c8abcb5286cdd4004b0cc7ccc8afa6569c0a Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 26 May 2026 16:31:30 +0100 Subject: [PATCH] ci: harden live release gates --- .../openclaw-live-and-e2e-checks-reusable.yml | 2 +- src/agents/live-model-filter.ts | 4 +-- src/agents/model-compat.test.ts | 7 ++-- src/agents/models.profiles.live.test.ts | 4 +-- src/agents/tool-replay-repair.live.test.ts | 33 +++++++++++-------- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index ab9a425f9a7..3a90e6e182f 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -2295,7 +2295,7 @@ jobs: profiles: beta minimum stable full - suite_id: live-gateway-anthropic-docker label: Docker live gateway Anthropic - command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MAX_MODELS=2 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh + command: OPENCLAW_LIVE_GATEWAY_PROVIDERS=anthropic OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-sonnet-4-6 OPENCLAW_LIVE_GATEWAY_MAX_MODELS=1 OPENCLAW_LIVE_GATEWAY_STEP_TIMEOUT_MS=90000 OPENCLAW_LIVE_GATEWAY_MODEL_TIMEOUT_MS=180000 OPENCLAW_LIVE_DOCKER_REPO_ROOT="$GITHUB_WORKSPACE" timeout --kill-after=30s 35m bash .release-harness/scripts/test-live-gateway-models-docker.sh timeout_minutes: 40 profile_env_only: false profiles: stable full diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index ebada7718ba..ca6c230d858 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -10,11 +10,11 @@ type ModelRef = { }; const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [ - "anthropic/claude-opus-4-7", - "anthropic/claude-opus-4-6", "anthropic/claude-sonnet-4-6", + "anthropic/claude-opus-4-7", "google/gemini-3.1-pro-preview", "google/gemini-3-flash-preview", + "anthropic/claude-opus-4-6", "deepseek/deepseek-v4-flash", "deepseek/deepseek-v4-pro", "minimax/minimax-m2.7", diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index 12ce235f629..7d0f54370f6 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -656,11 +656,11 @@ describe("isPrioritizedHighSignalLiveModelRef", () => { it("lists priority refs as provider/id pairs", () => { expect(listPrioritizedHighSignalLiveModelRefs()).toStrictEqual([ - { provider: "anthropic", id: "claude-opus-4-7" }, - { provider: "anthropic", id: "claude-opus-4-6" }, { provider: "anthropic", id: "claude-sonnet-4-6" }, + { provider: "anthropic", id: "claude-opus-4-7" }, { provider: "google", id: "gemini-3.1-pro-preview" }, { provider: "google", id: "gemini-3-flash-preview" }, + { provider: "anthropic", id: "claude-opus-4-6" }, { provider: "deepseek", id: "deepseek-v4-flash" }, { provider: "deepseek", id: "deepseek-v4-pro" }, { provider: "minimax", id: "minimax-m2.7" }, @@ -682,6 +682,7 @@ describe("isPrioritizedHighSignalLiveModelRef", () => { describe("selectHighSignalLiveItems", () => { it("prefers curated Google replacements before fallback provider spread", () => { const items = [ + { provider: "anthropic", id: "claude-sonnet-4-6" }, { provider: "anthropic", id: "claude-opus-4-7" }, { provider: "anthropic", id: "claude-opus-4-6" }, { provider: "google", id: "gemini-3.1-pro-preview" }, @@ -699,8 +700,8 @@ describe("selectHighSignalLiveItems", () => { (item) => item.provider, ), ).toEqual([ + { provider: "anthropic", id: "claude-sonnet-4-6" }, { provider: "anthropic", id: "claude-opus-4-7" }, - { provider: "anthropic", id: "claude-opus-4-6" }, { provider: "google", id: "gemini-3.1-pro-preview" }, { provider: "google", id: "gemini-3-flash-preview" }, ]); diff --git a/src/agents/models.profiles.live.test.ts b/src/agents/models.profiles.live.test.ts index 076516c263e..fbcfc820f7d 100644 --- a/src/agents/models.profiles.live.test.ts +++ b/src/agents/models.profiles.live.test.ts @@ -369,12 +369,12 @@ function resolveLiveModelsJsonTimeoutMs( modelsJsonTimeoutRaw?: string, setupTimeoutMs = LIVE_SETUP_TIMEOUT_MS, ): number { - return Math.max(setupTimeoutMs, toInt(modelsJsonTimeoutRaw, 120_000)); + return Math.max(setupTimeoutMs, toInt(modelsJsonTimeoutRaw, 180_000)); } describe("resolveLiveModelsJsonTimeoutMs", () => { it("defaults models.json preparation to a longer setup timeout", () => { - expect(resolveLiveModelsJsonTimeoutMs(undefined, 45_000)).toBe(120_000); + expect(resolveLiveModelsJsonTimeoutMs(undefined, 45_000)).toBe(180_000); }); it("never goes below the shared live setup timeout", () => { diff --git a/src/agents/tool-replay-repair.live.test.ts b/src/agents/tool-replay-repair.live.test.ts index 42b35c06e26..1ebcc035c81 100644 --- a/src/agents/tool-replay-repair.live.test.ts +++ b/src/agents/tool-replay-repair.live.test.ts @@ -62,6 +62,23 @@ function isOpenAIResponsesFamily(api: string): boolean { ); } +function createNoopTools() { + return [ + { + name: "noop", + description: "Return ok.", + parameters: Type.Object({}, { additionalProperties: false }), + }, + ]; +} + +function replayValidationTools(model: Model) { + // Responses-family providers may force a new tool call whenever tools are + // present. These live probes validate repaired historical transcript shape, + // not fresh tool invocation. + return isOpenAIResponsesFamily(model.api) ? undefined : createNoopTools(); +} + function buildReplayMessages(model: Model): AgentMessage[] { const now = Date.now(); // Gemini source metadata deliberately simulates a model switch from a @@ -253,13 +270,7 @@ describeLive("tool replay repair live", () => { { systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.", messages: sanitized as never, - tools: [ - { - name: "noop", - description: "Return ok.", - parameters: Type.Object({}, { additionalProperties: false }), - }, - ], + tools: replayValidationTools(model), }, { apiKey: requireApiKey(apiKeyInfo, model.provider), @@ -334,13 +345,7 @@ describeLive("tool replay repair live", () => { { systemPrompt: "You are a concise assistant. Follow the user's instruction exactly.", messages: transformed as never, - tools: [ - { - name: "noop", - description: "Return ok.", - parameters: Type.Object({}, { additionalProperties: false }), - }, - ], + tools: replayValidationTools(model), }, { apiKey: requireApiKey(apiKeyInfo, model.provider),