diff --git a/docs/help/testing-live.md b/docs/help/testing-live.md index 7438da2d2ee..9323873113d 100644 --- a/docs/help/testing-live.md +++ b/docs/help/testing-live.md @@ -339,7 +339,7 @@ Narrow, explicit allowlists are fastest and least flaky: - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` - Tool calling across several providers: - - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3-flash-preview,deepseek/deepseek-v4-flash,zai/glm-4.7,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` + - `OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3-flash-preview,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` - Google focus (Gemini API key + Antigravity): - Gemini (API key): `OPENCLAW_LIVE_GATEWAY_MODELS="google/gemini-3-flash-preview" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` @@ -373,11 +373,11 @@ This is the “common models” run we expect to keep working: - Google (Gemini API): `google/gemini-3.1-pro-preview` and `google/gemini-3-flash-preview` (avoid older Gemini 2.x models) - Google (Antigravity): `google-antigravity/claude-opus-4-6-thinking` and `google-antigravity/gemini-3-flash` - DeepSeek: `deepseek/deepseek-v4-flash` and `deepseek/deepseek-v4-pro` -- Z.AI (GLM): `zai/glm-4.7` +- Z.AI (GLM): `zai/glm-5.1` - MiniMax: `minimax/MiniMax-M2.7` Run gateway smoke with tools + image: -`OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3.1-pro-preview,google/gemini-3-flash-preview,google-antigravity/claude-opus-4-6-thinking,google-antigravity/gemini-3-flash,deepseek/deepseek-v4-flash,zai/glm-4.7,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` +`OPENCLAW_LIVE_GATEWAY_MODELS="openai/gpt-5.2,openai-codex/gpt-5.2,anthropic/claude-opus-4-6,google/gemini-3.1-pro-preview,google/gemini-3-flash-preview,google-antigravity/claude-opus-4-6-thinking,google-antigravity/gemini-3-flash,deepseek/deepseek-v4-flash,zai/glm-5.1,minimax/MiniMax-M2.7" pnpm test:live src/gateway/gateway-models.profiles.live.test.ts` ### Baseline: tool calling (Read + optional Exec) @@ -387,7 +387,7 @@ Pick at least one per provider family: - Anthropic: `anthropic/claude-opus-4-6` (or `anthropic/claude-sonnet-4-6`) - Google: `google/gemini-3-flash-preview` (or `google/gemini-3.1-pro-preview`) - DeepSeek: `deepseek/deepseek-v4-flash` -- Z.AI (GLM): `zai/glm-4.7` +- Z.AI (GLM): `zai/glm-5.1` - MiniMax: `minimax/MiniMax-M2.7` Optional additional coverage (nice to have): diff --git a/src/agents/live-model-filter.ts b/src/agents/live-model-filter.ts index 5d08e03fcd3..ff724075f16 100644 --- a/src/agents/live-model-filter.ts +++ b/src/agents/live-model-filter.ts @@ -23,9 +23,11 @@ const HIGH_SIGNAL_LIVE_MODEL_PRIORITY = [ "opencode-go/glm-5", "openrouter/ai21/jamba-large-1.7", "xai/grok-4-1-fast-non-reasoning", - "zai/glm-4.7", + "zai/glm-5.1", "fireworks/accounts/fireworks/models/kimi-k2p6", "fireworks/accounts/fireworks/routers/kimi-k2p5-turbo", + "fireworks/accounts/fireworks/models/glm-5", + "fireworks/accounts/fireworks/models/glm-5p1", "minimax-portal/minimax-m2.7", ] as const; @@ -104,6 +106,11 @@ function isOldMiniMaxLiveModelRef(id: string): boolean { return modelName === "minimax-m2.1" || modelName.startsWith("minimax-m2.1:"); } +function isOldGlmLiveModelRef(id: string): boolean { + const modelName = normalizeLowercaseStringOrEmpty(id).split("/").pop() ?? ""; + return /^glm-4(?:$|[.\-p])/.test(modelName); +} + export function isModernModelRef(ref: ModelRef): boolean { const provider = normalizeProviderId(ref.provider ?? ""); const id = normalizeLowercaseStringOrEmpty(ref.id); @@ -139,6 +146,9 @@ export function isHighSignalLiveModelRef(ref: ModelRef): boolean { if (isOldMiniMaxLiveModelRef(id)) { return false; } + if (isOldGlmLiveModelRef(id)) { + return false; + } return isHighSignalClaudeModelId(id); } diff --git a/src/agents/model-compat.test.ts b/src/agents/model-compat.test.ts index 4c08f56117a..da4742f2d13 100644 --- a/src/agents/model-compat.test.ts +++ b/src/agents/model-compat.test.ts @@ -520,6 +520,28 @@ describe("isHighSignalLiveModelRef", () => { ); }); + it("drops GLM 4.x models from the default live matrix while keeping GLM 5", () => { + providerRuntimeMocks.resolveProviderModernModelRef.mockReturnValue(true); + + expect(isHighSignalLiveModelRef({ provider: "zai", id: "glm-4.7" })).toBe(false); + expect( + isHighSignalLiveModelRef({ provider: "fireworks", id: "accounts/fireworks/models/glm-4p7" }), + ).toBe(false); + expect( + isHighSignalLiveModelRef({ + provider: "fireworks", + id: "accounts/fireworks/models/glm-4p5-air", + }), + ).toBe(false); + expect(isHighSignalLiveModelRef({ provider: "zai", id: "glm-5.1" })).toBe(true); + expect( + isHighSignalLiveModelRef({ provider: "fireworks", id: "accounts/fireworks/models/glm-5" }), + ).toBe(true); + expect( + isHighSignalLiveModelRef({ provider: "fireworks", id: "accounts/fireworks/models/glm-5p1" }), + ).toBe(true); + }); + it("keeps DeepSeek V4 models in the default live matrix when the provider marks them modern", () => { providerRuntimeMocks.resolveProviderModernModelRef.mockImplementation(({ provider, context }) => provider === "deepseek" && context.modelId.startsWith("deepseek-v4") ? true : undefined, @@ -579,6 +601,27 @@ describe("selectHighSignalLiveItems", () => { { provider: "minimax", id: "minimax-m2.7" }, ]); }); + + it("prioritizes Fireworks GLM 5 models over GLM 4.x fallback entries", () => { + const items = [ + { provider: "fireworks", id: "accounts/fireworks/models/glm-4p7" }, + { provider: "fireworks", id: "accounts/fireworks/models/glm-5" }, + { provider: "fireworks", id: "accounts/fireworks/models/glm-5p1" }, + { provider: "fireworks", id: "accounts/fireworks/models/gpt-oss-120b" }, + ]; + + expect( + selectHighSignalLiveItems( + items, + 2, + (item) => item, + (item) => item.provider, + ), + ).toEqual([ + { provider: "fireworks", id: "accounts/fireworks/models/glm-5" }, + { provider: "fireworks", id: "accounts/fireworks/models/glm-5p1" }, + ]); + }); }); describe("resolveHighSignalLiveModelLimit", () => { diff --git a/src/agents/zai.live.test.ts b/src/agents/zai.live.test.ts index 4cc40285868..2ce4765b684 100644 --- a/src/agents/zai.live.test.ts +++ b/src/agents/zai.live.test.ts @@ -11,7 +11,7 @@ const LIVE = isLiveTestEnabled(["ZAI_LIVE_TEST"]); const describeLive = LIVE && ZAI_KEY ? describe : describe.skip; -async function expectModelReturnsAssistantText(modelId: "glm-5" | "glm-4.7") { +async function expectModelReturnsAssistantText(modelId: "glm-5" | "glm-5.1") { const model = getModel("zai", modelId); const res = await completeSimple( model, @@ -29,7 +29,7 @@ describeLive("zai live", () => { await expectModelReturnsAssistantText("glm-5"); }, 20000); - it("glm-4.7 returns assistant text", async () => { - await expectModelReturnsAssistantText("glm-4.7"); + it("glm-5.1 returns assistant text", async () => { + await expectModelReturnsAssistantText("glm-5.1"); }, 20000); }); diff --git a/src/gateway/gateway-models.profiles.live.test.ts b/src/gateway/gateway-models.profiles.live.test.ts index 4f0d4359e8a..7edea9a02d3 100644 --- a/src/gateway/gateway-models.profiles.live.test.ts +++ b/src/gateway/gateway-models.profiles.live.test.ts @@ -695,7 +695,7 @@ describe("shouldSkipToolNonceProbeMissForLiveModel", () => { { modelKey: "opencode/big-pickle", expected: true }, { modelKey: "opencode-go/glm-5", expected: true }, { modelKey: "xai/grok-4.1-fast", expected: true }, - { modelKey: "zai/glm-4.7", expected: true }, + { modelKey: "zai/glm-5.1", expected: true }, { modelKey: "google/gemini-3-flash-preview", expected: true }, { modelKey: "openai/gpt-5.4", expected: false }, ])("returns $expected for $modelKey", ({ modelKey, expected }) => { @@ -2287,7 +2287,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { const authStorage = discoverAuthStorage(agentDir); const modelRegistry = discoverModels(authStorage, agentDir); const anthropic = modelRegistry.find("anthropic", "claude-opus-4-6") as Model | null; - const zai = modelRegistry.find("zai", "glm-4.7") as Model | null; + const zai = modelRegistry.find("zai", "glm-5.1") as Model | null; if (!anthropic || !zai) { return; @@ -2393,7 +2393,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { await withGatewayLiveProbeTimeout( client.request("sessions.patch", { key: sessionKey, - model: "zai/glm-4.7", + model: "zai/glm-5.1", }), "zai-fallback: sessions-patch-zai", ); @@ -2402,7 +2402,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { client, sessionKey, idempotencyKey: `idem-${randomUUID()}-followup`, - modelKey: "zai/glm-4.7", + modelKey: "zai/glm-5.1", message: `What are the values of nonceA and nonceB in "${toolProbePath}"? ` + `Reply with exactly: ${nonceA} ${nonceB}.`, @@ -2411,7 +2411,7 @@ describeLive("gateway live (dev agent, profile keys)", () => { }); assertNoReasoningTags({ text: followupText, - model: "zai/glm-4.7", + model: "zai/glm-5.1", phase: "zai-fallback-followup", label: "zai-fallback", });