diff --git a/CHANGELOG.md b/CHANGELOG.md index 44b785c2e70..9d33a6b2f9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai - CLI/update: treat inherited Gateway service markers as origin hints and only block package replacement when the managed Gateway is still live, so self-updates can stop the service and continue safely. (#75729) Thanks @hxy91819. - Agents/failover: exempt run-level timeouts that fire during tool execution from model fallback, timeout-triggered compaction, and generic timeout payload synthesis. Long `process(poll)`, browser, or `exec` tool calls that exceed `agents.defaults.timeoutSeconds` previously rotated auth profiles, switched to a fallback model, and surfaced a misleading "LLM request timed out" error even though the primary model had already responded. Mirrors the existing `timedOutDuringCompaction` precedent (#46889). Fixes #52147. (#75873) Thanks @simonusa. - Docker: copy Bun 1.3.13 from a digest-pinned image and keep CI on the same version. Fixes #74356. Thanks @fede-kamel and @sallyom. +- Agents/compaction: keep prior context on consecutive turns against z.ai-style providers (z.ai direct, openrouter z-ai/*, in-house GLM gateways); Pi's internal auto-compaction was misfiring after successful turns and clearing state.messages before the next provider request. (#76056) Thanks @openperf. 
## 2026.5.2 diff --git a/src/agents/pi-embedded-runner/compact.hooks.harness.ts b/src/agents/pi-embedded-runner/compact.hooks.harness.ts index 31d69395ef6..fa564aa9471 100644 --- a/src/agents/pi-embedded-runner/compact.hooks.harness.ts +++ b/src/agents/pi-embedded-runner/compact.hooks.harness.ts @@ -301,8 +301,10 @@ export async function loadCompactHooksHarness(): Promise<{ })); vi.doMock("../pi-settings.js", () => ({ + applyPiAutoCompactionGuard: vi.fn(() => ({ supported: true, disabled: false })), applyPiCompactionSettingsFromConfig: vi.fn(), ensurePiCompactionReserveTokens: vi.fn(), + isSilentOverflowProneModel: vi.fn(() => false), resolveCompactionReserveTokensFloor: vi.fn(() => 0), })); diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 71d7390f583..d5f756d6500 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -75,7 +75,11 @@ import { setCompactionSafeguardCancelReason, } from "../pi-hooks/compaction-safeguard-runtime.js"; import { createPreparedEmbeddedPiSettingsManager } from "../pi-project-settings.js"; -import { applyPiCompactionSettingsFromConfig } from "../pi-settings.js"; +import { + applyPiAutoCompactionGuard, + applyPiCompactionSettingsFromConfig, + isSilentOverflowProneModel, +} from "../pi-settings.js"; import { createOpenClawCodingTools } from "../pi-tools.js"; import { wrapStreamFnTextTransforms } from "../plugin-text-transforms.js"; import { registerProviderStreamForModel } from "../provider-stream.js"; @@ -960,12 +964,26 @@ async function compactEmbeddedPiSessionDirectOnce( }); await resourceLoader.reload(); // DefaultResourceLoader.reload() rehydrates settings from disk and can drop OpenClaw - // compaction overrides applied in createPreparedEmbeddedPiSettingsManager. 
+ // compaction overrides applied in createPreparedEmbeddedPiSettingsManager — same + // rehydration also restores Pi's auto-compaction (openclaw#75799), so re-apply + // both guards. effectiveModel.baseUrl matches the surrounding scope so + // auth-profile-injected baseUrls reach the endpoint-class detector. applyPiCompactionSettingsFromConfig({ settingsManager, cfg: params.config, contextTokenBudget: ctxInfo.tokens, }); + // contextEngineInfo is intentionally omitted: this guard runs inside the + // compaction LLM session, which is not the user-facing agent session and + // has no associated context engine. + applyPiAutoCompactionGuard({ + settingsManager, + silentOverflowProneProvider: isSilentOverflowProneModel({ + provider, + modelId, + baseUrl: effectiveModel.baseUrl ?? undefined, + }), + }); const { customTools } = splitSdkTools({ tools: effectiveTools, diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index bdf4e90e184..5583dd52d8a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -321,6 +321,7 @@ vi.mock("../../pi-settings.js", () => ({ keepRecentTokens: 40_000, }, }), + isSilentOverflowProneModel: () => false, })); vi.mock("../extensions.js", () => ({ diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 02be76253f3..46db6251d1c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -107,6 +107,7 @@ import { createPreparedEmbeddedPiSettingsManager } from "../../pi-project-settin import { applyPiAutoCompactionGuard, applyPiCompactionSettingsFromConfig, + isSilentOverflowProneModel, } from "../../pi-settings.js"; import { createClientToolNameConflictError, @@ -1474,10 +1475,16 @@ export async function 
runEmbeddedAttempt( cfg: params.config, contextTokenBudget: params.contextTokenBudget, }); - applyPiAutoCompactionGuard({ + const piAutoCompactionGuardArgs = { settingsManager, contextEngineInfo: activeContextEngine?.info, - }); + silentOverflowProneProvider: isSilentOverflowProneModel({ + provider: params.provider, + modelId: params.modelId, + baseUrl: params.model.baseUrl ?? undefined, + }), + }; + applyPiAutoCompactionGuard(piAutoCompactionGuardArgs); // Sets compaction/pruning runtime state and returns extension factories // that must be passed to the resource loader for the safeguard to be active. @@ -1496,12 +1503,15 @@ export async function runEmbeddedAttempt( }); await resourceLoader.reload(); // DefaultResourceLoader.reload() rehydrates settings from disk and can drop OpenClaw - // compaction overrides applied in createPreparedEmbeddedPiSettingsManager. + // compaction overrides applied in createPreparedEmbeddedPiSettingsManager — same + // rehydration also restores Pi's auto-compaction (openclaw#75799), so re-apply + // both guards. 
applyPiCompactionSettingsFromConfig({ settingsManager, cfg: params.config, contextTokenBudget: params.contextTokenBudget, }); + applyPiAutoCompactionGuard(piAutoCompactionGuardArgs); prepStages.mark("session-resource-loader"); // Get hook runner early so it's available when creating tools diff --git a/src/agents/pi-settings.test.ts b/src/agents/pi-settings.test.ts index 35fa1666afd..81c05e12c6c 100644 --- a/src/agents/pi-settings.test.ts +++ b/src/agents/pi-settings.test.ts @@ -1,8 +1,10 @@ import { describe, expect, it, vi } from "vitest"; import { MIN_PROMPT_BUDGET_RATIO, MIN_PROMPT_BUDGET_TOKENS } from "./pi-compaction-constants.js"; import { + applyPiAutoCompactionGuard, applyPiCompactionSettingsFromConfig, DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR, + isSilentOverflowProneModel, resolveCompactionReserveTokensFloor, } from "./pi-settings.js"; @@ -345,3 +347,179 @@ describe("resolveCompactionReserveTokensFloor", () => { ).toBe(0); }); }); + +describe("isSilentOverflowProneModel", () => { + // Reporter's repro shape: openrouter routing to z-ai/glm. Both the bare + // `z-ai/...` form and the `openrouter/z-ai/...` qualified form must hit. 
+ it("flags z-ai-prefixed model ids regardless of qualifier", () => { + expect(isSilentOverflowProneModel({ provider: "openrouter", modelId: "z-ai/glm-5.1" })).toBe( + true, + ); + expect( + isSilentOverflowProneModel({ provider: "openrouter", modelId: "openrouter/z-ai/glm-5" }), + ).toBe(true); + }); + + it("flags a config-set z.ai provider regardless of model id", () => { + expect(isSilentOverflowProneModel({ provider: "z.ai", modelId: "glm-5.1" })).toBe(true); + expect(isSilentOverflowProneModel({ provider: "z-ai", modelId: "glm-5.1" })).toBe(true); + }); + + it("flags a direct api.z.ai baseUrl via endpointClass", () => { + expect( + isSilentOverflowProneModel({ + provider: "openai", + modelId: "glm-5.1", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + }), + ).toBe(true); + }); + + // openclaw#75799 reporter's setup: an OpenAI-compatible in-house gateway + // exposing Zhipu's GLM family directly (model id `glm-5.1`, no `z-ai/` + // qualifier, custom baseUrl that is not api.z.ai). Catch the bare GLM + // family name so direct gateway deployments hit the guard regardless of + // what `provider` field the user picked — gateways relabel the upstream + // identity, so `provider` here can be anything from `openai` to a custom + // string. False positives only disable Pi's secondary compaction path; + // OpenClaw's preemptive compaction continues to handle real overflow. + it("flags bare glm- model ids without a namespace prefix, regardless of provider", () => { + expect(isSilentOverflowProneModel({ provider: "custom", modelId: "glm-5.1" })).toBe(true); + expect(isSilentOverflowProneModel({ provider: "custom", modelId: "glm-4.7" })).toBe(true); + expect(isSilentOverflowProneModel({ provider: "openai", modelId: "glm-5.1" })).toBe(true); + expect(isSilentOverflowProneModel({ provider: "openrouter", modelId: "glm-5.1" })).toBe(true); + }); + + // Detection is intentionally narrow to z.ai-style accounting. 
Namespaced GLM + // ids that route through providers with their own overflow accounting must + // NOT be flagged — those hosts may not exhibit the z.ai silent-overflow + // shape, and disabling Pi auto-compaction for them would over-broaden the + // kill surface beyond the reproducible repro. + it("does not flag namespaced GLM ids routed through non-z.ai hosts", () => { + expect( + isSilentOverflowProneModel({ provider: "ollama", modelId: "ollama/glm-5.1:cloud" }), + ).toBe(false); + expect( + isSilentOverflowProneModel({ provider: "opencode-go", modelId: "opencode-go/glm-5.1" }), + ).toBe(false); + }); + + // pi-ai's overflow.ts only documents z.ai as the silent-overflow style. We + // intentionally do NOT extend the guard to anthropic/openai/google/openrouter- + // anthropic routes — adding them without a reproducible repro would broaden + // the kill surface and regress baseline behavior for those providers. + it("does not flag anthropic, openai, google or other routes", () => { + expect( + isSilentOverflowProneModel({ provider: "anthropic", modelId: "claude-sonnet-4.6" }), + ).toBe(false); + expect(isSilentOverflowProneModel({ provider: "openai", modelId: "gpt-5.5" })).toBe(false); + expect( + isSilentOverflowProneModel({ + provider: "openrouter", + modelId: "anthropic/claude-sonnet-4.6", + }), + ).toBe(false); + expect(isSilentOverflowProneModel({ provider: "google", modelId: "gemini-2.5-pro" })).toBe( + false, + ); + }); + + it("treats missing fields as not silent-overflow-prone", () => { + expect(isSilentOverflowProneModel({})).toBe(false); + expect( + isSilentOverflowProneModel({ provider: undefined, modelId: undefined, baseUrl: null }), + ).toBe(false); + }); +}); + +describe("applyPiAutoCompactionGuard", () => { + // Direct repro of openclaw#75799: pi-ai's silent-overflow detection misfires + // on a successful turn against z.ai-style providers, triggering Pi's + // _runAutoCompaction from inside Session.prompt() and reassigning + // agent.state.messages 
between the runner's prompt.submitted trajectory + // event and the provider request. Disabling Pi auto-compaction here keeps + // state.messages intact; OpenClaw's preemptive compaction continues to + // handle real overflow on its own path. + it("disables Pi auto-compaction for silent-overflow-prone providers", () => { + const setCompactionEnabled = vi.fn(); + const settingsManager = { + getCompactionReserveTokens: () => 20_000, + getCompactionKeepRecentTokens: () => 4_000, + applyOverrides: () => {}, + setCompactionEnabled, + }; + + const result = applyPiAutoCompactionGuard({ + settingsManager, + silentOverflowProneProvider: true, + }); + + expect(result).toEqual({ supported: true, disabled: true }); + expect(setCompactionEnabled).toHaveBeenCalledWith(false); + }); + + it("disables Pi auto-compaction when a context engine plugin owns compaction", () => { + const setCompactionEnabled = vi.fn(); + const settingsManager = { + getCompactionReserveTokens: () => 20_000, + getCompactionKeepRecentTokens: () => 4_000, + applyOverrides: () => {}, + setCompactionEnabled, + }; + + const result = applyPiAutoCompactionGuard({ + settingsManager, + contextEngineInfo: { + id: "third-party", + name: "Third-party Context Engine", + version: "0.1.0", + ownsCompaction: true, + }, + }); + + expect(result).toEqual({ supported: true, disabled: true }); + expect(setCompactionEnabled).toHaveBeenCalledWith(false); + }); + + // Default-mode runs against ordinary providers must keep Pi's auto-compaction + // enabled. Disabling it across the board would silently remove Pi's + // overflow-recovery path inside Session.prompt() for users who are not + // affected by z.ai's silent-overflow accounting. 
+ it("leaves Pi auto-compaction alone for non-z.ai providers without engine ownership", () => { + const setCompactionEnabled = vi.fn(); + const settingsManager = { + getCompactionReserveTokens: () => 20_000, + getCompactionKeepRecentTokens: () => 4_000, + applyOverrides: () => {}, + setCompactionEnabled, + }; + + const result = applyPiAutoCompactionGuard({ + settingsManager, + contextEngineInfo: { + id: "legacy", + name: "Legacy Context Engine", + version: "1.0.0", + }, + silentOverflowProneProvider: false, + }); + + expect(result).toEqual({ supported: true, disabled: false }); + expect(setCompactionEnabled).not.toHaveBeenCalled(); + }); + + it("reports unsupported when the settings manager has no setCompactionEnabled hook", () => { + const settingsManager = { + getCompactionReserveTokens: () => 20_000, + getCompactionKeepRecentTokens: () => 4_000, + applyOverrides: () => {}, + }; + + const result = applyPiAutoCompactionGuard({ + settingsManager, + silentOverflowProneProvider: true, + }); + + expect(result).toEqual({ supported: false, disabled: false }); + }); +}); diff --git a/src/agents/pi-settings.ts b/src/agents/pi-settings.ts index 3046d406429..4daedcad832 100644 --- a/src/agents/pi-settings.ts +++ b/src/agents/pi-settings.ts @@ -1,6 +1,8 @@ import type { OpenClawConfig } from "../config/types.openclaw.js"; import type { ContextEngineInfo } from "../context-engine/types.js"; import { MIN_PROMPT_BUDGET_RATIO, MIN_PROMPT_BUDGET_TOKENS } from "./pi-compaction-constants.js"; +import { resolveProviderEndpoint } from "./provider-attribution.js"; +import { normalizeProviderId } from "./provider-id.js"; export const DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR = 20_000; @@ -122,18 +124,81 @@ export function applyPiCompactionSettingsFromConfig(params: { }; } -/** Decide whether Pi's internal auto-compaction should be disabled for this run. 
*/ -function shouldDisablePiAutoCompaction(params: { contextEngineInfo?: ContextEngineInfo }): boolean { - return params.contextEngineInfo?.ownsCompaction === true; +/** + * Detect providers whose pi-ai `isContextOverflow` Case 2 (silent overflow) + * fires on a successful turn and triggers Pi's `_runAutoCompaction` from + * inside `Session.prompt()`, collapsing `agent.state.messages` before the + * provider call (openclaw#75799). + * + * Returns true on any of: `zai-native` endpoint class, normalized provider id + * `zai`, or a model id (matched case-insensitively) that starts with `z-ai/`, + * `openrouter/z-ai/`, or a bare `glm-` (no namespace prefix) — the latter covers + * in-house gateways that expose Zhipu's GLM family directly without a `z-ai/` + * qualifier. Intentionally narrow: namespaced GLM ids that route through other + * providers (e.g. `ollama/glm-*`, `opencode-go/glm-*`) are NOT included because + * their hosts have their own overflow accounting and may not exhibit the z.ai + * silent-overflow shape. Other providers documented as silently truncating are + * not added until there is a reproducible case. + */ +export function isSilentOverflowProneModel(model: { + provider?: string | null; + modelId?: string | null; + baseUrl?: string | null; +}): boolean { + const provider = normalizeProviderId(typeof model.provider === "string" ? model.provider : ""); + if (provider === "zai") { + return true; + } + if (typeof model.baseUrl === "string" && model.baseUrl.length > 0) { + if (resolveProviderEndpoint(model.baseUrl).endpointClass === "zai-native") { + return true; + } + } + if (typeof model.modelId === "string" && model.modelId.length > 0) { + const normalized = model.modelId.toLowerCase(); + if ( + normalized.startsWith("z-ai/") || + normalized.startsWith("openrouter/z-ai/") || + normalized.startsWith("glm-") + ) { + return true; + } + } + return false; } -/** Disable Pi auto-compaction via settings when a context engine owns compaction. 
*/ +/** + * Decide whether Pi's `_checkCompaction → _runAutoCompaction` should be disabled + * (it would otherwise fire from inside `Session.prompt()` and reassign + * `agent.state.messages` before the provider call). Returns true when a context + * engine owns compaction (`contextEngineInfo.ownsCompaction === true`) or the caller + * flagged the model as silent-overflow-prone (openclaw#75799). Default-mode runs + * against ordinary providers keep Pi's auto-compaction as the existing baseline. + */ +function shouldDisablePiAutoCompaction(params: { + contextEngineInfo?: ContextEngineInfo; + silentOverflowProneProvider?: boolean; +}): boolean { + return ( + params.contextEngineInfo?.ownsCompaction === true || params.silentOverflowProneProvider === true + ); +} + +/** + * Apply the auto-compaction guard. Callers that reload a `DefaultResourceLoader` + * MUST call this AGAIN after each `reload()` — `settingsManager.reload()` + * rehydrates `compaction.enabled` from disk and silently restores Pi's + * default-on behavior, undoing the guard. Mirrors the existing + * `applyPiCompactionSettingsFromConfig` re-call pattern at the same sites. + */ export function applyPiAutoCompactionGuard(params: { settingsManager: PiSettingsManagerLike; contextEngineInfo?: ContextEngineInfo; + silentOverflowProneProvider?: boolean; }): { supported: boolean; disabled: boolean } { const disable = shouldDisablePiAutoCompaction({ contextEngineInfo: params.contextEngineInfo, + silentOverflowProneProvider: params.silentOverflowProneProvider, }); const hasMethod = typeof params.settingsManager.setCompactionEnabled === "function"; if (!disable || !hasMethod) {