From caf1b84822a01a4a008219ae415931e078a04ba4 Mon Sep 17 00:00:00 2001 From: GitBuck Date: Sun, 8 Mar 2026 18:47:34 +0100 Subject: [PATCH] feat: allow compaction model override via config (#38753) Merged via squash. Prepared head SHA: a3d6d6c845c9ef492370c4cc12ea790ca92123f0 Co-authored-by: starbuck100 <25417736+starbuck100@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 1 + docs/concepts/compaction.md | 30 +++++++++ docs/gateway/configuration-reference.md | 2 + scripts/test-parallel.mjs | 2 + src/agents/models-config.merge.test.ts | 5 +- src/agents/pi-embedded-runner/compact.ts | 29 +++++++- .../pi-embedded-runner/run/attempt.test.ts | 66 +++++++++++++++++++ src/config/schema.help.quality.test.ts | 4 ++ src/config/schema.help.ts | 2 + src/config/schema.labels.ts | 1 + src/config/types.agent-defaults.ts | 4 ++ src/config/zod-schema.agent-defaults.ts | 1 + 12 files changed, 143 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 083dfab122e..a5fe54b1fa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai - Mattermost/model picker: add Telegram-style interactive provider/model browsing for `/oc_model` and `/oc_models`, fix picker callback updates, and emit a normal confirmation reply when a model is selected. (#38767) thanks @mukhtharcm. - Docker/multi-stage build: restructure Dockerfile as a multi-stage build to produce a minimal runtime image without build tools, source code, or Bun; add `OPENCLAW_VARIANT=slim` build arg for a bookworm-slim variant. (#38479) Thanks @sallyom. - Google/Gemini 3.1 Flash-Lite: add first-class `google/gemini-3.1-flash-lite-preview` support across model-id normalization, default aliases, media-understanding image lookups, Google Gemini CLI forward-compat fallback, and docs. +- Agents/compaction model override: allow `agents.defaults.compaction.model` to route compaction summarization through a different model than the main session, and document the override across config help/reference surfaces. (#38753) thanks @starbuck100. ### Breaking diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index 8d243bf234d..73f6372c3f7 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -24,6 +24,36 @@ Compaction **persists** in the session’s JSONL history. Use the `agents.defaults.compaction` setting in your `openclaw.json` to configure compaction behavior (mode, target tokens, etc.). Compaction summarization preserves opaque identifiers by default (`identifierPolicy: "strict"`). You can override this with `identifierPolicy: "off"` or provide custom text with `identifierPolicy: "custom"` and `identifierInstructions`. +You can optionally specify a different model for compaction summarization via `agents.defaults.compaction.model`. This is useful when your primary model is a local or small model and you want compaction summaries produced by a more capable model. The override accepts any `provider/model-id` string: + +```json +{ + "agents": { + "defaults": { + "compaction": { + "model": "openrouter/anthropic/claude-sonnet-4-5" + } + } + } +} +``` + +This also works with local models, for example a second Ollama model dedicated to summarization or a fine-tuned compaction specialist: + +```json +{ + "agents": { + "defaults": { + "compaction": { + "model": "ollama/llama3.1:8b" + } + } + } +} +``` + +When unset, compaction uses the agent's primary model. + ## Auto-compaction (default on) When a session nears or exceeds the model’s context window, OpenClaw triggers auto-compaction and may retry the original request using the compacted context. diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 880ccdd198b..ca6a3681410 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -1005,6 +1005,7 @@ Periodic heartbeat runs. identifierPolicy: "strict", // strict | off | custom identifierInstructions: "Preserve deployment IDs, ticket IDs, and host:port pairs exactly.", // used when identifierPolicy=custom postCompactionSections: ["Session Startup", "Red Lines"], // [] disables reinjection + model: "openrouter/anthropic/claude-sonnet-4-5", // optional compaction-only model override memoryFlush: { enabled: true, softThresholdTokens: 6000, @@ -1021,6 +1022,7 @@ Periodic heartbeat runs. - `identifierPolicy`: `strict` (default), `off`, or `custom`. `strict` prepends built-in opaque identifier retention guidance during compaction summarization. - `identifierInstructions`: optional custom identifier-preservation text used when `identifierPolicy=custom`. - `postCompactionSections`: optional AGENTS.md H2/H3 section names to re-inject after compaction. Defaults to `["Session Startup", "Red Lines"]`; set `[]` to disable reinjection. When unset or explicitly set to that default pair, older `Every Session`/`Safety` headings are also accepted as a legacy fallback. +- `model`: optional `provider/model-id` override for compaction summarization only. Use this when the main session should keep one model but compaction summaries should run on another; when unset, compaction uses the session's primary model. - `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Skipped when workspace is read-only. ### `agents.defaults.contextPruning` diff --git a/scripts/test-parallel.mjs b/scripts/test-parallel.mjs index d524fb87438..f57a0569047 100644 --- a/scripts/test-parallel.mjs +++ b/scripts/test-parallel.mjs @@ -86,6 +86,8 @@ const unitIsolatedFilesRaw = [ "src/slack/monitor/slash.test.ts", // Uses process-level unhandledRejection listeners; keep it off vmForks to avoid cross-file leakage. "src/imessage/monitor.shutdown.unhandled-rejection.test.ts", + // Mutates process.cwd() and mocks core module loaders; isolate from the shared fast lane. + "src/infra/git-commit.test.ts", ]; const unitIsolatedFiles = unitIsolatedFilesRaw.filter((file) => fs.existsSync(file)); diff --git a/src/agents/models-config.merge.test.ts b/src/agents/models-config.merge.test.ts index 223a534e08f..b76b3509e26 100644 --- a/src/agents/models-config.merge.test.ts +++ b/src/agents/models-config.merge.test.ts @@ -52,7 +52,10 @@ describe("models-config merge helpers", () => { it("merges explicit providers onto trimmed keys", () => { const merged = mergeProviders({ explicit: { - " custom ": { api: "openai-responses", models: [] } as ProviderConfig, + " custom ": { + api: "openai-responses", + models: [] as ProviderConfig["models"], + } as ProviderConfig, }, }); diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 05fa3490658..3a51da22271 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -271,8 +271,31 @@ export async function compactEmbeddedPiSessionDirect( const resolvedWorkspace = resolveUserPath(params.workspaceDir); const prevCwd = process.cwd(); - const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; - const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; + // Resolve compaction model: prefer config override, then fall back to caller-supplied model + const compactionModelOverride = params.config?.agents?.defaults?.compaction?.model?.trim(); + let provider: string; + let modelId: string; + // When switching provider via override, drop the primary auth profile to avoid + // sending the wrong credentials (e.g. OpenAI profile token to OpenRouter). + let authProfileId: string | undefined = params.authProfileId; + if (compactionModelOverride) { + const slashIdx = compactionModelOverride.indexOf("/"); + if (slashIdx > 0) { + provider = compactionModelOverride.slice(0, slashIdx).trim(); + modelId = compactionModelOverride.slice(slashIdx + 1).trim() || DEFAULT_MODEL; + // Provider changed — drop primary auth profile so getApiKeyForModel + // falls back to provider-based key resolution for the override model. + if (provider !== (params.provider ?? "").trim()) { + authProfileId = undefined; + } + } else { + provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; + modelId = compactionModelOverride; + } + } else { + provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; + modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; + } const fail = (reason: string): EmbeddedPiCompactResult => { log.warn( `[compaction-diag] end runId=${runId} sessionKey=${params.sessionKey ?? params.sessionId} ` + @@ -302,7 +325,7 @@ export async function compactEmbeddedPiSessionDirect( const apiKeyInfo = await getApiKeyForModel({ model, cfg: params.config, - profileId: params.authProfileId, + profileId: authProfileId, agentDir, }); diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 197a2903183..649679632e8 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -639,6 +639,72 @@ describe("prependSystemPromptAddition", () => { }); describe("buildAfterTurnLegacyCompactionParams", () => { + it("uses primary model when compaction.model is not set", () => { + const legacy = buildAfterTurnLegacyCompactionParams({ + attempt: { + sessionKey: "agent:main:session:abc", + messageChannel: "slack", + messageProvider: "slack", + agentAccountId: "acct-1", + authProfileId: "openai:p1", + config: {} as OpenClawConfig, + skillsSnapshot: undefined, + senderIsOwner: true, + provider: "openai-codex", + modelId: "gpt-5.3-codex", + thinkLevel: "off", + reasoningLevel: "on", + extraSystemPrompt: "extra", + ownerNumbers: ["+15555550123"], + }, + workspaceDir: "/tmp/workspace", + agentDir: "/tmp/agent", + }); + + expect(legacy).toMatchObject({ + provider: "openai-codex", + model: "gpt-5.3-codex", + }); + }); + + it("passes primary model through even when compaction.model is set (override resolved in compactDirect)", () => { + const legacy = buildAfterTurnLegacyCompactionParams({ + attempt: { + sessionKey: "agent:main:session:abc", + messageChannel: "slack", + messageProvider: "slack", + agentAccountId: "acct-1", + authProfileId: "openai:p1", + config: { + agents: { + defaults: { + compaction: { + model: "openrouter/anthropic/claude-sonnet-4-5", + }, + }, + }, + } as OpenClawConfig, + skillsSnapshot: undefined, + senderIsOwner: true, + provider: "openai-codex", + modelId: "gpt-5.3-codex", + thinkLevel: "off", + reasoningLevel: "on", + extraSystemPrompt: "extra", + ownerNumbers: ["+15555550123"], + }, + workspaceDir: "/tmp/workspace", + agentDir: "/tmp/agent", + }); + + // buildAfterTurnLegacyCompactionParams no longer resolves the override; + // compactEmbeddedPiSessionDirect does it centrally for both auto + manual paths. + expect(legacy).toMatchObject({ + provider: "openai-codex", + model: "gpt-5.3-codex", + }); + }); + it("includes resolved auth profile fields for context-engine afterTurn compaction", () => { const legacy = buildAfterTurnLegacyCompactionParams({ attempt: { diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 6cb8e489920..fa9451456bf 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -378,6 +378,7 @@ const TARGET_KEYS = [ "agents.defaults.compaction.qualityGuard.enabled", "agents.defaults.compaction.qualityGuard.maxRetries", "agents.defaults.compaction.postCompactionSections", + "agents.defaults.compaction.model", "agents.defaults.compaction.memoryFlush", "agents.defaults.compaction.memoryFlush.enabled", "agents.defaults.compaction.memoryFlush.softThresholdTokens", @@ -810,6 +811,9 @@ describe("config help copy quality", () => { expect(/Every Session|Safety/i.test(postCompactionSections)).toBe(true); expect(/\[\]|disable/i.test(postCompactionSections)).toBe(true); + const compactionModel = FIELD_HELP["agents.defaults.compaction.model"]; + expect(/provider\/model|different model|primary agent model/i.test(compactionModel)).toBe(true); + const flush = FIELD_HELP["agents.defaults.compaction.memoryFlush.enabled"]; expect(/pre-compaction|memory flush|token/i.test(flush)).toBe(true); }); diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 01f8c40ff6a..50d9502ffea 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1013,6 +1013,8 @@ export const FIELD_HELP: Record = { "Maximum number of regeneration retries after a failed safeguard summary quality audit. Use small values to bound extra latency and token cost.", "agents.defaults.compaction.postCompactionSections": 'AGENTS.md H2/H3 section names re-injected after compaction so the agent reruns critical startup guidance. Leave unset to use "Session Startup"/"Red Lines" with legacy fallback to "Every Session"/"Safety"; set to [] to disable reinjection entirely.', + "agents.defaults.compaction.model": + "Optional provider/model override used only for compaction summarization. Set this when you want compaction to run on a different model than the session default, and leave it unset to keep using the primary agent model.", "agents.defaults.compaction.memoryFlush": "Pre-compaction memory flush settings that run an agentic memory write before heavy compaction. Keep enabled for long sessions so salient context is persisted before aggressive trimming.", "agents.defaults.compaction.memoryFlush.enabled": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 46350c15d94..f8961d9e8dd 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -458,6 +458,7 @@ export const FIELD_LABELS: Record = { "agents.defaults.compaction.qualityGuard.enabled": "Compaction Quality Guard Enabled", "agents.defaults.compaction.qualityGuard.maxRetries": "Compaction Quality Guard Max Retries", "agents.defaults.compaction.postCompactionSections": "Post-Compaction Context Sections", + "agents.defaults.compaction.model": "Compaction Model Override", "agents.defaults.compaction.memoryFlush": "Compaction Memory Flush", "agents.defaults.compaction.memoryFlush.enabled": "Compaction Memory Flush Enabled", "agents.defaults.compaction.memoryFlush.softThresholdTokens": diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index a242d0bbcc1..9124e4084d8 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -322,6 +322,10 @@ export type AgentCompactionConfig = { * Set to [] to disable post-compaction context injection entirely. */ postCompactionSections?: string[]; + /** Optional model override for compaction summarization (e.g. "openrouter/anthropic/claude-sonnet-4-5"). + * When set, compaction uses this model instead of the agent's primary model. + * Falls back to the primary model when unset. */ + model?: string; }; export type AgentCompactionMemoryFlushConfig = { diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index 1e83a92f54c..242d6959729 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -104,6 +104,7 @@ export const AgentDefaultsSchema = z .strict() .optional(), postCompactionSections: z.array(z.string()).optional(), + model: z.string().optional(), memoryFlush: z .object({ enabled: z.boolean().optional(),