From b23d59a522ba054d0854d013f6ee6edc36569fa8 Mon Sep 17 00:00:00 2001 From: Xan Torres Date: Wed, 15 Apr 2026 23:05:26 +0200 Subject: [PATCH] Gateway/skills: invalidate session skills snapshot on config write --- CHANGELOG.md | 1 + src/agents/skills/refresh-state.ts | 2 +- src/gateway/config-reload.test.ts | 100 +++++++++++++++++++++++++++++ src/gateway/config-reload.ts | 38 +++++++++++ 4 files changed, 140 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cfc97e018e..bc88a9b91a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Speech/TTS: auto-enable the bundled Microsoft and ElevenLabs speech providers, and route generic TTS directive tokens through the explicit or active provider first so overrides like `[[tts:speed=1.2]]` stop silently landing on the wrong provider. (#62846) Thanks @stainlu. - OpenAI Codex/models: normalize stale native transport metadata in both runtime resolution and discovery/listing so legacy `openai-codex` rows with missing `api` or `https://chatgpt.com/backend-api/v1` self-heal to the canonical Codex transport instead of routing requests through broken HTML/Cloudflare paths, combining the original fixes proposed in #66969 (saamuelng601-pixel) and #67159 (hclsys). (#67635) - Agents/failover: treat HTML provider error pages as upstream transport failures for CDN-style 5xx responses without misclassifying embedded body text as API rate limits, while still preserving auth remediation for HTML 401/403 pages and proxy remediation for HTML 407 pages. (#67642) Thanks @stainlu. +- Gateway/skills: bump the cached skills-snapshot version whenever a config write touches `skills.*` (for example `skills.allowBundled`, `skills.entries..enabled`, or `skills.profile`). Existing agent sessions persist a `skillsSnapshot` in `sessions.json` that reuses the skill list frozen at session creation; without this invalidation, removing a bundled skill from the allowlist left the old snapshot live and the model kept calling the disabled tool, producing `Tool not found` loops that ran until the embedded-run timeout. (#67401) Thanks @xantorres. ## 2026.4.15-beta.1 diff --git a/src/agents/skills/refresh-state.ts b/src/agents/skills/refresh-state.ts index 45054dda512..c2c64367fb2 100644 --- a/src/agents/skills/refresh-state.ts +++ b/src/agents/skills/refresh-state.ts @@ -1,6 +1,6 @@ export type SkillsChangeEvent = { workspaceDir?: string; - reason: "watch" | "manual" | "remote-node"; + reason: "watch" | "manual" | "remote-node" | "config-change"; changedPath?: string; }; diff --git a/src/gateway/config-reload.test.ts b/src/gateway/config-reload.test.ts index 8bc63fe782e..4e802a89cb5 100644 --- a/src/gateway/config-reload.test.ts +++ b/src/gateway/config-reload.test.ts @@ -2,6 +2,10 @@ import chokidar from "chokidar"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { listChannelPlugins } from "../channels/plugins/index.js"; import type { ChannelPlugin } from "../channels/plugins/types.js"; +import { + getSkillsSnapshotVersion, + resetSkillsRefreshStateForTest, +} from "../agents/skills/refresh-state.js"; import type { ConfigFileSnapshot, ConfigWriteNotification } from "../config/config.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { createTestRegistry } from "../test-utils/channel-plugins.js"; @@ -9,6 +13,7 @@ import { buildGatewayReloadPlan, diffConfigPaths, resolveGatewayReloadSettings, + shouldInvalidateSkillsSnapshotForPaths, startGatewayConfigReloader, } from "./config-reload.js"; @@ -616,3 +621,98 @@ describe("startGatewayConfigReloader", () => { await harness.reloader.stop(); }); }); + +describe("shouldInvalidateSkillsSnapshotForPaths", () => { + it.each([ + "skills", + "skills.allowBundled", + "skills.entries", + "skills.entries.himalaya", + "skills.entries.himalaya.enabled", + "skills.profile", + ])("returns true for skills path %s", (path) => { + expect(shouldInvalidateSkillsSnapshotForPaths([path])).toBe(true); + }); + + it.each([ + "tools.profile", + "agents.defaults.model", + "gateway.port", + "skillset.allowBundled", + "channels.telegram.enabled", + ])("returns false for unrelated path %s", (path) => { + expect(shouldInvalidateSkillsSnapshotForPaths([path])).toBe(false); + }); + + it("returns true when any path in the list matches", () => { + expect( + shouldInvalidateSkillsSnapshotForPaths([ + "gateway.port", + "skills.allowBundled", + "channels.telegram.enabled", + ]), + ).toBe(true); + }); + + it("returns false for empty input", () => { + expect(shouldInvalidateSkillsSnapshotForPaths([])).toBe(false); + }); +}); + +describe("startGatewayConfigReloader skills invalidation", () => { + beforeEach(() => { + vi.useFakeTimers(); + resetSkillsRefreshStateForTest(); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + resetSkillsRefreshStateForTest(); + }); + + it("bumps the skills snapshot version when skills.allowBundled changes", async () => { + const before = getSkillsSnapshotVersion(); + const readSnapshot = vi.fn<() => Promise>().mockResolvedValueOnce( + makeSnapshot({ + config: { + gateway: { reload: { debounceMs: 0 } }, + skills: { allowBundled: ["gog"] }, + }, + hash: "skills-change-1", + }), + ); + const { watcher, log, reloader } = createReloaderHarness(readSnapshot); + + watcher.emit("change"); + await vi.runOnlyPendingTimersAsync(); + + const after = getSkillsSnapshotVersion(); + expect(after).toBeGreaterThan(before); + expect(log.info).toHaveBeenCalledWith( + expect.stringContaining("skills snapshot invalidated by config change"), + ); + + await reloader.stop(); + }); + + it("does not bump the snapshot version when unrelated config changes", async () => { + const before = getSkillsSnapshotVersion(); + const readSnapshot = vi.fn<() => Promise>().mockResolvedValueOnce( + makeSnapshot({ + config: { + gateway: { reload: { debounceMs: 0 }, port: 18790 }, + }, + hash: "unrelated-change-1", + }), + ); + const { watcher, reloader } = createReloaderHarness(readSnapshot); + + watcher.emit("change"); + await vi.runOnlyPendingTimersAsync(); + + expect(getSkillsSnapshotVersion()).toBe(before); + + await reloader.stop(); + }); +}); diff --git a/src/gateway/config-reload.ts b/src/gateway/config-reload.ts index 6ca2742e017..2af65757b34 100644 --- a/src/gateway/config-reload.ts +++ b/src/gateway/config-reload.ts @@ -1,5 +1,6 @@ import { isDeepStrictEqual } from "node:util"; import chokidar from "chokidar"; +import { bumpSkillsSnapshotVersion } from "../agents/skills/refresh-state.js"; import type { OpenClawConfig, ConfigFileSnapshot, @@ -25,6 +26,31 @@ const DEFAULT_RELOAD_SETTINGS: GatewayReloadSettings = { const MISSING_CONFIG_RETRY_DELAY_MS = 150; const MISSING_CONFIG_MAX_RETRIES = 2; +/** + * Paths under `skills.*` always change the snapshot that sessions cache in + * sessions.json. Any prefix match here (for example `skills.allowBundled`, + * `skills.entries.X.enabled`, `skills.profile`) forces sessions to rebuild + * their snapshot on the next turn rather than silently advertising stale + * tools to the model. + */ +const SKILLS_INVALIDATION_PREFIXES = ["skills"] as const; + +export function shouldInvalidateSkillsSnapshotForPaths(changedPaths: string[]): boolean { + return changedPaths.some((path) => + SKILLS_INVALIDATION_PREFIXES.some( + (prefix) => path === prefix || path.startsWith(`${prefix}.`), + ), + ); +} + +function firstSkillsChangedPath(changedPaths: string[]): string | undefined { + return changedPaths.find((path) => + SKILLS_INVALIDATION_PREFIXES.some( + (prefix) => path === prefix || path.startsWith(`${prefix}.`), + ), + ); +} + export function diffConfigPaths(prev: unknown, next: unknown, prefix = ""): string[] { if (prev === next) { return []; @@ -164,6 +190,18 @@ export function startGatewayConfigReloader(opts: { return; } + // Invalidate cached skills snapshots (persisted in sessions.json) whenever + // the user touches skills.* config. Without this, sessions keep advertising + // tools that no longer exist in the allowlist, which causes infinite + // tool-not-found loops against the model. + if (shouldInvalidateSkillsSnapshotForPaths(changedPaths)) { + const changedPath = firstSkillsChangedPath(changedPaths); + bumpSkillsSnapshotVersion({ reason: "config-change", changedPath }); + opts.log.info( + `skills snapshot invalidated by config change (${changedPath ?? "skills.*"})`, + ); + } + opts.log.info(`config change detected; evaluating reload (${changedPaths.join(", ")})`); const plan = buildGatewayReloadPlan(changedPaths); if (settings.mode === "off") {