Gateway/skills: invalidate session skills snapshot on config write

This commit is contained in:
Xan Torres
2026-04-15 23:05:26 +02:00
committed by Ayaan Zaidi
parent e588e904a7
commit b23d59a522
4 changed files with 140 additions and 1 deletions

View File

@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
- Speech/TTS: auto-enable the bundled Microsoft and ElevenLabs speech providers, and route generic TTS directive tokens through the explicit or active provider first so overrides like `[[tts:speed=1.2]]` stop silently landing on the wrong provider. (#62846) Thanks @stainlu.
- OpenAI Codex/models: normalize stale native transport metadata in both runtime resolution and discovery/listing so legacy `openai-codex` rows with missing `api` or `https://chatgpt.com/backend-api/v1` self-heal to the canonical Codex transport instead of routing requests through broken HTML/Cloudflare paths, combining the original fixes proposed in #66969 (saamuelng601-pixel) and #67159 (hclsys). (#67635)
- Agents/failover: treat HTML provider error pages as upstream transport failures for CDN-style 5xx responses without misclassifying embedded body text as API rate limits, while still preserving auth remediation for HTML 401/403 pages and proxy remediation for HTML 407 pages. (#67642) Thanks @stainlu.
- Gateway/skills: bump the cached skills-snapshot version whenever a config write touches `skills.*` (for example `skills.allowBundled`, `skills.entries.<id>.enabled`, or `skills.profile`). Existing agent sessions persist a `skillsSnapshot` in `sessions.json` that reuses the skill list frozen at session creation; without this invalidation, removing a bundled skill from the allowlist left the old snapshot live and the model kept calling the disabled tool, producing `Tool <name> not found` loops that ran until the embedded-run timeout. (#67401) Thanks @xantorres.
## 2026.4.15-beta.1

View File

@@ -1,6 +1,6 @@
export type SkillsChangeEvent = {
workspaceDir?: string;
reason: "watch" | "manual" | "remote-node";
reason: "watch" | "manual" | "remote-node" | "config-change";
changedPath?: string;
};

View File

@@ -2,6 +2,10 @@ import chokidar from "chokidar";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { listChannelPlugins } from "../channels/plugins/index.js";
import type { ChannelPlugin } from "../channels/plugins/types.js";
import {
getSkillsSnapshotVersion,
resetSkillsRefreshStateForTest,
} from "../agents/skills/refresh-state.js";
import type { ConfigFileSnapshot, ConfigWriteNotification } from "../config/config.js";
import { setActivePluginRegistry } from "../plugins/runtime.js";
import { createTestRegistry } from "../test-utils/channel-plugins.js";
@@ -9,6 +13,7 @@ import {
buildGatewayReloadPlan,
diffConfigPaths,
resolveGatewayReloadSettings,
shouldInvalidateSkillsSnapshotForPaths,
startGatewayConfigReloader,
} from "./config-reload.js";
@@ -616,3 +621,98 @@ describe("startGatewayConfigReloader", () => {
await harness.reloader.stop();
});
});
describe("shouldInvalidateSkillsSnapshotForPaths", () => {
it.each([
"skills",
"skills.allowBundled",
"skills.entries",
"skills.entries.himalaya",
"skills.entries.himalaya.enabled",
"skills.profile",
])("returns true for skills path %s", (path) => {
expect(shouldInvalidateSkillsSnapshotForPaths([path])).toBe(true);
});
it.each([
"tools.profile",
"agents.defaults.model",
"gateway.port",
"skillset.allowBundled",
"channels.telegram.enabled",
])("returns false for unrelated path %s", (path) => {
expect(shouldInvalidateSkillsSnapshotForPaths([path])).toBe(false);
});
it("returns true when any path in the list matches", () => {
expect(
shouldInvalidateSkillsSnapshotForPaths([
"gateway.port",
"skills.allowBundled",
"channels.telegram.enabled",
]),
).toBe(true);
});
it("returns false for empty input", () => {
expect(shouldInvalidateSkillsSnapshotForPaths([])).toBe(false);
});
});
describe("startGatewayConfigReloader skills invalidation", () => {
beforeEach(() => {
vi.useFakeTimers();
resetSkillsRefreshStateForTest();
});
afterEach(() => {
vi.useRealTimers();
vi.restoreAllMocks();
resetSkillsRefreshStateForTest();
});
it("bumps the skills snapshot version when skills.allowBundled changes", async () => {
const before = getSkillsSnapshotVersion();
const readSnapshot = vi.fn<() => Promise<ConfigFileSnapshot>>().mockResolvedValueOnce(
makeSnapshot({
config: {
gateway: { reload: { debounceMs: 0 } },
skills: { allowBundled: ["gog"] },
},
hash: "skills-change-1",
}),
);
const { watcher, log, reloader } = createReloaderHarness(readSnapshot);
watcher.emit("change");
await vi.runOnlyPendingTimersAsync();
const after = getSkillsSnapshotVersion();
expect(after).toBeGreaterThan(before);
expect(log.info).toHaveBeenCalledWith(
expect.stringContaining("skills snapshot invalidated by config change"),
);
await reloader.stop();
});
it("does not bump the snapshot version when unrelated config changes", async () => {
const before = getSkillsSnapshotVersion();
const readSnapshot = vi.fn<() => Promise<ConfigFileSnapshot>>().mockResolvedValueOnce(
makeSnapshot({
config: {
gateway: { reload: { debounceMs: 0 }, port: 18790 },
},
hash: "unrelated-change-1",
}),
);
const { watcher, reloader } = createReloaderHarness(readSnapshot);
watcher.emit("change");
await vi.runOnlyPendingTimersAsync();
expect(getSkillsSnapshotVersion()).toBe(before);
await reloader.stop();
});
});

View File

@@ -1,5 +1,6 @@
import { isDeepStrictEqual } from "node:util";
import chokidar from "chokidar";
import { bumpSkillsSnapshotVersion } from "../agents/skills/refresh-state.js";
import type {
OpenClawConfig,
ConfigFileSnapshot,
@@ -25,6 +26,31 @@ const DEFAULT_RELOAD_SETTINGS: GatewayReloadSettings = {
const MISSING_CONFIG_RETRY_DELAY_MS = 150;
const MISSING_CONFIG_MAX_RETRIES = 2;
/**
* Paths under `skills.*` always change the snapshot that sessions cache in
* sessions.json. Any prefix match here (for example `skills.allowBundled`,
* `skills.entries.X.enabled`, `skills.profile`) forces sessions to rebuild
* their snapshot on the next turn rather than silently advertising stale
* tools to the model.
*/
const SKILLS_INVALIDATION_PREFIXES = ["skills"] as const;
export function shouldInvalidateSkillsSnapshotForPaths(changedPaths: string[]): boolean {
return changedPaths.some((path) =>
SKILLS_INVALIDATION_PREFIXES.some(
(prefix) => path === prefix || path.startsWith(`${prefix}.`),
),
);
}
function firstSkillsChangedPath(changedPaths: string[]): string | undefined {
return changedPaths.find((path) =>
SKILLS_INVALIDATION_PREFIXES.some(
(prefix) => path === prefix || path.startsWith(`${prefix}.`),
),
);
}
export function diffConfigPaths(prev: unknown, next: unknown, prefix = ""): string[] {
if (prev === next) {
return [];
@@ -164,6 +190,18 @@ export function startGatewayConfigReloader(opts: {
return;
}
// Invalidate cached skills snapshots (persisted in sessions.json) whenever
// the user touches skills.* config. Without this, sessions keep advertising
// tools that no longer exist in the allowlist, which causes infinite
// tool-not-found loops against the model.
if (shouldInvalidateSkillsSnapshotForPaths(changedPaths)) {
const changedPath = firstSkillsChangedPath(changedPaths);
bumpSkillsSnapshotVersion({ reason: "config-change", changedPath });
opts.log.info(
`skills snapshot invalidated by config change (${changedPath ?? "skills.*"})`,
);
}
opts.log.info(`config change detected; evaluating reload (${changedPaths.join(", ")})`);
const plan = buildGatewayReloadPlan(changedPaths);
if (settings.mode === "off") {