Agents/tool-loop: enable unknown-tool stream guard by default

This commit is contained in:
Xan Torres
2026-04-15 23:09:49 +02:00
committed by Ayaan Zaidi
parent b23d59a522
commit 36ed36768c
3 changed files with 36 additions and 11 deletions

View File

@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
- OpenAI Codex/models: normalize stale native transport metadata in both runtime resolution and discovery/listing so legacy `openai-codex` rows with missing `api` or `https://chatgpt.com/backend-api/v1` self-heal to the canonical Codex transport instead of routing requests through broken HTML/Cloudflare paths, combining the original fixes proposed in #66969 (saamuelng601-pixel) and #67159 (hclsys). (#67635)
- Agents/failover: treat HTML provider error pages as upstream transport failures for CDN-style 5xx responses without misclassifying embedded body text as API rate limits, while still preserving auth remediation for HTML 401/403 pages and proxy remediation for HTML 407 pages. (#67642) Thanks @stainlu.
- Gateway/skills: bump the cached skills-snapshot version whenever a config write touches `skills.*` (for example `skills.allowBundled`, `skills.entries.<id>.enabled`, or `skills.profile`). Existing agent sessions persist a `skillsSnapshot` in `sessions.json` that reuses the skill list frozen at session creation; without this invalidation, removing a bundled skill from the allowlist left the old snapshot live and the model kept calling the disabled tool, producing `Tool <name> not found` loops that ran until the embedded-run timeout. (#67401) Thanks @xantorres.
- Agents/tool-loop: enable the unknown-tool stream guard by default. Previously `resolveUnknownToolGuardThreshold` returned `undefined` unless `tools.loopDetection.enabled` was explicitly set to `true`, which left the protection off in the default configuration. A hallucinated or removed tool (for example `himalaya` after it was dropped from `skills.allowBundled`) would then loop "Tool X not found" attempts until the full embedded-run timeout. The guard has no false-positive surface because it only triggers on tools that are objectively not registered in the run, so it now stays on regardless of `tools.loopDetection.enabled` and still accepts `tools.loopDetection.unknownToolThreshold` as a per-run override (default 10). (#67401) Thanks @xantorres.
## 2026.4.15-beta.1

View File

@@ -424,20 +424,34 @@ describe("resolveAttemptFsWorkspaceOnly", () => {
});
// Contract under test: the unknown-tool guard threshold always resolves to a
// positive whole number. It does not depend on `tools.loopDetection.enabled`,
// and unusable overrides fall back to the default of 10.
describe("resolveUnknownToolGuardThreshold", () => {
  it("returns the default threshold when no loop-detection config is provided", () => {
    expect(resolveUnknownToolGuardThreshold(undefined)).toBe(10);
    expect(resolveUnknownToolGuardThreshold({})).toBe(10);
  });

  it("stays on even when tools.loopDetection.enabled is false (safety net)", () => {
    // The unknown-tool guard has no false-positive surface — the tool is
    // objectively not registered — so it is always on regardless of the
    // opt-in genericRepeat/pingPong/pollNoProgress detectors.
    expect(resolveUnknownToolGuardThreshold({ enabled: false })).toBe(10);
    expect(resolveUnknownToolGuardThreshold({ enabled: false, unknownToolThreshold: 3 })).toBe(3);
  });

  it("uses the configured threshold override when provided", () => {
    expect(resolveUnknownToolGuardThreshold({ enabled: true, unknownToolThreshold: 4 })).toBe(4);
  });

  it("falls back to the default threshold when the override is non-positive", () => {
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 0 })).toBe(10);
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: -5 })).toBe(10);
    // NaN is typed as a number but is not a usable count, so it is rejected too.
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: Number.NaN })).toBe(10);
  });

  it("floors fractional overrides", () => {
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 3.7 })).toBe(3);
  });
});
describe("wrapStreamFnTrimToolCallNames", () => {

View File

@@ -294,11 +294,21 @@ const MAX_BTW_SNAPSHOT_MESSAGES = 100;
/**
 * Resolves how many consecutive unknown-tool attempts are tolerated before the
 * unknown-tool stream guard intervenes.
 *
 * The guard is a safety net against the model hallucinating a tool name or
 * calling a tool that has since been removed from the allowlist (for example
 * after a `skills.allowBundled` config change). After `threshold` consecutive
 * unknown-tool attempts the stream wrapper rewrites the assistant message
 * content to tell the model to stop, which breaks otherwise-infinite
 * Tool-not-found loops against the provider. Unlike the genericRepeat /
 * pingPong / pollNoProgress detectors this guard has no false-positive surface
 * because the tool is objectively not registered in this run, so it stays on
 * regardless of `tools.loopDetection.enabled`.
 *
 * @param loopDetection - Optional loop-detection settings. Only
 *   `unknownToolThreshold` is read; `enabled` is deliberately ignored.
 * @returns The override rounded down to a whole number when that result is a
 *   finite value of at least 1; otherwise `UNKNOWN_TOOL_THRESHOLD`.
 */
export function resolveUnknownToolGuardThreshold(loopDetection?: {
  enabled?: boolean;
  unknownToolThreshold?: number;
}): number {
  const override = loopDetection?.unknownToolThreshold;
  if (typeof override !== "number" || !Number.isFinite(override)) {
    return UNKNOWN_TOOL_THRESHOLD;
  }
  // Validate *after* flooring: an override such as 0.5 is positive but floors
  // to 0, which is exactly the non-positive threshold this check must reject.
  const wholeAttempts = Math.floor(override);
  return wholeAttempts >= 1 ? wholeAttempts : UNKNOWN_TOOL_THRESHOLD;
}
function summarizeMessagePayload(msg: AgentMessage): { textChars: number; imageBlocks: number } {