diff --git a/CHANGELOG.md b/CHANGELOG.md index bc88a9b91a4..267f3bf6369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - OpenAI Codex/models: normalize stale native transport metadata in both runtime resolution and discovery/listing so legacy `openai-codex` rows with missing `api` or `https://chatgpt.com/backend-api/v1` self-heal to the canonical Codex transport instead of routing requests through broken HTML/Cloudflare paths, combining the original fixes proposed in #66969 (saamuelng601-pixel) and #67159 (hclsys). (#67635) - Agents/failover: treat HTML provider error pages as upstream transport failures for CDN-style 5xx responses without misclassifying embedded body text as API rate limits, while still preserving auth remediation for HTML 401/403 pages and proxy remediation for HTML 407 pages. (#67642) Thanks @stainlu. - Gateway/skills: bump the cached skills-snapshot version whenever a config write touches `skills.*` (for example `skills.allowBundled`, `skills.entries.*.enabled`, or `skills.profile`). Existing agent sessions persist a `skillsSnapshot` in `sessions.json` that reuses the skill list frozen at session creation; without this invalidation, removing a bundled skill from the allowlist left the old snapshot live and the model kept calling the disabled tool, producing `Tool not found` loops that ran until the embedded-run timeout. (#67401) Thanks @xantorres. +- Agents/tool-loop: enable the unknown-tool stream guard by default. Previously `resolveUnknownToolGuardThreshold` returned `undefined` unless `tools.loopDetection.enabled` was explicitly set to `true`, which left the protection off in the default configuration. A hallucinated or removed tool (for example `himalaya` after it was dropped from `skills.allowBundled`) would then loop "Tool X not found" attempts until the full embedded-run timeout. 
The guard has no false-positive surface because it only triggers on tools that are objectively not registered in the run, so it now stays on regardless of `tools.loopDetection.enabled` and still accepts `tools.loopDetection.unknownToolThreshold` as a per-run override (default 10). (#67401) Thanks @xantorres. ## 2026.4.15-beta.1 diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 2c93d062216..49af9a330b0 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -424,20 +424,34 @@ describe("resolveAttemptFsWorkspaceOnly", () => { }); describe("resolveUnknownToolGuardThreshold", () => { - it("returns undefined when loop detection is disabled", () => { - expect(resolveUnknownToolGuardThreshold({ enabled: false, unknownToolThreshold: 4 })).toBe( - undefined, - ); - expect(resolveUnknownToolGuardThreshold(undefined)).toBe(undefined); + it("returns the default threshold when no loop-detection config is provided", () => { + expect(resolveUnknownToolGuardThreshold(undefined)).toBe(10); + expect(resolveUnknownToolGuardThreshold({})).toBe(10); }); - it("uses the default threshold when loop detection is enabled without an override", () => { - expect(resolveUnknownToolGuardThreshold({ enabled: true })).toBe(10); + it("stays on even when tools.loopDetection.enabled is false (safety net)", () => { + // The unknown-tool guard has no false-positive surface — the tool is + // objectively not registered — so it is always on regardless of the + // opt-in genericRepeat/pingPong/pollNoProgress detectors. 
+ expect(resolveUnknownToolGuardThreshold({ enabled: false })).toBe(10); + expect(resolveUnknownToolGuardThreshold({ enabled: false, unknownToolThreshold: 3 })).toBe(3); }); it("uses the configured threshold override when provided", () => { expect(resolveUnknownToolGuardThreshold({ enabled: true, unknownToolThreshold: 4 })).toBe(4); }); + + it("falls back to the default threshold when the override is non-positive", () => { + expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 0 })).toBe(10); + expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: -5 })).toBe(10); + expect( + resolveUnknownToolGuardThreshold({ unknownToolThreshold: Number.NaN }), + ).toBe(10); + }); + + it("floors fractional overrides", () => { + expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 3.7 })).toBe(3); + }); }); describe("wrapStreamFnTrimToolCallNames", () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 6c4144f5a07..c48377ceafb 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -294,11 +294,21 @@ const MAX_BTW_SNAPSHOT_MESSAGES = 100; export function resolveUnknownToolGuardThreshold(loopDetection?: { enabled?: boolean; unknownToolThreshold?: number; -}): number | undefined { - if (loopDetection?.enabled !== true) { - return undefined; +}): number { + // The unknown-tool guard is a safety net against the model hallucinating a + // tool name or calling a tool that has since been removed from the allowlist + // (for example after a `skills.allowBundled` config change). After `threshold` + // consecutive unknown-tool attempts the stream wrapper rewrites the assistant + // message content to tell the model to stop, which breaks otherwise-infinite + // Tool-not-found loops against the provider. 
Unlike the genericRepeat / + // pingPong / pollNoProgress detectors this guard has no false-positive + // surface because the tool is objectively not registered in this run, so it + // stays on regardless of `tools.loopDetection.enabled`. + const raw = loopDetection?.unknownToolThreshold; + if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) { + return Math.floor(raw); } - return loopDetection.unknownToolThreshold ?? UNKNOWN_TOOL_THRESHOLD; + return UNKNOWN_TOOL_THRESHOLD; } function summarizeMessagePayload(msg: AgentMessage): { textChars: number; imageBlocks: number } {