diff --git a/CHANGELOG.md b/CHANGELOG.md index d54dc05333b..59e6f12bf85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai - Cron/agents: honor configured subagent model fallbacks for isolated scheduled runs and forward that fallback policy into embedded agent timeout failover. Fixes #74985. Thanks @chrisgwynne. - Codex app-server/MCP: scope user MCP servers to specific OpenClaw agent ids through an optional `mcp.servers.<name>.codex.agents` list and accept `codex.defaultToolsApprovalMode` (`auto`/`prompt`/`approve`) for native Codex approval defaults; OpenClaw strips the `codex` block before handing `mcp_servers` config to Codex. (#82180) Thanks @sercada. - Agents/OpenAI Responses: clamp `input_tokens - cached_tokens` at zero and reconstruct `totalTokens` from input + output + cached components so Responses-API streams report consistent usage when providers under-report `input_tokens` relative to `cached_tokens`. +- Agents: mark adapter-caught tool execution failures as error tool results in embedded Pi sessions, so models can retry recoverable edit failures instead of seeing a successful tool result. Fixes #81546. (#81564) Thanks @najef1979-code and @MonkeyLeeT. - Plugins: reject malformed `package.json` `openclaw.extensions` metadata during install, discovery, and post-update payload smoke instead of silently dropping invalid entries. - Plugins: reject package metadata records whose `package.json` resolves outside the plugin root instead of trusting persisted or reconstructed registry snapshots. - Plugins: ignore malformed persisted package channel/install metadata instead of crashing catalog reconstruction or leaking invalid install hints. @@ -333,7 +334,6 @@ Docs: https://docs.openclaw.ai - Config: return the canonical persisted config from `config.set`, `config.apply`, and `config.patch` responses after write-time shaping. Fixes #77455. 
- Codex auth: accept OAuth profiles backed by `oauthRef` during runtime auth selection, so official Codex OAuth logins are used by app-server agent runs. (#81633) Thanks @obviyus. - Telegram: release stopped polling leases after the gateway stop grace so in-process restarts can reuse the same bot token without weakening active duplicate-poller protection. Fixes #81507. (#81890) Thanks @joshavant. -- Agents: mark adapter-caught tool execution failures as error tool results in embedded Pi sessions, so models can retry recoverable edit failures instead of seeing a successful tool result. Fixes #81546. Thanks @najef1979-code. - ACP: preserve redacted numeric JSON-RPC `RequestError` details in runtime failure text, so backend diagnostics are visible instead of only `Internal error`. Fixes #81126. (#81188) Thanks @vyctorbrzezowski. - Agents: cache unchanged PI model discovery stores and model lookups, reducing repeated model-resolution startup latency under large model configs. Fixes #78851. - Onboarding: carry returned Codex plugin migration config through the OpenAI model wizard so accepted plugin migrations are saved with the final config write. 
diff --git a/src/agents/pi-embedded-runner.extensions.test.ts b/src/agents/pi-embedded-runner.extensions.test.ts index 5c3f8f3ffd4..a88bb9cf69a 100644 --- a/src/agents/pi-embedded-runner.extensions.test.ts +++ b/src/agents/pi-embedded-runner.extensions.test.ts @@ -162,4 +162,93 @@ describe("buildEmbeddedExtensionFactories", () => { isError: true, }); }); + + it("marks status-timeout tool results as model-visible failures", async () => { + setActivePluginRegistry(createEmptyPluginRegistry()); + + const factories = buildEmbeddedExtensionFactories({ + cfg: undefined, + sessionManager: SessionManager.inMemory(), + provider: "openai", + modelId: "gpt-5.4", + model: undefined, + }); + + const handlers = new Map(); + await factories[0]?.({ + on(event: string, handler: Function) { + handlers.set(event, handler); + }, + } as never); + const handler = handlers.get("tool_result"); + + const result = await handler?.( + { + toolName: "exec", + toolCallId: "call-exec", + content: [{ type: "text", text: "Timed out" }], + details: { status: "timeout", tool: "exec", error: "Timed out" }, + isError: false, + }, + { cwd: "/tmp" }, + ); + + expect(result).toEqual({ + content: [{ type: "text", text: "Timed out" }], + details: { status: "timeout", tool: "exec", error: "Timed out" }, + isError: true, + }); + }); + + it("does not mark results as errors when status is absent or non-error", async () => { + setActivePluginRegistry(createEmptyPluginRegistry()); + + const factories = buildEmbeddedExtensionFactories({ + cfg: undefined, + sessionManager: SessionManager.inMemory(), + provider: "openai", + modelId: "gpt-5.4", + model: undefined, + }); + + const handlers = new Map(); + await factories[0]?.({ + on(event: string, handler: Function) { + handlers.set(event, handler); + }, + } as never); + const handler = handlers.get("tool_result"); + + // Empty details — no status field + const noStatusResult = await handler?.( + { + toolName: "read", + toolCallId: "call-read", + content: [{ type: 
"text", text: "file contents" }], + details: {}, + isError: false, + }, + { cwd: "/tmp" }, + ); + expect(noStatusResult).toEqual({ + content: [{ type: "text", text: "file contents" }], + details: {}, + }); + + // Explicit ok status + const okResult = await handler?.( + { + toolName: "read", + toolCallId: "call-read-2", + content: [{ type: "text", text: "ok" }], + details: { status: "ok" }, + isError: false, + }, + { cwd: "/tmp" }, + ); + expect(okResult).toEqual({ + content: [{ type: "text", text: "ok" }], + details: { status: "ok" }, + }); + }); }); diff --git a/src/agents/pi-embedded-runner/extensions.ts b/src/agents/pi-embedded-runner/extensions.ts index ab2be0952d0..9522112d3e2 100644 --- a/src/agents/pi-embedded-runner/extensions.ts +++ b/src/agents/pi-embedded-runner/extensions.ts @@ -34,6 +34,10 @@ function recordFromUnknown(value: unknown): Record { : {}; } +// Only checks "error" and "timeout" — the status values emitted by the +// adapter's buildToolExecutionErrorResult. The subscribe-side classifier +// (isErrorLikeStatus) uses a broader regex because it handles arbitrary +// external tool results; this bridge only elevates adapter-produced statuses. function hasErrorToolResultStatus(result: AgentToolResult): boolean { const details = recordFromUnknown(result.details); const status = normalizeOptionalLowercaseString(details.status);