diff --git a/CHANGELOG.md b/CHANGELOG.md index 806f21b6dff..84b1c55a5c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Docs: https://docs.openclaw.ai - Ollama: normalize provider-prefixed tool-call names at the native stream boundary so Kimi/Ollama calls such as `functions.exec` dispatch as `exec` instead of missing configured tools. Fixes #74487. Thanks @afurm and @carreipeia. - Security/audit: resolve configured model aliases before model-tier and small-parameter checks, so alias-based GPT-5/Codex configs no longer report false weak-model warnings. Fixes #74455. Thanks @blaspat. +- CLI/agent: isolate Gateway-timeout embedded fallback runs under explicit `gateway-fallback-*` sessions so accepted Gateway runs cannot race transcript locks or replace the routed conversation session. Fixes #62981. Thanks @HemantSudarshan. - Models/UI: hide unauthenticated providers from the default Web chat, `/models`, and model setup pickers while keeping explicit full-catalog browse paths through `view: "all"`, `/models all`, and `models list --all`. Fixes #74423. Thanks @guarismo and @SymbolStar. - Slack/prompts: rely on Slack `interactiveReplies` guidance instead of generic `inlineButtons` config hints so enabled Slack button directives are not contradicted. Fixes #46647. Thanks @jeremykoerber. - Slack/reactions: treat duplicate `already_reacted` responses as idempotent success so repeated agent reaction adds no longer surface as tool failures. Fixes #69005. Thanks @shipitsteven and @martingarramon. diff --git a/docs/cli/agent.md b/docs/cli/agent.md index a25d06e0eb1..67e5284c68d 100644 --- a/docs/cli/agent.md +++ b/docs/cli/agent.md @@ -59,6 +59,7 @@ openclaw agent --agent ops --message "Run locally" --local - `--channel`, `--reply-channel`, and `--reply-account` affect reply delivery, not session routing. - `--json` keeps stdout reserved for the JSON response. 
Gateway, plugin, and embedded-fallback diagnostics are routed to stderr so scripts can parse stdout directly. - Embedded fallback JSON includes `meta.transport: "embedded"` and `meta.fallbackFrom: "gateway"` so scripts can distinguish fallback runs from Gateway runs. +- If the Gateway accepts an agent run but the CLI times out waiting for the final reply, embedded fallback uses a fresh explicit `gateway-fallback-*` session/run id and reports `meta.fallbackReason: "gateway_timeout"` plus the fallback session fields. This avoids racing the Gateway-owned transcript lock or silently replacing the original routed conversation session. - When this command triggers `models.json` regeneration, SecretRef-managed provider credentials are persisted as non-secret markers (for example env var names, `secretref-env:ENV_VAR_NAME`, or `secretref-managed`), not resolved secret plaintext. - Marker writes are source-authoritative: OpenClaw persists markers from the active source config snapshot, not from resolved runtime secret values. 
diff --git a/src/agents/command/session.ts b/src/agents/command/session.ts index 93b4978b4b4..a2cc1d2dba5 100644 --- a/src/agents/command/session.ts +++ b/src/agents/command/session.ts @@ -54,7 +54,10 @@ type SessionIdMatchSet = { storeByKey: Map; }; -function buildExplicitSessionIdSessionKey(params: { sessionId: string; agentId?: string }): string { +export function buildExplicitSessionIdSessionKey(params: { + sessionId: string; + agentId?: string; +}): string { /* Builds the explicit-session key in the form agent:<normalized agent id>:explicit:<trimmed session id>; exported for reuse outside this module. */ return `agent:${normalizeAgentId(params.agentId)}:explicit:${params.sessionId.trim()}`; } diff --git a/src/agents/command/types.ts b/src/agents/command/types.ts index 85d2e2b432a..2430951f7c3 100644 --- a/src/agents/command/types.ts +++ b/src/agents/command/types.ts @@ -17,6 +17,9 @@ export type { AgentStreamParams } from "./shared-types.js"; export type AgentCommandResultMetaOverrides = { transport?: "embedded"; fallbackFrom?: "gateway"; + fallbackReason?: "gateway_timeout"; /* Why the embedded fallback ran; a Gateway timeout is the only reason modelled here. */ + fallbackSessionId?: string; /* Session id the fallback run was given. */ + fallbackSessionKey?: string; /* Session key the fallback run was given. */ }; export type AcpTurnSource = "manual_spawn"; diff --git a/src/commands/agent-via-gateway.test.ts b/src/commands/agent-via-gateway.test.ts index 06f97c017ca..cdbaccecaba 100644 --- a/src/commands/agent-via-gateway.test.ts +++ b/src/commands/agent-via-gateway.test.ts @@ -10,6 +10,15 @@ import type { agentCommand as AgentCommand } from "./agent.js"; const loadConfig = vi.hoisted(() => vi.fn()); const callGateway = vi.hoisted(() => vi.fn()); +const isGatewayTransportError = vi.hoisted(() => /* Test double: accepts a value only when it is an Error named GatewayTransportError whose kind is closed or timeout. */ + vi.fn((value: unknown) => { + if (!(value instanceof Error) || value.name !== "GatewayTransportError") { + return false; + } + const kind = (value as { kind?: unknown }).kind; + return kind === "closed" || kind === "timeout"; + }), +); const agentCommand = vi.hoisted(() => vi.fn()); const runtime: RuntimeEnv = { @@ -78,9 +87,24 @@ function mockLocalAgentReply(text = "local") { }); } +function createGatewayTimeoutError() { /* Test helper: builds an Error that is then decorated to look like a Gateway transport timeout. */ + const err = new Error("gateway timeout after 90000ms"); + 
err.name = "GatewayTransportError"; + return Object.assign(err, { + kind: "timeout", + timeoutMs: 90_000, + connectionDetails: { + url: "ws://127.0.0.1:18789", + urlSource: "local loopback", + message: "Gateway target: ws://127.0.0.1:18789", + }, + }); +} + vi.mock("../config/config.js", () => ({ getRuntimeConfig: loadConfig, loadConfig })); vi.mock("../gateway/call.js", () => ({ callGateway, + isGatewayTransportError, randomIdempotencyKey: () => "idem-1", })); vi.mock("./agent.js", () => ({ agentCommand })); @@ -182,6 +206,73 @@ describe("agentCliCommand", () => { }); }); + it("uses a fresh embedded session when gateway agent times out", async () => { + await withTempStore(async () => { + callGateway.mockRejectedValue(createGatewayTimeoutError()); + mockLocalAgentReply(); + + await agentCliCommand( + { + message: "hi", + sessionId: "locked-session", + runId: "locked-run", + }, + runtime, + ); + + expect(callGateway).toHaveBeenCalledTimes(1); + expect(agentCommand).toHaveBeenCalledTimes(1); + const fallbackOpts = agentCommand.mock.calls[0]?.[0] as { + sessionId?: string; + sessionKey?: string; + runId?: string; + resultMetaOverrides?: unknown; + }; + expect(fallbackOpts.sessionId).toMatch(/^gateway-fallback-/); + expect(fallbackOpts.sessionId).not.toBe("locked-session"); + expect(fallbackOpts.sessionKey).toBe(`agent:main:explicit:${fallbackOpts.sessionId}`); + expect(fallbackOpts.runId).toBe(fallbackOpts.sessionId); + expect(fallbackOpts.resultMetaOverrides).toMatchObject({ + transport: "embedded", + fallbackFrom: "gateway", + fallbackReason: "gateway_timeout", + fallbackSessionId: fallbackOpts.sessionId, + fallbackSessionKey: fallbackOpts.sessionKey, + }); + expect(runtime.error).toHaveBeenCalledWith( + expect.stringContaining( + "Gateway agent timed out; running embedded agent with fresh session", + ), + ); + expect(runtime.log).toHaveBeenCalledWith("local"); + }); + }); + + it("keeps timeout fallback from replacing the routed conversation session key", async () 
=> { + await withTempStore(async () => { + callGateway.mockRejectedValue(createGatewayTimeoutError()); + mockLocalAgentReply(); + + await agentCliCommand( + { + message: "hi", + to: "+1555", + }, + runtime, + ); + + const fallbackOpts = agentCommand.mock.calls[0]?.[0] as { + sessionId?: string; + sessionKey?: string; + to?: string; + }; + expect(fallbackOpts.to).toBe("+1555"); + expect(fallbackOpts.sessionId).toMatch(/^gateway-fallback-/); + expect(fallbackOpts.sessionKey).toBe(`agent:main:explicit:${fallbackOpts.sessionId}`); + expect(fallbackOpts.sessionKey).not.toBe("agent:main:+1555"); + }); + }); + it("passes fallback metadata into JSON embedded fallback output", async () => { await withTempStore(async () => { callGateway.mockRejectedValue(new Error("gateway not connected")); diff --git a/src/commands/agent-via-gateway.ts b/src/commands/agent-via-gateway.ts index e01f8cf8042..a4925ab61d9 100644 --- a/src/commands/agent-via-gateway.ts +++ b/src/commands/agent-via-gateway.ts @@ -1,3 +1,4 @@ +import { randomUUID } from "node:crypto"; import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; import { listAgentIds } from "../agents/agent-scope.js"; import { formatCliCommand } from "../cli/command-format.js"; @@ -5,7 +6,7 @@ import type { CliDeps } from "../cli/deps.types.js"; import { withProgress } from "../cli/progress.js"; import { getRuntimeConfig } from "../config/config.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; -import { callGateway, randomIdempotencyKey } from "../gateway/call.js"; +import { callGateway, isGatewayTransportError, randomIdempotencyKey } from "../gateway/call.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js"; import { routeLogsToStderr } from "../logging/console.js"; import { normalizeAgentId } from "../routing/session-key.js"; @@ -13,7 +14,7 @@ import { type RuntimeEnv, writeRuntimeJson } from "../runtime.js"; import { 
normalizeOptionalString } from "../shared/string-coerce.js"; import { normalizeMessageChannel } from "../utils/message-channel.js"; import { agentCommand } from "./agent.js"; -import { resolveSessionKeyForRequest } from "./agent/session.js"; +import { buildExplicitSessionIdSessionKey, resolveSessionKeyForRequest } from "./agent/session.js"; type AgentGatewayResult = { payloads?: Array<{ @@ -36,6 +37,7 @@ const EMBEDDED_FALLBACK_META = { transport: "embedded", fallbackFrom: "gateway", } as const; +const GATEWAY_TIMEOUT_FALLBACK_SESSION_PREFIX = "gateway-fallback-"; /* Prefix prepended to a random UUID when a Gateway-timeout fallback session id is generated. */ export type AgentCliOpts = { message: string; @@ -96,6 +98,28 @@ function formatPayloadForLog(payload: { return lines.join("\n").trimEnd(); } +function isGatewayAgentTimeoutError(err: unknown): boolean { /* Reports whether err represents a Gateway timeout. */ + if (isGatewayTransportError(err)) { /* Typed transport errors are classified by their kind field only. */ + return err.kind === "timeout"; + } /* Any other Error is matched on its message text instead. */ + return err instanceof Error && err.message.includes("gateway request timeout for agent"); +} + /* Returns a new session id: the Gateway-timeout fallback prefix followed by a random UUID. */ +function createGatewayTimeoutFallbackSessionId(): string { + return `${GATEWAY_TIMEOUT_FALLBACK_SESSION_PREFIX}${randomUUID()}`; +} + /* Creates a fresh fallback session id and derives the matching explicit session key for the optional agentId. */ +function createGatewayTimeoutFallbackSession(agentId?: string): { + sessionId: string; + sessionKey: string; +} { + const sessionId = createGatewayTimeoutFallbackSessionId(); + return { + sessionId, + sessionKey: buildExplicitSessionIdSessionKey({ sessionId, agentId }), + }; +} + export async function agentViaGatewayCommand(opts: AgentCliOpts, runtime: RuntimeEnv) { protectJsonStdout(opts); const body = (opts.message ?? 
"").trim(); @@ -207,6 +231,29 @@ export async function agentCliCommand(opts: AgentCliOpts, runtime: RuntimeEnv, d try { return await agentViaGatewayCommand(opts, runtime); } catch (err) { + if (isGatewayAgentTimeoutError(err)) { + const fallbackSession = createGatewayTimeoutFallbackSession(opts.agent); + runtime.error?.( + `EMBEDDED FALLBACK: Gateway agent timed out; running embedded agent with fresh session ${fallbackSession.sessionId}: ${String(err)}`, + ); + return await agentCommand( + { + ...localOpts, + sessionId: fallbackSession.sessionId, + sessionKey: fallbackSession.sessionKey, + runId: fallbackSession.sessionId, + resultMetaOverrides: { + ...EMBEDDED_FALLBACK_META, + fallbackReason: "gateway_timeout", + fallbackSessionId: fallbackSession.sessionId, + fallbackSessionKey: fallbackSession.sessionKey, + }, + }, + runtime, + deps, + ); + } + runtime.error?.( `EMBEDDED FALLBACK: Gateway agent failed; running embedded agent: ${String(err)}`, );