From 4252f07ff0f1b8a80eaadf77e96afa5aa65d2f90 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 28 May 2026 13:17:57 +0100 Subject: [PATCH] fix: reduce gateway warning noise Reduce repeated gateway warning noise in startup/auth retry paths while preserving credential mismatch and rate-limit audit visibility. Also hardens empty embedded-assistant retry handling by carrying lifecycle state through the missing-assistant guard, and keeps the relevant regression coverage in gateway and agent tests. --- .../telegram/src/bot-native-commands.test.ts | 41 +++++ .../telegram/src/bot-native-commands.ts | 157 +++++++++++------- .../run.incomplete-turn.test.ts | 85 ++++++++++ src/agents/embedded-agent-runner/run.ts | 31 +++- .../embedded-agent-runner/run/attempt.ts | 1 + .../run/incomplete-turn.ts | 53 ++++++ src/gateway/call.test.ts | 105 ++++++++++++ src/gateway/call.ts | 80 ++++++++- src/gateway/server/ws-connection.ts | 47 +++++- .../handshake-auth-log-limiter.test.ts | 85 ++++++++++ .../handshake-auth-log-limiter.ts | 80 +++++++++ .../server/ws-connection/message-handler.ts | 32 +++- src/logging/logger-settings.test.ts | 6 + src/logging/logger.ts | 14 +- 14 files changed, 740 insertions(+), 77 deletions(-) create mode 100644 src/gateway/server/ws-connection/handshake-auth-log-limiter.test.ts create mode 100644 src/gateway/server/ws-connection/handshake-auth-log-limiter.ts diff --git a/extensions/telegram/src/bot-native-commands.test.ts b/extensions/telegram/src/bot-native-commands.test.ts index e2f77b00bdc..cd75a27a9ed 100644 --- a/extensions/telegram/src/bot-native-commands.test.ts +++ b/extensions/telegram/src/bot-native-commands.test.ts @@ -225,6 +225,47 @@ describe("registerTelegramNativeCommands", () => { }); }); + it("drops per-skill commands before truncating an over-limit Telegram menu", async () => { + const { bot, commandHandlers, setMyCommands } = createCommandBot(); + const runtimeLog = vi.fn(); + listSkillCommandsForAgents.mockReturnValue( + Array.from({ length: 120 }, (_, index) => ({ + name: `demo_skill_${index}`, + skillName: `demo-skill-${index}`, + description: `Demo skill ${index}`, + })), + ); + pluginCommandMocks.getPluginCommandSpecs.mockReturnValue([ + { + name: "demo_skill_0", + description: "Conflicting plugin command", + }, + ] as never); + + registerTelegramNativeCommands( + createNativeCommandTestParams( + { + commands: { native: true, nativeSkills: true }, + agents: { list: [{ id: "main", default: true }] }, + }, + { + bot, + runtime: { log: runtimeLog } as unknown as RuntimeEnv, + }, + ), + ); + + const registeredCommands = await waitForRegisteredCommands(setMyCommands); + expect(registeredCommands.length).toBeLessThanOrEqual(100); + expect(registeredCommands.some((entry) => entry.command.startsWith("demo_skill_"))).toBe(false); + expect(commandHandlers.has("demo_skill_0")).toBe(true); + expect(runtimeLog).toHaveBeenCalledWith( + expect.stringContaining( + "commands exceeds limit; removing per-skill commands and keeping /skill.", + ), + ); + }); + it("truncates Telegram command registration to 100 commands", async () => { const customCommands = Array.from({ length: 120 }, (_, index) => ({ command: `cmd_${index}`, diff --git a/extensions/telegram/src/bot-native-commands.ts b/extensions/telegram/src/bot-native-commands.ts index dc737d66560..33b467716c9 100644 --- a/extensions/telegram/src/bot-native-commands.ts +++ b/extensions/telegram/src/bot-native-commands.ts @@ -741,50 +741,109 @@ export const registerTelegramNativeCommands = ({ "nativeSkillsEnabled is true but no agent route is bound for this Telegram account; skill commands will not appear in the native menu.", ); } - const skillCommands = + let skillCommands = nativeEnabled && nativeSkillsEnabled && boundRoute ? telegramDeps.listSkillCommandsForAgents({ cfg, agentIds: [boundRoute.agentId], }) : []; - const nativeCommands = nativeEnabled - ? listNativeCommandSpecsForConfig(cfg, { - skillCommands, - provider: "telegram", - }) - : []; - const reservedCommands = new Set( - listNativeCommandSpecs().map((command) => normalizeTelegramCommandName(command.name)), - ); - for (const command of skillCommands) { - reservedCommands.add(normalizeLowercaseStringOrEmpty(command.name)); - } - const customResolution = resolveTelegramCustomCommands({ - commands: telegramCfg.customCommands, - reservedCommands, - }); - for (const issue of customResolution.issues) { - runtime.error?.(danger(issue.message)); - } - const customCommands = customResolution.commands; const pluginCommandSpecs = ( telegramDeps.getPluginCommandSpecs ?? defaultTelegramNativeCommandDeps.getPluginCommandSpecs )?.("telegram", { config: cfg }) ?? []; - const existingCommands = new Set( - [ - ...nativeCommands.map((command) => normalizeTelegramCommandName(command.name)), - ...customCommands.map((command) => command.command), - ].map((command) => normalizeLowercaseStringOrEmpty(command)), - ); - const pluginCatalog = buildPluginTelegramMenuCommands({ - specs: pluginCommandSpecs, - existingCommands, - }); - for (const issue of pluginCatalog.issues) { - runtime.error?.(danger(issue)); + const resolveTelegramMenuCommandCatalog = ( + activeSkillCommands: typeof skillCommands, + reservedSkillCommands = activeSkillCommands, + ) => { + const nativeCommands = nativeEnabled + ? listNativeCommandSpecsForConfig(cfg, { + skillCommands: activeSkillCommands, + provider: "telegram", + }) + : []; + const reservedCommands = new Set( + listNativeCommandSpecs().map((command) => normalizeTelegramCommandName(command.name)), + ); + for (const command of reservedSkillCommands) { + reservedCommands.add(normalizeLowercaseStringOrEmpty(command.name)); + } + const customResolution = resolveTelegramCustomCommands({ + commands: telegramCfg.customCommands, + reservedCommands, + }); + for (const issue of customResolution.issues) { + runtime.error?.(danger(issue.message)); + } + const customCommands = customResolution.commands; + const existingCommands = new Set( + [ + ...nativeCommands.map((command) => normalizeTelegramCommandName(command.name)), + ...customCommands.map((command) => command.command), + ].map((command) => normalizeLowercaseStringOrEmpty(command)), + ); + for (const command of reservedSkillCommands) { + existingCommands.add(normalizeTelegramCommandName(command.name)); + } + const pluginCatalog = buildPluginTelegramMenuCommands({ + specs: pluginCommandSpecs, + existingCommands, + }); + for (const issue of pluginCatalog.issues) { + runtime.error?.(danger(issue)); + } + const allCommandsFull: TelegramMenuCommand[] = [ + ...nativeCommands + .map((command): TelegramMenuCommand | null => { + const normalized = normalizeTelegramCommandName(command.name); + if (!TELEGRAM_COMMAND_NAME_PATTERN.test(normalized)) { + runtime.error?.( + danger( + `Native command "${command.name}" is invalid for Telegram (resolved to "${normalized}"). Skipping.`, + ), + ); + return null; + } + const menuCommand: TelegramMenuCommand = { + command: normalized, + description: command.description, + }; + if (command.isAlias) { + menuCommand.isAlias = true; + } + if (command.descriptionLocalizations) { + menuCommand.descriptionLocalizations = command.descriptionLocalizations; + } + return menuCommand; + }) + .filter((cmd) => cmd !== null), + ...(nativeEnabled ? pluginCatalog.commands : []), + ...customCommands, + ]; + return { + nativeCommands, + customCommands, + pluginCatalog, + ...buildCappedTelegramMenuCommands({ + allCommands: allCommandsFull, + }), + }; + }; + const fullCommandCatalog = resolveTelegramMenuCommandCatalog(skillCommands); + let menuCommandCatalog = fullCommandCatalog; + if ( + nativeEnabled && + nativeSkillsEnabled && + skillCommands.length > 0 && + fullCommandCatalog.overflowCount > 0 + ) { + const initialCommandCount = fullCommandCatalog.totalCommands; + menuCommandCatalog = resolveTelegramMenuCommandCatalog([], skillCommands); + runtime.log?.( + `Telegram: ${initialCommandCount} commands exceeds limit; removing per-skill commands and keeping /skill.`, + ); } + const { nativeCommands, pluginCatalog } = fullCommandCatalog; const loadFreshRuntimeConfig = (): OpenClawConfig => telegramDeps.getRuntimeConfig(); const resolveFreshTelegramConfig = (runtimeCfg: OpenClawConfig): TelegramAccountConfig => { try { @@ -799,34 +858,6 @@ export const registerTelegramNativeCommands = ({ return telegramCfg; } }; - const allCommandsFull: TelegramMenuCommand[] = [ - ...nativeCommands - .map((command): TelegramMenuCommand | null => { - const normalized = normalizeTelegramCommandName(command.name); - if (!TELEGRAM_COMMAND_NAME_PATTERN.test(normalized)) { - runtime.error?.( - danger( - `Native command "${command.name}" is invalid for Telegram (resolved to "${normalized}"). Skipping.`, - ), - ); - return null; - } - const menuCommand: TelegramMenuCommand = { - command: normalized, - description: command.description, - }; - if (command.isAlias) { - menuCommand.isAlias = true; - } - if (command.descriptionLocalizations) { - menuCommand.descriptionLocalizations = command.descriptionLocalizations; - } - return menuCommand; - }) - .filter((cmd) => cmd !== null), - ...(nativeEnabled ? pluginCatalog.commands : []), - ...customCommands, - ]; const { commandsToRegister, totalCommands, @@ -835,9 +866,7 @@ export const registerTelegramNativeCommands = ({ maxTotalChars, descriptionTrimmed, textBudgetDropCount, - } = buildCappedTelegramMenuCommands({ - allCommands: allCommandsFull, - }); + } = menuCommandCatalog; if (overflowCount > 0) { runtime.log?.( `Telegram limits bots to ${maxCommands} commands. ` + diff --git a/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts b/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts index b37c867f68d..d5295df7cdd 100644 --- a/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts +++ b/src/agents/embedded-agent-runner/run.incomplete-turn.test.ts @@ -36,6 +36,7 @@ import { resolveReplayInvalidFlag, resolveRunLivenessState, resolveSilentToolResultReplyPayload, + shouldRetryMissingAssistantTurn, shouldTreatEmptyAssistantReplyAsSilent, } from "./run/incomplete-turn.js"; import type { EmbeddedRunAttemptResult } from "./run/types.js"; @@ -746,6 +747,43 @@ describe("runEmbeddedAgent incomplete-turn safety", () => { expectWarnMessageWith("empty response detected"); }); + it("retries replay-safe missing terminal assistant turns once with the same prompt", async () => { + mockedClassifyFailoverReason.mockReturnValue(null); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: [], + lastAssistant: undefined, + currentAttemptAssistant: undefined, + }), + ); + mockedRunEmbeddedAttempt.mockResolvedValueOnce( + makeAttemptResult({ + assistantTexts: ["Recovered answer."], + lastAssistant: { + role: "assistant", + stopReason: "end_turn", + provider: "openai", + model: "gpt-5.5", + content: [{ type: "text", text: "Recovered answer." }], + } as unknown as EmbeddedRunAttemptResult["lastAssistant"], + }), + ); + + const result = await runEmbeddedAgent({ + ...overflowBaseRunParams, + provider: "openai", + model: "gpt-5.5", + runId: "run-missing-assistant-retry", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(runAttemptCall(1).prompt).toBe(runAttemptCall(0).prompt); + expect(result.meta?.finalAssistantVisibleText).toBe("Recovered answer."); + expectWarnMessageWith("missing assistant terminal message detected"); + expectNoWarnMessageWith("empty response detected"); + expectNoWarnMessageWith("incomplete turn detected"); + }); + it("retries zero-token empty Claude stop turns with a visible-answer continuation instruction", async () => { mockedClassifyFailoverReason.mockReturnValue(null); mockedRunEmbeddedAttempt.mockResolvedValueOnce( @@ -1274,6 +1312,53 @@ describe("runEmbeddedAgent incomplete-turn safety", () => { expect(explicitCancellationText).toBeNull(); }); + it("allows a same-prompt retry only for replay-safe missing assistant turns", () => { + const replaySafeAttempt = makeAttemptResult({ + assistantTexts: [], + lastAssistant: undefined, + currentAttemptAssistant: undefined, + }); + + expect( + shouldRetryMissingAssistantTurn({ + payloadCount: 0, + aborted: false, + timedOut: false, + attempt: replaySafeAttempt, + }), + ).toBe(true); + expect( + shouldRetryMissingAssistantTurn({ + payloadCount: 0, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: [], + lastAssistant: undefined, + currentAttemptAssistant: undefined, + toolMetas: [{ toolName: "image_generate", asyncStarted: true }], + }), + }), + ).toBe(false); + expect( + shouldRetryMissingAssistantTurn({ + payloadCount: 0, + aborted: false, + timedOut: false, + attempt: makeAttemptResult({ + assistantTexts: [], + lastAssistant: undefined, + currentAttemptAssistant: undefined, + itemLifecycle: { + startedCount: 1, + completedCount: 0, + activeCount: 1, + }, + }), + }), + ).toBe(false); + }); + it("detects tool-use terminal turn with pre-tool text as incomplete (#76477)", () => { // When the last assistant message ended with stopReason=toolUse, pre-tool // text alone must not suppress the incomplete-turn guard. The model diff --git a/src/agents/embedded-agent-runner/run.ts b/src/agents/embedded-agent-runner/run.ts index 1c7efd3e78c..1fce64e9e24 100644 --- a/src/agents/embedded-agent-runner/run.ts +++ b/src/agents/embedded-agent-runner/run.ts @@ -173,6 +173,7 @@ import { STRICT_AGENTIC_BLOCKED_TEXT, resolveReplayInvalidFlag, resolveRunLivenessState, + shouldRetryMissingAssistantTurn, shouldTreatEmptyAssistantReplyAsSilent, } from "./run/incomplete-turn.js"; import type { RunEmbeddedAgentParams } from "./run/params.js"; @@ -1157,6 +1158,8 @@ export async function runEmbeddedAgent( // visible-answer retry instruction instead. const MAX_EMPTY_ERROR_RETRIES = 3; let emptyErrorRetries = 0; + const MAX_MISSING_ASSISTANT_RETRIES = 1; + let missingAssistantRetryAttempts = 0; const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config); const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config); const rateLimitProfileRotationLimit = resolveRateLimitProfileRotationLimit(params.config); @@ -3061,6 +3064,24 @@ export async function runEmbeddedAgent( !nextPlanningOnlyRetryInstruction && nextReasoningOnlyRetryInstruction && reasoningOnlyRetryAttempts >= maxReasoningOnlyRetryAttempts; + if ( + !emptyAssistantReplyIsSilent && + shouldRetryMissingAssistantTurn({ + payloadCount, + aborted, + promptError, + timedOut, + attempt, + }) && + missingAssistantRetryAttempts < MAX_MISSING_ASSISTANT_RETRIES + ) { + missingAssistantRetryAttempts += 1; + log.warn( + `missing assistant terminal message detected: runId=${params.runId} sessionId=${params.sessionId} ` + + `provider=${activeErrorContext.provider}/${activeErrorContext.model} — retrying ${missingAssistantRetryAttempts}/${MAX_MISSING_ASSISTANT_RETRIES} with same prompt`, + ); + continue; + } if ( !nextPlanningOnlyRetryInstruction && !nextReasoningOnlyRetryInstruction && @@ -3288,9 +3309,17 @@ export async function runEmbeddedAgent( livenessState, }); const incompleteStopReason = attempt.lastAssistant?.stopReason; + const replayMetadata = resolveAttemptReplayMetadata(attempt); log.warn( `incomplete turn detected: runId=${params.runId} sessionId=${params.sessionId} ` + - `stopReason=${incompleteStopReason} payloads=${payloadCount} — surfacing error to user`, + `provider=${activeErrorContext.provider}/${activeErrorContext.model} ` + + `stopReason=${incompleteStopReason ?? "missing"} hasLastAssistant=${attempt.lastAssistant ? "yes" : "no"} ` + + `hasCurrentAttemptAssistant=${attempt.currentAttemptAssistant ? "yes" : "no"} payloads=${payloadCount} ` + + `tools=${attempt.toolMetas?.length ?? 0} replaySafe=${replayMetadata.replaySafe ? "yes" : "no"} ` + + `compactions=${attemptCompactionCount} planningRetries=${planningOnlyRetryAttempts}/${maxPlanningOnlyRetryAttempts} ` + + `reasoningRetries=${reasoningOnlyRetryAttempts}/${maxReasoningOnlyRetryAttempts} ` + + `emptyRetries=${emptyResponseRetryAttempts}/${maxEmptyResponseRetryAttempts} ` + + `missingAssistantRetries=${missingAssistantRetryAttempts}/${MAX_MISSING_ASSISTANT_RETRIES} — surfacing error to user`, ); // Mark the failing profile for cooldown so multi-profile setups diff --git a/src/agents/embedded-agent-runner/run/attempt.ts b/src/agents/embedded-agent-runner/run/attempt.ts index b172d2eb684..3c430e8f336 100644 --- a/src/agents/embedded-agent-runner/run/attempt.ts +++ b/src/agents/embedded-agent-runner/run/attempt.ts @@ -4559,6 +4559,7 @@ export async function runEmbeddedAttempt( acceptedSessionSpawns, lastToolError, lastAssistant, + itemLifecycle: getItemLifecycle(), toolMetas: toolMetasNormalized, replayMetadata, promptErrorSource, diff --git a/src/agents/embedded-agent-runner/run/incomplete-turn.ts b/src/agents/embedded-agent-runner/run/incomplete-turn.ts index bc0963d634d..5ff69227c76 100644 --- a/src/agents/embedded-agent-runner/run/incomplete-turn.ts +++ b/src/agents/embedded-agent-runner/run/incomplete-turn.ts @@ -46,6 +46,7 @@ type IncompleteTurnAttempt = Pick< | "messagingToolSentTargets" | "lastToolError" | "lastAssistant" + | "itemLifecycle" | "replayMetadata" | "promptErrorSource" | "timedOutDuringCompaction" @@ -306,6 +307,58 @@ export function resolveIncompleteTurnPayloadText(params: { : "⚠️ Agent couldn't generate a response. Please try again."; } +export function shouldRetryMissingAssistantTurn(params: { + payloadCount: number; + aborted: boolean; + promptError?: unknown; + timedOut: boolean; + attempt: IncompleteTurnAttempt; +}): boolean { + if ( + params.payloadCount !== 0 || + params.aborted || + Boolean(params.promptError) || + params.timedOut || + params.attempt.clientToolCalls || + params.attempt.currentAttemptAssistant || + params.attempt.lastAssistant || + params.attempt.yieldDetected || + params.attempt.didSendDeterministicApprovalPrompt || + params.attempt.lastToolError + ) { + return false; + } + + if (hasOnlySilentAssistantReply(params.attempt.assistantTexts)) { + return false; + } + + if (joinAssistantTexts(params.attempt.assistantTexts).length > 0) { + return false; + } + + if (hasCommittedMessagingToolDeliveryEvidence(params.attempt)) { + return false; + } + + if (hasAcceptedSessionSpawn(params.attempt.acceptedSessionSpawns)) { + return false; + } + + if (hasAsyncStartedToolActivity(params.attempt.toolMetas)) { + return false; + } + + if ( + (params.attempt.itemLifecycle?.startedCount ?? 0) > 0 || + (params.attempt.itemLifecycle?.activeCount ?? 0) > 0 + ) { + return false; + } + + return !resolveAttemptReplayMetadata(params.attempt).hadPotentialSideEffects; +} + function joinAssistantTexts(assistantTexts?: readonly string[]): string { return (assistantTexts ?? []).join("\n\n").trim(); } diff --git a/src/gateway/call.test.ts b/src/gateway/call.test.ts index 8d5c2217718..28e1a5448f6 100644 --- a/src/gateway/call.test.ts +++ b/src/gateway/call.test.ts @@ -6,6 +6,7 @@ import type { OpenClawConfig } from "../config/config.js"; import type { DeviceIdentity } from "../infra/device-identity.js"; import { createEmptyPluginRegistry } from "../plugins/registry-empty.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; +import type { DeviceAuthEntry } from "../shared/device-auth.js"; import { captureEnv } from "../test-utils/env.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js"; import { @@ -23,6 +24,9 @@ const deviceIdentityState = vi.hoisted(() => ({ } satisfies DeviceIdentity, throwOnLoad: false, })); +const loadDeviceAuthTokenMock = vi.hoisted(() => + vi.fn<(...args: unknown[]) => DeviceAuthEntry | null>(() => null), +); const eventLoopReadyState = vi.hoisted(() => ({ calls: [] as Array<{ maxWaitMs?: number } | undefined>, @@ -262,9 +266,17 @@ function resetGatewayCallMocks() { } return deviceIdentityState.value; }, + loadDeviceAuthToken: loadDeviceAuthTokenMock, resolveGatewayPort: resolveGatewayPortForTests, }); deviceIdentityState.throwOnLoad = false; + loadDeviceAuthTokenMock.mockReset(); + loadDeviceAuthTokenMock.mockReturnValue({ + token: "paired-device-token", + role: "operator", + scopes: ["operator.read"], + updatedAtMs: 123, + }); } function setGatewayNetworkDefaults(port = 18789) { @@ -427,6 +439,95 @@ describe("callGateway url resolution", () => { expect(lastClientOptions?.deviceIdentity).toBeNull(); }); + it("fails before opening a websocket when backend token auth has no shared or paired credential", async () => { + getRuntimeConfig.mockReturnValue({ + gateway: { mode: "local", bind: "loopback", auth: { mode: "token" } }, + }); + setGatewayNetworkDefaults(); + loadDeviceAuthTokenMock.mockReturnValue(null); + + await expect(callGateway({ method: "sessions.list" })).rejects.toThrow( + "requires credentials before opening a websocket", + ); + + expect(lastClientOptions).toBeNull(); + expect(startCalls).toBe(0); + expect(loadDeviceAuthTokenMock).toHaveBeenCalledWith({ + deviceId: "test-device-identity", + role: "operator", + env: process.env, + }); + }); + + it("fails before opening a websocket when default token auth has no shared or paired credential", async () => { + getRuntimeConfig.mockReturnValue({ + gateway: { mode: "local", bind: "loopback" }, + }); + setGatewayNetworkDefaults(); + loadDeviceAuthTokenMock.mockReturnValue(null); + + await expect(callGateway({ method: "sessions.list" })).rejects.toThrow( + "requires credentials before opening a websocket", + ); + + expect(lastClientOptions).toBeNull(); + expect(startCalls).toBe(0); + }); + + it("allows paired backend device auth without explicit shared credentials", async () => { + getRuntimeConfig.mockReturnValue({ + gateway: { mode: "local", bind: "loopback", auth: { mode: "token" } }, + }); + setGatewayNetworkDefaults(); + loadDeviceAuthTokenMock.mockReturnValue({ + token: "paired-device-token", + role: "operator", + scopes: ["operator.read"], + updatedAtMs: 123, + }); + + await callGateway({ method: "sessions.list" }); + + expect(lastClientOptions?.url).toBe("ws://127.0.0.1:18789"); + expect(lastClientOptions?.token).toBeUndefined(); + expect(lastClientOptions?.deviceIdentity).toEqual(deviceIdentityState.value); + }); + + it("allows Tailscale-authenticated backend calls without client-side credentials", async () => { + getRuntimeConfig.mockReturnValue({ + gateway: { + mode: "remote", + remote: { url: "wss://openclaw.example.test" }, + auth: { mode: "token", allowTailscale: true }, + }, + }); + setGatewayNetworkDefaults(); + + await callGateway({ method: "sessions.list" }); + + expect(lastClientOptions?.url).toBe("wss://openclaw.example.test"); + expect(lastClientOptions?.token).toBeUndefined(); + expect(lastClientOptions?.password).toBeUndefined(); + }); + + it("allows Tailscale Serve backend calls without explicit allowTailscale", async () => { + getRuntimeConfig.mockReturnValue({ + gateway: { + mode: "remote", + remote: { url: "wss://openclaw.example.test" }, + auth: { mode: "token" }, + tailscale: { mode: "serve" }, + }, + }); + setGatewayNetworkDefaults(); + + await callGateway({ method: "sessions.list" }); + + expect(lastClientOptions?.url).toBe("wss://openclaw.example.test"); + expect(lastClientOptions?.token).toBeUndefined(); + expect(lastClientOptions?.password).toBeUndefined(); + }); + it("keeps device identity enabled for explicit CLI loopback shared-token auth", async () => { setLocalLoopbackGatewayConfig(); @@ -1299,6 +1400,7 @@ describe("callGateway error details", () => { }) as never, getRuntimeConfig: getRuntimeConfig as unknown as () => OpenClawConfig, loadOrCreateDeviceIdentity: () => deviceIdentityState.value, + loadDeviceAuthToken: loadDeviceAuthTokenMock, resolveGatewayPort: resolveGatewayPort as unknown as ( cfg?: OpenClawConfig, env?: NodeJS.ProcessEnv, @@ -1363,6 +1465,7 @@ describe("callGateway error details", () => { }) as never, getRuntimeConfig: getRuntimeConfig as unknown as () => OpenClawConfig, loadOrCreateDeviceIdentity: () => deviceIdentityState.value, + loadDeviceAuthToken: loadDeviceAuthTokenMock, resolveGatewayPort: resolveGatewayPort as unknown as ( cfg?: OpenClawConfig, env?: NodeJS.ProcessEnv, @@ -1448,6 +1551,7 @@ describe("callGateway error details", () => { }) as never, getRuntimeConfig: getRuntimeConfig as unknown as () => OpenClawConfig, loadOrCreateDeviceIdentity: () => deviceIdentityState.value, + loadDeviceAuthToken: loadDeviceAuthTokenMock, resolveGatewayPort: resolveGatewayPort as unknown as ( cfg?: OpenClawConfig, env?: NodeJS.ProcessEnv, @@ -1509,6 +1613,7 @@ describe("callGateway error details", () => { }) as never, getRuntimeConfig: getRuntimeConfig as unknown as () => OpenClawConfig, loadOrCreateDeviceIdentity: () => deviceIdentityState.value, + loadDeviceAuthToken: loadDeviceAuthTokenMock, resolveGatewayPort: resolveGatewayPort as unknown as ( cfg?: OpenClawConfig, env?: NodeJS.ProcessEnv, diff --git a/src/gateway/call.ts b/src/gateway/call.ts index b56894d88d3..d19584f2eca 100644 --- a/src/gateway/call.ts +++ b/src/gateway/call.ts @@ -6,6 +6,7 @@ import { resolveStateDir as resolveStateDirFromPaths, } from "../config/paths.js"; import type { OpenClawConfig } from "../config/types.openclaw.js"; +import { loadDeviceAuthToken } from "../infra/device-auth-store.js"; import { loadOrCreateDeviceIdentity, type DeviceIdentity } from "../infra/device-identity.js"; import { loadGatewayTlsRuntime } from "../infra/tls/gateway.js"; import { isLoopbackIpAddress } from "../shared/net/ip.js"; @@ -19,6 +20,7 @@ import { } from "../utils/message-channel.js"; import { resolveSafeTimeoutDelayMs } from "../utils/timer-delay.js"; import { VERSION } from "../version.js"; +import { resolveGatewayAuth } from "./auth-resolve.js"; import { startGatewayClientWhenEventLoopReady } from "./client-start-readiness.js"; import { GatewayClient, @@ -205,6 +207,7 @@ const defaultGatewayCallDeps = { resolveConfigPath: resolveConfigPathFromPaths, resolveStateDir: resolveStateDirFromPaths, loadGatewayTlsRuntime, + loadDeviceAuthToken, }; const gatewayCallDeps = { ...defaultGatewayCallDeps, @@ -296,6 +299,8 @@ export const testing = { deps?.resolveStateDir ?? defaultGatewayCallDeps.resolveStateDir; gatewayCallDeps.loadGatewayTlsRuntime = deps?.loadGatewayTlsRuntime ?? defaultGatewayCallDeps.loadGatewayTlsRuntime; + gatewayCallDeps.loadDeviceAuthToken = + deps?.loadDeviceAuthToken ?? defaultGatewayCallDeps.loadDeviceAuthToken; }, setCreateGatewayClientForTests(createGatewayClient?: typeof defaultCreateGatewayClient): void { gatewayCallDeps.createGatewayClient = @@ -309,6 +314,7 @@ export const testing = { gatewayCallDeps.resolveConfigPath = defaultGatewayCallDeps.resolveConfigPath; gatewayCallDeps.resolveStateDir = defaultGatewayCallDeps.resolveStateDir; gatewayCallDeps.loadGatewayTlsRuntime = defaultGatewayCallDeps.loadGatewayTlsRuntime; + gatewayCallDeps.loadDeviceAuthToken = defaultGatewayCallDeps.loadDeviceAuthToken; }, }; @@ -358,6 +364,61 @@ function resolveDeviceIdentityForGatewayCall(params: { } } +function hasStoredOperatorDeviceAuthToken(deviceIdentity: DeviceIdentity | null): boolean { + if (!deviceIdentity) { + return false; + } + try { + return Boolean( + gatewayCallDeps.loadDeviceAuthToken({ + deviceId: deviceIdentity.deviceId, + role: "operator", + env: process.env, + })?.token, + ); + } catch { + return false; + } +} + +function resolveGatewayCallAuth(config: OpenClawConfig) { + return resolveGatewayAuth({ + authConfig: config.gateway?.auth, + env: process.env, + tailscaleMode: config.gateway?.tailscale?.mode, + }); +} + +function ensureGatewayCallCanAuthenticate(params: { + opts: CallGatewayBaseOptions; + context: ResolvedGatewayCallContext; + token?: string; + password?: string; + deviceIdentity: DeviceIdentity | null; +}): void { + const resolvedAuth = resolveGatewayCallAuth(params.context.config); + const authMode = resolvedAuth.mode; + if (authMode !== "token" && authMode !== "password") { + return; + } + if (params.token || params.password || params.opts.approvalRuntimeToken) { + return; + } + if (resolvedAuth.allowTailscale) { + return; + } + if (hasStoredOperatorDeviceAuthToken(params.deviceIdentity)) { + return; + } + throw new Error( + [ + `gateway ${params.opts.method} requires credentials before opening a websocket`, + "Fix: configure gateway.auth token/password, pair this device, or pass --token/--password.", + `Config: ${params.context.configPath}`, + ].join("\n"), + ); +} + export type { ExplicitGatewayAuth } from "./credentials.js"; export function resolveExplicitGatewayAuth(opts?: ExplicitGatewayAuth): ExplicitGatewayAuth { @@ -675,6 +736,7 @@ async function executeGatewayRequestWithScopes(params: { timeoutMs: number; safeTimerTimeoutMs: number; connectionDetails: GatewayConnectionDetails; + deviceIdentity: DeviceIdentity | null; }): Promise { const { opts, @@ -686,6 +748,7 @@ async function executeGatewayRequestWithScopes(params: { preauthHandshakeTimeoutMs, timeoutMs, safeTimerTimeoutMs, + deviceIdentity, } = params; return await new Promise((resolve, reject) => { if (opts.signal?.aborted) { @@ -771,10 +834,7 @@ async function executeGatewayRequestWithScopes(params: { ...(opts.approvalRuntimeToken ? { approvalRuntimeToken: opts.approvalRuntimeToken } : {}), role: "operator", scopes, - deviceIdentity: - opts.deviceIdentity === undefined - ? resolveDeviceIdentityForGatewayCall({ opts, url, token, password }) - : opts.deviceIdentity, + deviceIdentity, minProtocol: opts.minProtocol ?? MIN_CLIENT_PROTOCOL_VERSION, maxProtocol: opts.maxProtocol ?? PROTOCOL_VERSION, onHelloOk: async (hello) => { @@ -888,6 +948,17 @@ async function callGatewayWithScopes>( const url = connectionDetails.url; const tlsFingerprint = await resolveGatewayTlsFingerprint({ opts, context, url }); const { token, password } = resolvedCredentials; + const deviceIdentity = + opts.deviceIdentity === undefined + ? resolveDeviceIdentityForGatewayCall({ opts, url, token, password }) + : opts.deviceIdentity; + ensureGatewayCallCanAuthenticate({ + opts, + context, + token, + password, + deviceIdentity, + }); return await executeGatewayRequestWithScopes({ opts, scopes, @@ -899,6 +970,7 @@ async function callGatewayWithScopes>( timeoutMs, safeTimerTimeoutMs, connectionDetails, + deviceIdentity, }); } diff --git a/src/gateway/server/ws-connection.ts b/src/gateway/server/ws-connection.ts index 6c5b38b6e14..f878ae4cab7 100644 --- a/src/gateway/server/ws-connection.ts +++ b/src/gateway/server/ws-connection.ts @@ -28,6 +28,11 @@ import { logWs } from "../ws-log.js"; import { getHealthVersion, incrementPresenceVersion } from "./health-state.js"; import type { PreauthConnectionBudget } from "./preauth-connection-budget.js"; import { broadcastPresenceSnapshot } from "./presence-events.js"; +import { + buildHandshakeAuthLogKey, + HandshakeAuthLogLimiter, + shouldLimitMissingCredentialAuthLog, +} from "./ws-connection/handshake-auth-log-limiter.js"; import type { GatewayWsMessageHandlerParams, WsOriginCheckMetrics, @@ -40,6 +45,7 @@ type SubsystemLogger = ReturnType; const LOG_HEADER_MAX_LEN = 300; const LOG_HEADER_FORMAT_REGEX = /\p{Cf}/gu; const MAX_QUEUED_MESSAGE_HANDLER_FRAMES = 16; +const unauthorizedCloseBeforeConnectLogLimiter = new HandshakeAuthLogLimiter(); function replaceControlChars(value: string): string { let cleaned = ""; @@ -56,6 +62,11 @@ function replaceControlChars(value: string): string { } return cleaned; } + +function stringMetaValue(meta: Record, key: string): string | undefined { + const value = meta[key]; + return typeof value === "string" && value.trim().length > 0 ? value : undefined; +} const sanitizeLogValue = (value: string | undefined): string | undefined => { if (!value) { return undefined; @@ -387,10 +398,38 @@ export function attachGatewayWsConnectionHandler(params: AttachGatewayWsConnecti isNoisySwiftPmHelperClose(requestUserAgent, remoteAddr) || isExpectedStartupRetryClose ? logWsControl.debug : logWsControl.warn; - logFn( - `closed before connect conn=${connId} peer=${endpoint ?? "n/a"} remote=${remoteAddr ?? "?"} fwd=${logForwardedFor || "n/a"} origin=${logOrigin || "n/a"} host=${logHost || "n/a"} ua=${logUserAgent || "n/a"} code=${code ?? "n/a"} reason=${logReason || "n/a"}`, - closeContext, - ); + const authReason = stringMetaValue(closeMeta, "authReason"); + // This pre-connect close path has no client object yet; treat only + // missing shared credentials as suppressible startup retry noise. + const shouldLimitMissingAuthClose = + closeCause === "unauthorized" && + shouldLimitMissingCredentialAuthLog({ + reason: authReason, + authProvided: "none", + }); + const closeLogDecision = shouldLimitMissingAuthClose + ? unauthorizedCloseBeforeConnectLogLimiter.register( + buildHandshakeAuthLogKey({ + reason: authReason, + remoteAddr, + client: + stringMetaValue(closeMeta, "clientDisplayName") ?? + stringMetaValue(closeMeta, "client"), + mode: stringMetaValue(closeMeta, "mode"), + authProvided: "none", + }), + ) + : { shouldLog: true, suppressedSinceLastLog: 0 }; + if (closeLogDecision.shouldLog) { + const suppressedText = + closeLogDecision.suppressedSinceLastLog > 0 + ? ` suppressed=${closeLogDecision.suppressedSinceLastLog}` + : ""; + logFn( + `closed before connect conn=${connId} peer=${endpoint ?? "n/a"} remote=${remoteAddr ?? "?"} fwd=${logForwardedFor || "n/a"} origin=${logOrigin || "n/a"} host=${logHost || "n/a"} ua=${logUserAgent || "n/a"} code=${code ?? "n/a"} reason=${logReason || "n/a"}${suppressedText}`, + closeContext, + ); + } } if (client && isWebchatClient(client.connect.client)) { logWsControl.info( diff --git a/src/gateway/server/ws-connection/handshake-auth-log-limiter.test.ts b/src/gateway/server/ws-connection/handshake-auth-log-limiter.test.ts new file mode 100644 index 00000000000..2d19781a404 --- /dev/null +++ b/src/gateway/server/ws-connection/handshake-auth-log-limiter.test.ts @@ -0,0 +1,85 @@ +import { describe, expect, it } from "vitest"; +import { + buildHandshakeAuthLogKey, + HandshakeAuthLogLimiter, + shouldLimitMissingCredentialAuthLog, +} from "./handshake-auth-log-limiter.js"; + +describe("HandshakeAuthLogLimiter", () => { + it("suppresses repeated selected failures for the same client key within the interval", () => { + const limiter = new HandshakeAuthLogLimiter({ intervalMs: 1_000 }); + const key = buildHandshakeAuthLogKey({ + reason: "token_missing", + remoteAddr: "127.0.0.1", + client: "gateway:sessions.list", + mode: "backend", + authProvided: "none", + }); + + expect(limiter.register(key, 10_000)).toEqual({ + shouldLog: true, + suppressedSinceLastLog: 0, + }); + expect(limiter.register(key, 10_100)).toEqual({ + shouldLog: false, + suppressedSinceLastLog: 0, + }); + expect(limiter.register(key, 10_200)).toEqual({ + shouldLog: false, + suppressedSinceLastLog: 0, + }); + expect(limiter.register(key, 11_001)).toEqual({ + shouldLog: true, + suppressedSinceLastLog: 2, + }); + }); + + it("does not suppress distinct clients", () => { + const limiter = new HandshakeAuthLogLimiter({ intervalMs: 1_000 }); + + expect(limiter.register("token_missing|127.0.0.1|gateway:sessions.list", 10)).toEqual({ + shouldLog: true, + suppressedSinceLastLog: 0, + }); + expect(limiter.register("token_missing|127.0.0.1|gateway:health", 20)).toEqual({ + shouldLog: true, + suppressedSinceLastLog: 0, + }); + }); + + it("only rate-limits benign missing-credential startup retries", () => { + expect( + shouldLimitMissingCredentialAuthLog({ + reason: "token_missing", + authProvided: "none", + }), + ).toBe(true); + expect( + shouldLimitMissingCredentialAuthLog({ + reason: "password_missing", + authProvided: "none", + }), + ).toBe(true); + + for (const reason of [ + "token_mismatch", + "password_mismatch", + "device_token_mismatch", + "rate_limited", + "token_missing_config", + ]) { + expect( + shouldLimitMissingCredentialAuthLog({ + reason, + authProvided: "none", + }), + ).toBe(false); + } + expect( + shouldLimitMissingCredentialAuthLog({ + reason: "token_missing", + authProvided: "token", + }), + ).toBe(false); + }); +}); diff --git a/src/gateway/server/ws-connection/handshake-auth-log-limiter.ts b/src/gateway/server/ws-connection/handshake-auth-log-limiter.ts new file mode 100644 index 00000000000..2203806a8a2 --- /dev/null +++ b/src/gateway/server/ws-connection/handshake-auth-log-limiter.ts @@ -0,0 +1,80 @@ +export type HandshakeAuthLogDecision = { + shouldLog: boolean; + suppressedSinceLastLog: number; +}; + +type HandshakeAuthLogState = { + lastLoggedAtMs: number; + suppressedSinceLastLog: number; +}; + +export class HandshakeAuthLogLimiter { + private readonly intervalMs: number; + private readonly maxEntries: number; + private readonly entries = new Map(); + + constructor(options?: { intervalMs?: number; maxEntries?: number }) { + this.intervalMs = Math.max(1, Math.floor(options?.intervalMs ?? 30_000)); + this.maxEntries = Math.max(1, Math.floor(options?.maxEntries ?? 256)); + } + + register(key: string, nowMs = Date.now()): HandshakeAuthLogDecision { + const entry = this.entries.get(key); + if (!entry) { + this.pruneIfNeeded(); + this.entries.set(key, { + lastLoggedAtMs: nowMs, + suppressedSinceLastLog: 0, + }); + return { shouldLog: true, suppressedSinceLastLog: 0 }; + } + + if (nowMs - entry.lastLoggedAtMs < this.intervalMs) { + entry.suppressedSinceLastLog += 1; + return { shouldLog: false, suppressedSinceLastLog: 0 }; + } + + const suppressedSinceLastLog = entry.suppressedSinceLastLog; + entry.lastLoggedAtMs = nowMs; + entry.suppressedSinceLastLog = 0; + return { shouldLog: true, suppressedSinceLastLog }; + } + + private pruneIfNeeded(): void { + if (this.entries.size < this.maxEntries) { + return; + } + const oldestKey = this.entries.keys().next().value; + if (oldestKey !== undefined) { + this.entries.delete(oldestKey); + } + } +} + +export function buildHandshakeAuthLogKey(params: { + reason?: string; + remoteAddr?: string; + client?: string; + mode?: string; + authProvided?: string; +}): string { + return [ + params.reason ?? "unknown", + params.remoteAddr ?? "?", + params.client ?? "?", + params.mode ?? "?", + params.authProvided ?? "?", + ].join("|"); +} + +export function shouldLimitMissingCredentialAuthLog(params: { + reason?: string; + authProvided?: string; +}): boolean { + // Only no-credential retries are startup/config churn. Credential mismatches + // and auth rate limits are security audit events and must log per attempt. + return ( + params.authProvided === "none" && + (params.reason === "token_missing" || params.reason === "password_missing") + ); +} diff --git a/src/gateway/server/ws-connection/message-handler.ts b/src/gateway/server/ws-connection/message-handler.ts index 3d77a2cc0ed..2bcfa146fa2 100644 --- a/src/gateway/server/ws-connection/message-handler.ts +++ b/src/gateway/server/ws-connection/message-handler.ts @@ -152,6 +152,11 @@ import { shouldAllowSilentLocalPairing, shouldSkipLocalBackendSelfPairing, } from "./handshake-auth-helpers.js"; +import { + buildHandshakeAuthLogKey, + HandshakeAuthLogLimiter, + shouldLimitMissingCredentialAuthLog, +} from "./handshake-auth-log-limiter.js"; import { isUnauthorizedRoleError, UnauthorizedFloodGuard } from "./unauthorized-flood-guard.js"; type SubsystemLogger = ReturnType; @@ -162,6 +167,7 @@ const DEVICE_CREDENTIAL_INVALIDATING_METHODS = new Set([ "device.token.rotate", "device.token.revoke", ]); +const unauthorizedHandshakeLogLimiter = new HandshakeAuthLogLimiter(); /** Match production release versions (YYYY.M.D or YYYY.M.D-beta.N). */ const RELEASED_VERSION_RE = /^\d{4}\.\d+\.\d+/; @@ -759,9 +765,29 @@ export function attachGatewayWsMessageHandler(params: GatewayWsMessageHandlerPar scopeCount: scopes.length, hasDeviceIdentity: Boolean(device), }); - logWsControl.warn( - `unauthorized conn=${connId} peer=${formatForLog(peerLabel)} remote=${remoteAddr ?? "?"} client=${formatForLog(clientLabel)} ${connectParams.client.mode} v${formatForLog(connectParams.client.version)} role=${role} scopes=${scopes.length} auth=${authProvided} device=${device ? "yes" : "no"} platform=${formatForLog(connectParams.client.platform)} instance=${formatForLog(connectParams.client.instanceId ?? "n/a")} host=${formatForLog(requestHost ?? "n/a")} origin=${formatForLog(requestOrigin ?? "n/a")} ua=${formatForLog(requestUserAgent ?? "n/a")} reason=${failedAuth.reason ?? "unknown"}`, - ); + const authLogDecision = shouldLimitMissingCredentialAuthLog({ + reason: failedAuth.reason, + authProvided, + }) + ? unauthorizedHandshakeLogLimiter.register( + buildHandshakeAuthLogKey({ + reason: failedAuth.reason, + remoteAddr, + client: clientLabel, + mode: connectParams.client.mode, + authProvided, + }), + ) + : { shouldLog: true, suppressedSinceLastLog: 0 }; + if (authLogDecision.shouldLog) { + const suppressedText = + authLogDecision.suppressedSinceLastLog > 0 + ? ` suppressed=${authLogDecision.suppressedSinceLastLog}` + : ""; + logWsControl.warn( + `unauthorized conn=${connId} peer=${formatForLog(peerLabel)} remote=${remoteAddr ?? "?"} client=${formatForLog(clientLabel)} ${connectParams.client.mode} v${formatForLog(connectParams.client.version)} role=${role} scopes=${scopes.length} auth=${authProvided} device=${device ? "yes" : "no"} platform=${formatForLog(connectParams.client.platform)} instance=${formatForLog(connectParams.client.instanceId ?? "n/a")} host=${formatForLog(requestHost ?? "n/a")} origin=${formatForLog(requestOrigin ?? "n/a")} ua=${formatForLog(requestUserAgent ?? "n/a")} reason=${failedAuth.reason ?? "unknown"}${suppressedText}`, + ); + } const authMessage = formatGatewayAuthFailureMessage({ authMode: resolvedAuth.mode, authProvided, diff --git a/src/logging/logger-settings.test.ts b/src/logging/logger-settings.test.ts index c32e94eb662..57339b70e84 100644 --- a/src/logging/logger-settings.test.ts +++ b/src/logging/logger-settings.test.ts @@ -1,3 +1,4 @@ +import path from "node:path"; import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; let originalTestFileLog: string | undefined; @@ -67,5 +68,10 @@ describe("getResolvedLoggerSettings", () => { const settings = logging.getResolvedLoggerSettings(); expect(settings.level).toBe("info"); + expect(settings.file).toContain(path.join(".artifacts", "test-logs")); + expect(path.basename(settings.file)).toMatch(/^openclaw-vitest-\d+-\d{4}-\d{2}-\d{2}\.log$/); + expect(settings.file).not.toBe( + `/tmp/openclaw/openclaw-${new Date().toISOString().slice(0, 10)}.log`, + ); }); }); diff --git a/src/logging/logger.ts b/src/logging/logger.ts index 9ddc8fa8cdb..19031abb9f7 100644 --- a/src/logging/logger.ts +++ b/src/logging/logger.ts @@ -473,6 +473,18 @@ function canUseSilentVitestFileLogFastPath(envLevel: LogLevel | undefined): bool ); } +function resolveDefaultActiveLogFile(): string { + if (process.env.VITEST === "true" && process.env.OPENCLAW_TEST_FILE_LOG === "1") { + return path.join( + process.cwd(), + ".artifacts", + "test-logs", + `${LOG_PREFIX}-vitest-${process.pid}-${formatLocalDate(new Date())}${LOG_SUFFIX}`, + ); + } + return defaultRollingPathForToday(); +} + function resolveSettings(): ResolvedSettings { if (!canUseNodeFs()) { return { @@ -499,7 +511,7 @@ function resolveSettings(): ResolvedSettings { process.env.VITEST === "true" && process.env.OPENCLAW_TEST_FILE_LOG !== "1" ? "silent" : "info"; const fromConfig = normalizeLogLevel(cfg?.level, defaultLevel); const level = envLevel ?? fromConfig; - const file = cfg?.file ?? defaultRollingPathForToday(); + const file = cfg?.file ?? resolveDefaultActiveLogFile(); const maxFileBytes = resolveMaxLogFileBytes(cfg?.maxFileBytes); return { level, file, maxFileBytes }; }